JavaによるPDF、WORD、EXCELドキュメントの解析

6054 ワード

Javaでpdf、doc、docx、xls、xlsx形式のドキュメントを解析する
PDFファイルの読み込みに必要なjar依存関係
		
		<dependency>
			<groupId>org.apache.pdfbox</groupId>
			<artifactId>pdfbox</artifactId>
			<version>1.8.13</version>
		</dependency>
		

WORDファイルの読み込みに必要なjar依存関係
		
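		<dependency>
			<groupId>org.apache.poi</groupId>
			<artifactId>poi-scratchpad</artifactId>
			<version>3.16-beta1</version>
		</dependency>
		<dependency>
			<groupId>org.apache.poi</groupId>
			<artifactId>poi</artifactId>
			<version>3.16-beta1</version>
		</dependency>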
		

EXCELファイルの読み込みに必要なjar依存関係


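<dependency>
	<groupId>org.apache.xmlbeans</groupId>
	<artifactId>xmlbeans</artifactId>
	<version>2.6.0</version>
</dependency>
<dependency>
	<groupId>org.apache.poi</groupId>
	<artifactId>poi-ooxml</artifactId>
	<version>3.16-beta1</version>
</dependency>
<dependency>
	<groupId>org.apache.poi</groupId>
	<artifactId>poi-ooxml-schemas</artifactId>
	<version>3.16-beta1</version>
</dependency>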

WORDファイルの読み込み方法
/**
	 * Extracts the text of a Word document using {@code WordExtractor}.
	 *
	 * <p>Both the input stream and the extractor are closed before this method
	 * returns. I/O failures are reported with {@code printStackTrace()} and are
	 * not propagated to the caller.
	 *
	 * @param filePath path of the Word file to read
	 * @return the text returned by {@code WordExtractor.getText()}, or
	 *         {@code null} if the file could not be opened or read
	 */
	public static String getTextFromWord(String filePath) {
		String result = null;
		// try-with-resources closes the extractor first, then the stream,
		// so the extractor no longer leaks and no manual finally is needed.
		try (FileInputStream fis = new FileInputStream(filePath);
				WordExtractor wordExtractor = new WordExtractor(fis)) {
			result = wordExtractor.getText();
		} catch (IOException e) {
			// Also covers FileNotFoundException (a subclass of IOException)
			// and any failure raised while closing the resources.
			e.printStackTrace();
		}
		return result;
	}

PDFファイルの読み込み方法
	/**
	 * Reads a PDF file and returns its text content.
	 *
	 * <p>The file is parsed with {@code PDFParser} and the text is produced by
	 * {@code PDFTextStripper}. The input stream and the parsed document are
	 * both closed before returning. I/O failures are reported with
	 * {@code printStackTrace()} and are not propagated to the caller.
	 *
	 * @param filePath path of the PDF file to read
	 * @return the extracted text, or {@code null} if opening, parsing or text
	 *         extraction failed
	 */
	public static String getTextFromPdf(String filePath) {
		String result = null;
		PDDocument document = null;
		try (FileInputStream is = new FileInputStream(filePath)) {
			PDFParser parser = new PDFParser(is);
			parser.parse();
			document = parser.getPDDocument();
			result = new PDFTextStripper().getText(document);
		} catch (IOException e) {
			// Also covers FileNotFoundException (a subclass of IOException)
			// and a failure while closing the input stream.
			e.printStackTrace();
		} finally {
			// The document only exists once parsing succeeded, so it is
			// released here rather than in the resource list above.
			if (document != null) {
				try {
					document.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
		}
		return result;
	}

EXCEL、xls形式の読み込み
	/**
	 * @param filePath
	 *                
	 * @return    Excel   
	 */
	@SuppressWarnings({"resource", "deprecation"})
	public static String getTextFromExcel(String filePath) {
		StringBuffer buff = new StringBuffer();
		try {
			//    Excel        
			HSSFWorkbook wb = new HSSFWorkbook(new FileInputStream(filePath));
			//          。
			for (int numSheets = 0; numSheets < wb
					.getNumberOfSheets(); numSheets++) {
				if (null != wb.getSheetAt(numSheets)) {
					HSSFSheet aSheet = wb.getSheetAt(numSheets);//     sheet
					for (int rowNumOfSheet = 0; rowNumOfSheet <= aSheet
							.getLastRowNum(); rowNumOfSheet++) {
						if (null != aSheet.getRow(rowNumOfSheet)) {
							HSSFRow aRow = aSheet.getRow(rowNumOfSheet); //      
							for (int cellNumOfRow = 0; cellNumOfRow <= aRow
									.getLastCellNum(); cellNumOfRow++) {
								if (null != aRow.getCell(cellNumOfRow)) {
									HSSFCell aCell = aRow.getCell(cellNumOfRow);//     
									switch (aCell.getCellType()) {
										case HSSFCell.CELL_TYPE_FORMULA :
											break;
										case HSSFCell.CELL_TYPE_NUMERIC :
											buff.append(
													aCell.getNumericCellValue())
													.append('\t');
											break;
										case HSSFCell.CELL_TYPE_STRING :
											buff.append(
													aCell.getStringCellValue())
													.append('\t');
											break;
									}
								}
							}
							buff.append('
'); } } } } } catch (FileNotFoundException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } return buff.toString(); }

EXCEL、xlsx形式の読み込み
@SuppressWarnings("deprecation")
	public static String getTextFromExcel2007(String filePath) {
		StringBuffer buff = new StringBuffer();
		try {
			//    Excel        
			@SuppressWarnings("resource")
			XSSFWorkbook wb = new XSSFWorkbook(new FileInputStream(filePath));
			//          。
			for (int numSheets = 0; numSheets < wb
					.getNumberOfSheets(); numSheets++) {
				if (null != wb.getSheetAt(numSheets)) {
					XSSFSheet aSheet = wb.getSheetAt(numSheets);//     sheet
					for (int rowNumOfSheet = 0; rowNumOfSheet <= aSheet
							.getLastRowNum(); rowNumOfSheet++) {
						if (null != aSheet.getRow(rowNumOfSheet)) {
							XSSFRow aRow = aSheet.getRow(rowNumOfSheet); //      
							for (int cellNumOfRow = 0; cellNumOfRow <= aRow
									.getLastCellNum(); cellNumOfRow++) {
								if (null != aRow.getCell(cellNumOfRow)) {
									XSSFCell aCell = aRow.getCell(cellNumOfRow);//     
									switch (aCell.getCellType()) {
										case HSSFCell.CELL_TYPE_FORMULA :
											break;
										case HSSFCell.CELL_TYPE_NUMERIC :
											buff.append(
													aCell.getNumericCellValue())
													.append('\t');
											break;
										case HSSFCell.CELL_TYPE_STRING :
											buff.append(
													aCell.getStringCellValue())
													.append('\t');
											break;
									}
								}
							}
							buff.append('
'); } } } } } catch (FileNotFoundException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } return buff.toString(); }