XPDF pdfをtextファイルに変換
2505 ワード
package com.hdzx.util;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
/**
 * Starts the Xpdf {@code pdftotext.bat} wrapper script for a PDF file and prints the text
 * file that the script is expected to produce.
 *
 * <p>The conversion is launched through {@code cmd}, and both the script and the working
 * folder are fixed under {@code D:/Xpdf}, so this class only works on a Windows machine
 * with that layout.
 */
public class PdfToHtmlUtil {

    /** Folder that holds the converter script and receives the generated text file. */
    private static final String XPDF_DIR = "D:\\Xpdf";

    /** Batch script that is started for every conversion. */
    private static final String CONVERTER_SCRIPT = XPDF_DIR + "\\pdftotext.bat";

    /** Fixed pause between starting the script and reading its output. */
    private static final long CONVERSION_WAIT_MILLIS = 2000L;

    /**
     * Starts the converter script for {@code file} if it has a {@code .pdf} extension
     * (case-insensitive) and exists as a regular file; otherwise does nothing beyond
     * printing diagnostics.
     *
     * <p>The script is started asynchronously: this method returns as soon as the process
     * has been launched, not when the conversion has finished.
     *
     * @param file    path of the PDF to convert; {@code null} is treated as "not a PDF"
     * @param project second argument handed to the script
     *                (presumably the output folder; verify against the script)
     */
    public static void convertToHtml(String file, String project) {
        startConversion(file, project);
    }

    /**
     * Validates the input and launches the converter script.
     *
     * @return {@code true} only if the process was started without an {@link IOException}
     */
    private static boolean startConversion(String file, String project) {
        if (!isPdf(file)) {
            return false;
        }
        if (!new File(file).isFile()) {
            return false;
        }
        // NOTE(review): "cmd /k" leaves the command interpreter running after the script
        // was started, and the returned Process is never waited on. Kept as is because the
        // script's own behavior (pause, window handling) is not visible here.
        String cmd = "cmd /k start " + CONVERTER_SCRIPT + " \""
                + file + "\" \"" + project + "\"";
        System.out.println("cmd:" + cmd);
        try {
            Runtime.getRuntime().exec(cmd);
            System.out.println("OK");
            return true;
        } catch (IOException e) {
            e.printStackTrace();
            System.out.println("error");
            return false;
        }
    }

    /**
     * Reports whether {@code file} has the extension {@code pdf}, ignoring case, and prints
     * the detected extension plus the verdict.
     */
    private static boolean isPdf(String file) {
        String type = extensionOf(file);
        System.out.println("type:" + type);
        boolean pdf = type.equals("pdf");
        System.out.println(pdf ? "pdf" : "not pdf");
        return pdf;
    }

    /**
     * Returns the lower-cased text after the last dot of the file name, or an empty string
     * when the path is {@code null} or its last segment contains no dot. Without this
     * guard a bare name such as {@code "pdf"} would be mistaken for a PDF.
     */
    private static String extensionOf(String path) {
        if (path == null) {
            return "";
        }
        int dot = path.lastIndexOf('.');
        if (dot < 0 || dot < lastSeparatorIndex(path)) {
            return "";
        }
        return path.substring(dot + 1).toLowerCase();
    }

    /** Index of the last {@code /} or {@code \} in {@code path}, or -1 if there is none. */
    private static int lastSeparatorIndex(String path) {
        return Math.max(path.lastIndexOf('/'), path.lastIndexOf('\\'));
    }

    /**
     * Builds {@code <dir>\<base name of pdf>.txt}.
     *
     * <p>NOTE(review): assumes the script writes its result under the input's base name
     * with a {@code .txt} extension into {@code dir}; confirm against pdftotext.bat.
     */
    private static String textOutputPath(String pdf, String dir) {
        String name = pdf.substring(lastSeparatorIndex(pdf) + 1);
        int dot = name.lastIndexOf('.');
        String base = dot < 0 ? name : name.substring(0, dot);
        return dir + "\\" + base + ".txt";
    }

    /** Converts the sample document {@code D:\Xpdf\slzw.pdf} and prints the result. */
    public static void main(String[] args) {
        String fileStr = "D:\\Xpdf\\slzw.pdf";
        setPDFtoHTML(fileStr);
    }

    /**
     * Starts the conversion of {@code fileStr}, waits a fixed two seconds, then prints the
     * generated text file line by line to standard output.
     *
     * <p>Nothing is read when the conversion could not be started, so stale output of an
     * earlier run is never printed for an invalid input. The fixed wait is a best-effort
     * guess; a conversion that takes longer yields missing or partial output.
     *
     * @param fileStr path of the PDF to convert
     */
    public static void setPDFtoHTML(String fileStr) {
        String path = XPDF_DIR;
        if (!startConversion(fileStr, path)) {
            return;
        }
        String strFile = textOutputPath(fileStr, path);

        try {
            Thread.sleep(CONVERSION_WAIT_MILLIS);
        } catch (InterruptedException e) {
            // Keep the interrupt visible to callers instead of silently consuming it.
            Thread.currentThread().interrupt();
            e.printStackTrace();
            return;
        }

        StringBuilder sb = new StringBuilder();
        try (BufferedReader read = new BufferedReader(new FileReader(strFile))) {
            String line;
            while ((line = read.readLine()) != null) {
                sb.append(line).append('\n');
            }
        } catch (IOException e) {
            e.printStackTrace();
            return;
        }
        // NOTE(review): an earlier revision ran replaceAll("", "") on this text, which is
        // a no-op. If the intent was to strip the form-feed page breaks that pdftotext
        // emits, use replace("\f", "") here; confirm before changing.
        System.out.println(sb.toString());
    }
}