Luceneインデックス作成プロセス

2481 ワード

import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;
import java.util.Date;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;


/**
 * Command-line example that builds a Lucene index from the {@code .txt}
 * files located directly inside a fixed source directory.
 */
public class TextFileIndexer {

    /**
     * Indexes every regular file whose name ends with {@code .txt} in
     * {@code D:\chenzk\lucene}, writes the index to {@code C:\luceneIndex},
     * and prints the elapsed time in milliseconds.
     *
     * @param args command-line arguments; not used
     * @throws IOException if the source directory cannot be listed, or if
     *                     reading a file or writing the index fails
     */
    public static void main(String[] args) throws IOException {
        File fileDir = new File("D:\\chenzk\\lucene");
        File indexDir = new File("C:\\luceneIndex");

        // File.listFiles() returns null when the path is not a directory or
        // cannot be read. Check before the writer is opened so that a bad
        // source path fails with a clear message and leaves the index alone.
        File[] textFiles = fileDir.listFiles();
        if (textFiles == null) {
            throw new IOException("Cannot list files in directory " + fileDir.getPath());
        }

        Analyzer luceneAnalyzer = new StandardAnalyzer();
        // Third argument is the "create" flag; per the Lucene IndexWriter
        // documentation, true builds a new index in indexDir, replacing any
        // index that is already there.
        IndexWriter indexWriter = new IndexWriter(indexDir, luceneAnalyzer, true);
        long startTime = System.currentTimeMillis();

        try {
            // Add one document per matching file; subdirectories and files
            // with other extensions are skipped.
            for (int i = 0; i < textFiles.length; i++) {
                File candidate = textFiles[i];
                if (candidate.isFile() && candidate.getName().endsWith(".txt")) {
                    indexFile(indexWriter, candidate);
                }
            }
            indexWriter.optimize();
        } finally {
            // Always close the writer, even when indexing fails part-way.
            indexWriter.close();
        }

        long endTime = System.currentTimeMillis();
        System.out.println("It took " + (endTime - startTime)
                + " millseconds to create an index for the files in the directory "
                + fileDir.getPath());
    }

    /**
     * Adds a single text file to the index as a document with a
     * {@code content} field (fed from the file's reader) and a {@code path}
     * field (the file's path string).
     *
     * @param indexWriter open writer that receives the document
     * @param textFile    file to index
     * @throws IOException if the file cannot be opened or read, or the
     *                     document cannot be added
     */
    private static void indexFile(IndexWriter indexWriter, File textFile) throws IOException {
        System.out.println("File " + textFile.getCanonicalPath() + "  is being indexed");

        // NOTE(review): FileReader decodes with the platform default charset;
        // confirm that matches the encoding of the source files.
        Reader textReader = new FileReader(textFile);
        try {
            Document document = new Document();
            document.add(Field.Text("content", textReader));
            document.add(Field.Text("path", textFile.getPath()));
            indexWriter.addDocument(document);
        } finally {
            // Release the file handle even if addDocument throws; closing a
            // Reader that is already closed has no effect.
            textReader.close();
        }
    }

}

File D:\chenzk\lucene\1.txt  is being indexed
File D:\chenzk\lucene\2.txt  is being indexed
File D:\chenzk\lucene\3.txt  is being indexed
File D:\chenzk\lucene\segments.txt  is being indexed
It took 187 millseconds to create an index for the files in the directory D:\chenzk\lucene

貴重なご意見をどうぞ!ありがとう

pyppeteer

vimの翻訳プラグイン