`
nwj2010
  • 浏览: 89629 次
  • 性别: Icon_minigender_1
  • 来自: 宁波
社区版块
存档分类
最新评论

Lucene 3.0 demo

阅读更多

import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.Date;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;


/**
 * Builds a Lucene 3.0 index over the {@code .txt} files found under a data directory.
 *
 * @author wawa
 */
public class IndexTest {

	/**
	 * Indexes a hard-coded data directory into a hard-coded index directory and
	 * prints how many documents were indexed and how long it took.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		// Directory in which the index is stored.
		String indexStorePath = "D:\\java\\lucene\\indexCh2";

		// Directory containing the files to be indexed.
		String input = "D:\\java\\lucene\\zhuxian";

		try {
			long start = System.currentTimeMillis();
			int docNum = createIndex(new File(indexStorePath), new File(input));
			long end = System.currentTimeMillis();
			System.out.println("Indexing " + docNum + " files took " + (end - start) + " milliseconds");
		} catch (Exception e) {
			e.printStackTrace();
		}
	}

	/**
	 * Creates an index in {@code indexDir} from every {@code .txt} file under {@code dataDir}.
	 *
	 * @param indexDir directory the index is written to
	 * @param dataDir  root directory that is scanned recursively for {@code .txt} files
	 * @return the number of documents in the index after all files were added
	 * @throws Exception if the index cannot be opened or written, or a directory cannot be listed
	 */
	public static int createIndex(File indexDir, File dataDir) throws Exception {
		// Differs from Lucene 2.x: the writer is opened on a Directory obtained from FSDirectory.open
		// and takes an explicit Version and MaxFieldLength.
		IndexWriter writer = new IndexWriter(
				FSDirectory.open(indexDir),
				new StandardAnalyzer(Version.LUCENE_30),
				true,
				IndexWriter.MaxFieldLength.LIMITED);
		try {
			indexDirectory(writer, dataDir);
			int numIndexed = writer.numDocs();
			writer.optimize();
			return numIndexed;
		} finally {
			// Always close the writer, even when indexing fails, so it is not left open.
			writer.close();
		}
	}

	/**
	 * Recursively walks {@code dir} and indexes every file whose name ends with {@code .txt}.
	 *
	 * @param writer writer the documents are added to
	 * @param dir    directory to scan
	 * @throws IOException if {@code dir} cannot be listed or a file cannot be indexed
	 */
	private static void indexDirectory(IndexWriter writer, File dir)
			throws IOException {
		File[] files = dir.listFiles();
		if (files == null) {
			// listFiles() returns null when dir is not a directory or an I/O error occurs.
			throw new IOException("Cannot list directory: " + dir.getPath());
		}

		for (File f : files) {
			if (f.isDirectory()) {
				indexDirectory(writer, f); // recurse
			} else if (f.getName().endsWith(".txt")) {
				indexFile(writer, f);
			}
		}
	}

	/**
	 * Indexes a single text file. Hidden, missing or unreadable files are skipped silently.
	 *
	 * @param writer writer the document is added to
	 * @param f      file to index
	 * @throws IOException if the file cannot be read or the document cannot be added
	 */
	private static void indexFile(IndexWriter writer, File f)
			throws IOException {
		if (f.isHidden() || !f.exists() || !f.canRead()) {
			return;
		}

		System.out.println("Indexing " + f.getCanonicalPath());

		// NOTE(review): FileReader decodes with the platform default charset; confirm this matches
		// the encoding of the input files.
		FileReader reader = new FileReader(f);
		try {
			Document doc = new Document();
			// Changed from Lucene 2.x: fields are created through the Field constructors.
			doc.add(new Field("contents", reader));
			doc.add(new Field("filename", f.getCanonicalPath(), Field.Store.YES, Field.Index.ANALYZED));

			writer.addDocument(doc);
		} finally {
			// The reader is consumed during addDocument; close it afterwards so the file handle
			// is released even when adding the document fails.
			reader.close();
		}
	}
}

 

import java.io.File;
import java.io.FileReader;
import java.util.Date;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;


/**
 * Lucene 3.0 search demo: queries the {@code contents} field of an existing index.
 */
public class SearchTest {

	/**
	 * Runs a sample query against a hard-coded index directory.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		String indexPath = "D:\\java\\lucene\\indexCh2\\";
		String keys = "林惊羽";
		try {
			createSearch(new File(indexPath), keys);
		} catch (Exception e) {
			e.printStackTrace();
		}
	}

	/**
	 * Searches the {@code contents} field of the index in {@code storPath} for {@code keys}
	 * and prints the stored {@code filename} and score information of up to 100 hits,
	 * followed by the total hit count and the elapsed time.
	 *
	 * @param storPath directory containing the index
	 * @param keys     query string, parsed with {@link QueryParser}
	 * @throws Exception if the index cannot be opened or read, or the query cannot be parsed
	 */
	public static void createSearch(File storPath, String keys) throws Exception {
		// Open the index read-only.
		IndexSearcher searcher = new IndexSearcher(FSDirectory.open(storPath), true);
		try {
			String field = "contents";

			// Changed from Lucene 2.x: QueryParser takes a Version argument.
			QueryParser parser = new QueryParser(Version.LUCENE_30, field,
					new StandardAnalyzer(Version.LUCENE_30));
			Query query = parser.parse(keys);

			// Changed from Lucene 2.x: results are gathered by a collector instead of Hits.
			TopScoreDocCollector collector = TopScoreDocCollector.create(100, false);

			long start = System.currentTimeMillis();

			searcher.search(query, collector);
			ScoreDoc[] hits = collector.topDocs().scoreDocs;

			System.out.println(hits.length);
			for (int i = 0; i < hits.length; i++) {
				// Load the stored document for this hit.
				Document doc = searcher.doc(hits[i].doc);
				// Print the stored field value rather than the Field object's debug representation.
				System.out.println(doc.get("filename") + "   " + hits[i].toString() + "  ");
			}
			long end = System.currentTimeMillis();

			System.out.println("Found :" + collector.getTotalHits() +
					" document(s) (in " + (end - start) +
					" milliseconds) that matched query '" +
					keys + "':");
		} finally {
			// Close the searcher whether or not the search succeeded.
			searcher.close();
		}
	}

}
 
分享到:
评论
1 楼 huangfenghit 2011-07-28  
受用了。对于3.0中没有hits我郁闷了很久。

相关推荐

Global site tag (gtag.js) - Google Analytics