import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.Date;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
/**
 * Builds a Lucene 3.0 index over every .txt file beneath a data directory.
 *
 * @author wawa
 */
public class IndexTest {
    /**
     * Entry point: indexes all .txt files under {@code input} into
     * {@code INDEX_STORE_PATH} and reports how long indexing took.
     */
    public static void main(String[] args) {
        // Directory where the index is written.
        String INDEX_STORE_PATH = "D:\\java\\lucene\\indexCh2";
        // Directory tree containing the files to be indexed.
        String input = "D:\\java\\lucene\\zhuxian";
        try {
            long start = System.currentTimeMillis();
            int docNum = createIndex(new File(INDEX_STORE_PATH), new File(input));
            long end = System.currentTimeMillis();
            System.out.println("Indexing " + docNum + " files took " + (end - start) + " milliseconds");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Creates (overwriting any existing) an index in {@code indexDir} from
     * the files under {@code dataDir}.
     *
     * @param indexDir directory that will hold the Lucene index
     * @param dataDir  root directory of the documents to index
     * @return the number of documents indexed
     * @throws Exception if the index cannot be created or written
     */
    public static int createIndex(File indexDir, File dataDir) throws Exception {
        // Lucene 3.0 style: FSDirectory.open + Version-aware StandardAnalyzer.
        IndexWriter writer = new IndexWriter(FSDirectory.open(indexDir),
                new StandardAnalyzer(Version.LUCENE_30), true,
                IndexWriter.MaxFieldLength.LIMITED);
        try {
            indexDirectory(writer, dataDir);
            int numIndexed = writer.numDocs();
            writer.optimize();
            return numIndexed;
        } finally {
            // Always close, even on failure, so the index write lock is released.
            writer.close();
        }
    }

    /**
     * Recursively walks {@code dir} and indexes every .txt file found.
     *
     * @param writer destination index writer
     * @param dir    directory to traverse
     * @throws IOException if a file cannot be read
     */
    private static void indexDirectory(IndexWriter writer, File dir)
            throws IOException {
        File[] files = dir.listFiles();
        if (files == null) {
            // listFiles() returns null when dir is not a directory or on I/O error.
            return;
        }
        for (File f : files) {
            if (f.isDirectory()) {
                indexDirectory(writer, f); // recurse
            } else if (f.getName().endsWith(".txt")) {
                indexFile(writer, f);
            }
        }
    }

    /**
     * Indexes a single .txt file: tokenized contents plus a stored file name.
     *
     * @param writer destination index writer
     * @param f      file to index
     * @throws IOException if the file path cannot be resolved or read
     */
    private static void indexFile(IndexWriter writer, File f)
            throws IOException {
        if (f.isHidden() || !f.exists() || !f.canRead()) {
            return;
        }
        System.out.println("Indexing " + f.getCanonicalPath());
        Document doc = new Document();
        // Field(String, Reader): contents are tokenized but not stored.
        // NOTE(review): FileReader uses the platform default charset — confirm
        // the source files match it, or switch to an explicit-encoding Reader.
        doc.add(new Field("contents", new FileReader(f)));
        doc.add(new Field("filename", f.getCanonicalPath(), Field.Store.YES, Field.Index.ANALYZED));
        writer.addDocument(doc);
    }
}
import java.io.File;
import java.io.FileReader;
import java.util.Date;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
/**
 * Lucene 3.0 search demo: queries the "contents" field of the index built
 * by IndexTest and prints the stored file name of each hit.
 */
public class SearchTest {
    /**
     * Entry point: searches the index directory for a fixed keyword.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String srotPath = "D:\\java\\lucene\\indexCh2\\";
        String keys = "林惊羽";
        try {
            createSearch(new File(srotPath), keys);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Opens the index at {@code storPath} read-only, parses {@code keys}
     * against the "contents" field and prints the top 100 hits.
     *
     * @param storPath directory containing the Lucene index
     * @param keys     query string to parse and search for
     * @throws Exception if the index cannot be opened or the query is invalid
     */
    public static void createSearch(File storPath, String keys) throws Exception {
        IndexSearcher searcher = new IndexSearcher(FSDirectory.open(storPath), true); // read-only
        try {
            String field = "contents";
            // Lucene 3.0 style: Version-aware QueryParser and analyzer.
            QueryParser parser = new QueryParser(Version.LUCENE_30, field,
                    new StandardAnalyzer(Version.LUCENE_30));
            Query query = parser.parse(keys);
            // Collect up to 100 hits; docs need not arrive in docId order.
            TopScoreDocCollector collector = TopScoreDocCollector.create(100, false);
            long start = System.currentTimeMillis(); // start time
            searcher.search(query, collector);
            ScoreDoc[] hits = collector.topDocs().scoreDocs;
            System.out.println(hits.length);
            for (int i = 0; i < hits.length; i++) {
                Document doc = searcher.doc(hits[i].doc);
                // doc.get(...) returns the stored String value; the original
                // doc.getField(...) printed the Field object's toString instead.
                System.out.println(doc.get("filename") + " " + hits[i].toString() + " ");
            }
            long end = System.currentTimeMillis(); // end time
            System.out.println("Found :" + collector.getTotalHits()
                    + " document(s) (in " + (end - start)
                    + " milliseconds) that matched query '"
                    + keys + "':");
        } finally {
            // Release the underlying index reader / file handles.
            searcher.close();
        }
    }
}
分享到:
相关推荐
lucene3.0 lucene3.0 lucene3.0 lucene3.0 lucene3.0
Lucene3.0 Demo 索引的创建,更新(未实现),删除
Lucene3.0 使 用 教 程 Lucene3.0 使 用 教 程 Lucene3.0 使 用 教 程 Lucene3.0 使 用 教 程 Lucene3.0 使 用 教 程 Lucene3.0 使 用 教 程 Lucene3.0 使 用 教 程 Lucene3.0 使 用 教 程 Lucene3.0 使 用 教 程 ...
lucene 3.0 API中文帮助,学习的人懂得的
Lucene3.0特性Lucene3.0特性
Lucene3.0之查询处理(1):原理和查询类型 各种Query对象详解
Lucene 3.0 原理与代码分析完整版
lucene3.0 中文分词器, 庖丁解牛
lucene3.0 实例,在jdk1.5,lucene3.0下调式通过,可以直接运行。先运行生成索引文件的class,在运行搜索的class。
lucene3.0的核心jar包文件,lucene3.0的核心jar包文件,lucene3.0的核心jar包文件,lucene3.0的核心jar包文件。
传智播客Lucene3.0课程,Lucene3.0的入门教程.
lucene3.0-highlighter.jar lucene3.0的高亮jar包,从lucene3.0源码中导出来的
lucene升级了,分词也得升级哦! 在使用lucene3与paoding集成的时候可能会出现以下错误: Exception in thread "main" java.lang.AbstractMethodError: org.apache.lucene.analysis.TokenStream.incrementToken()Z ...
Lucene 3.0 原理 Lucene 3.0 原理 Lucene 3.0 原理 Lucene 3.0 原理
基于lucene3.0 书籍查询系统 基于lucene3.0 书籍查询系统
Lucene3.0浅析Lucene3.0浅析Lucene3.0浅析Lucene3.0浅析
全面好用的lucene 2.0 api以及lucene 3.0 api帮助文档
Lucene3.0分词系统.doc
全文检索 lucene 3.0 叶涛 全文检索 lucene 3.0 叶涛 非常好用.上手极快!