import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.Date;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
/**
 * Builds a Lucene 3.0 index over every .txt file beneath a data directory.
 *
 * @author wawa
 */
public class IndexTest {
    /**
     * Entry point: indexes all .txt files under {@code input} into
     * {@code INDEX_STORE_PATH} and reports how long indexing took.
     */
    public static void main(String[] args) {
        // Directory where the index is written.
        String INDEX_STORE_PATH = "D:\\java\\lucene\\indexCh2";
        // Directory tree containing the files to be indexed.
        String input = "D:\\java\\lucene\\zhuxian";
        try {
            long start = System.currentTimeMillis();
            int docNum = createIndex(new File(INDEX_STORE_PATH), new File(input));
            long end = System.currentTimeMillis();
            System.out.println("Indexing " + docNum + " files took " + (end - start) + " milliseconds");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Creates (overwriting any existing) an index in {@code indexDir} from
     * the files under {@code dataDir}.
     *
     * @param indexDir directory that will hold the Lucene index
     * @param dataDir  root directory of the documents to index
     * @return the number of documents indexed
     * @throws Exception if the index cannot be created or written
     */
    public static int createIndex(File indexDir, File dataDir) throws Exception {
        // Lucene 3.0 style: FSDirectory.open + Version-aware StandardAnalyzer.
        IndexWriter writer = new IndexWriter(FSDirectory.open(indexDir),
                new StandardAnalyzer(Version.LUCENE_30), true,
                IndexWriter.MaxFieldLength.LIMITED);
        try {
            indexDirectory(writer, dataDir);
            int numIndexed = writer.numDocs();
            writer.optimize();
            return numIndexed;
        } finally {
            // Always close, even on failure, so the index write lock is released.
            writer.close();
        }
    }

    /**
     * Recursively walks {@code dir} and indexes every .txt file found.
     *
     * @param writer destination index writer
     * @param dir    directory to traverse
     * @throws IOException if a file cannot be read
     */
    private static void indexDirectory(IndexWriter writer, File dir)
            throws IOException {
        File[] files = dir.listFiles();
        if (files == null) {
            // listFiles() returns null when dir is not a directory or on I/O error.
            return;
        }
        for (File f : files) {
            if (f.isDirectory()) {
                indexDirectory(writer, f); // recurse
            } else if (f.getName().endsWith(".txt")) {
                indexFile(writer, f);
            }
        }
    }

    /**
     * Indexes a single .txt file: tokenized contents plus a stored file name.
     *
     * @param writer destination index writer
     * @param f      file to index
     * @throws IOException if the file path cannot be resolved or read
     */
    private static void indexFile(IndexWriter writer, File f)
            throws IOException {
        if (f.isHidden() || !f.exists() || !f.canRead()) {
            return;
        }
        System.out.println("Indexing " + f.getCanonicalPath());
        Document doc = new Document();
        // Field(String, Reader): contents are tokenized but not stored.
        // NOTE(review): FileReader uses the platform default charset — confirm
        // the source files match it, or switch to an explicit-encoding Reader.
        doc.add(new Field("contents", new FileReader(f)));
        doc.add(new Field("filename", f.getCanonicalPath(), Field.Store.YES, Field.Index.ANALYZED));
        writer.addDocument(doc);
    }
}
import java.io.File;
import java.io.FileReader;
import java.util.Date;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
/**
 * Lucene 3.0 search demo: queries the "contents" field of the index built
 * by IndexTest and prints the stored file name of each hit.
 */
public class SearchTest {
    /**
     * Entry point: searches the index directory for a fixed keyword.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String srotPath = "D:\\java\\lucene\\indexCh2\\";
        String keys = "林惊羽";
        try {
            createSearch(new File(srotPath), keys);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Opens the index at {@code storPath} read-only, parses {@code keys}
     * against the "contents" field and prints the top 100 hits.
     *
     * @param storPath directory containing the Lucene index
     * @param keys     query string to parse and search for
     * @throws Exception if the index cannot be opened or the query is invalid
     */
    public static void createSearch(File storPath, String keys) throws Exception {
        IndexSearcher searcher = new IndexSearcher(FSDirectory.open(storPath), true); // read-only
        try {
            String field = "contents";
            // Lucene 3.0 style: Version-aware QueryParser and analyzer.
            QueryParser parser = new QueryParser(Version.LUCENE_30, field,
                    new StandardAnalyzer(Version.LUCENE_30));
            Query query = parser.parse(keys);
            // Collect up to 100 hits; docs need not arrive in docId order.
            TopScoreDocCollector collector = TopScoreDocCollector.create(100, false);
            long start = System.currentTimeMillis(); // start time
            searcher.search(query, collector);
            ScoreDoc[] hits = collector.topDocs().scoreDocs;
            System.out.println(hits.length);
            for (int i = 0; i < hits.length; i++) {
                Document doc = searcher.doc(hits[i].doc);
                // doc.get(...) returns the stored String value; the original
                // doc.getField(...) printed the Field object's toString instead.
                System.out.println(doc.get("filename") + " " + hits[i].toString() + " ");
            }
            long end = System.currentTimeMillis(); // end time
            System.out.println("Found :" + collector.getTotalHits()
                    + " document(s) (in " + (end - start)
                    + " milliseconds) that matched query '"
                    + keys + "':");
        } finally {
            // Release the underlying index reader / file handles.
            searcher.close();
        }
    }
}
分享到:
相关推荐
lucene3.0 lucene3.0 lucene3.0 lucene3.0 lucene3.0
Lucene3.0 Demo 索引的创建,更新(未实现),删除
Lucene3.0 使 用 教 程 Lucene3.0 使 用 教 程 Lucene3.0 使 用 教 程 Lucene3.0 使 用 教 程 Lucene3.0 使 用 教 程 Lucene3.0 使 用 教 程 Lucene3.0 使 用 教 程 Lucene3.0 使 用 教 程 Lucene3.0 使 用 教 程 ...
lucene 3.0 API中文帮助,学习的人懂得的
Lucene3.0特性Lucene3.0特性
Lucene3.0之查询处理(1):原理和查询类型 各种Query对象详解
Lucene 3.0 原理与代码分析完整版
lucene3.0 中文分词器, 庖丁解牛
lucene3.0 实例,在jdk1.5,lucene3.0下调式通过,可以直接运行。先运行生成索引文件的class,在运行搜索的class。
lucene3.0的核心jar包文件,lucene3.0的核心jar包文件,lucene3.0的核心jar包文件,lucene3.0的核心jar包文件。
传智播客Lucene3.0课程,Lucene3.0的入门教程.
lucene3.0-highlighter.jar lucene3.0的高亮jar包,从lucene3.0源码中导出来的
lucene升级了,分词也得升级哦! 在使用lucene3与paoding集成的时候可能会出现以下错误: Exception in thread "main" java.lang.AbstractMethodError: org.apache.lucene.analysis.TokenStream.incrementToken()Z ...
Lucene 3.0 原理 Lucene 3.0 原理 Lucene 3.0 原理 Lucene 3.0 原理
基于lucene3.0 书籍查询系统 基于lucene3.0 书籍查询系统
Lucene3.0浅析Lucene3.0浅析Lucene3.0浅析Lucene3.0浅析
全面好用的lucene 2.0 api以及lucene 3.0 api帮助文档
Lucene3.0分词系统.doc
全文检索 lucene 3.0 叶涛 全文检索 lucene 3.0 叶涛 非常好用.上手极快!