Pages

Tuesday, 1 November 2011

Lucene, sample JAVA code to Search an indexed file folder


Please find below the Lucene sample JAVA code to search the files inside a folder. This code will search the indexed folder for a search query in an indexed field.

This java code is expecting the index path ( where the index files were created ) , field which need to be searched and the query need be searched as program arguments like  "java SearchFiles [-index dir] [-field f] [-query string]" .


import java.io.File;
import java.util.ArrayList;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

public class SearchFiles {

public static void main(String[] args){
try{
String usage = "Usage: SearchFiles [-index dir] [-field f] [-query string] \n\n";
String index = "index";
String field = "contents";
String queryString = null;
int hitsPerPage = 100;
for (int i = 0; i < args.length; i++) {
if ("-index".equals(args[i])) {
index = args[i + 1];
i++;
} else if ("-field".equals(args[i])) {
field = args[i + 1];
i++;
} else if ("-query".equals(args[i])) {
queryString = args[i + 1];
i++;
}
}
new SearchFiles().searchFiles(index,field,queryString,hitsPerPage);
}catch (Exception e) {
e.printStackTrace();
}
}

public ArrayList<String> searchFiles(String index,String field,
String queryString,int hitsPerPage) throws Exception {

ArrayList<String> returnStringList = new ArrayList<String>();

IndexSearcher searcher = new IndexSearcher(FSDirectory.open(new File(index)));
Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_31);
QueryParser parser = new QueryParser(Version.LUCENE_31, field, analyzer);
Query query = parser.parse(queryString);
int numberOfPages = 5;

TopDocs results = searcher.search(query, numberOfPages * hitsPerPage);

ScoreDoc[] hits = results.scoreDocs;
int numTotalHits = results.totalHits;
System.out.println(numTotalHits + " total matching documents");
returnStringList.add(numTotalHits + " total matching documents");

int start = 0;
int end = Math.min(numTotalHits, hitsPerPage);
for (int i = start; i < end; i++) {
Document doc = searcher.doc(hits[i].doc);
String path = doc.get("path");
if (path != null) {
System.out.println((i + 1) + ". " + path);
returnStringList.add((i + 1) + ". " + path);
String title = doc.get("title");
if (title != null) {
System.out.println("   Title: " + doc.get("title"));
returnStringList.add("   Title: " + doc.get("title"));
}
}
}
searcher.close();
return returnStringList;
}
}

No comments:

Post a Comment