Apache Lucene 5.1.0 indexing and searching Java example


      Click here to attend Spring Framework 4.x and certification course with a discount


import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.analysis.core.SimpleAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.*;
import org.apache.lucene.document.Field.Store;
import java.io.*;
import java.nio.file.Path;
import java.nio.file.Paths;

/**
 * Minimal Apache Lucene 5.1.0 example: indexes every regular file found in
 * {@link #DOCS_DIR} into an on-disk index at {@link #INDEX_DIR}, then runs a
 * query against the "contents" field and prints the matching documents.
 */
public class LuceneExamples {

	/** Directory on disk where the Lucene index is written and read. */
	private static final String INDEX_DIR = "C:/Users/Tuna/Desktop/lucene-5.1.0/indexes";

	/** Directory whose files are read and added to the index. */
	private static final String DOCS_DIR = "C:/Users/Tuna/Desktop/sample";

	/**
	 * Rebuilds the index from the sample directory and searches it for "java".
	 *
	 * @param args ignored
	 */
	public static void main(String[] args) {
		indexDirectory();
		search("java");
	}

	/**
	 * Replaces the content of the index with one document per regular file in
	 * {@link #DOCS_DIR}. Each document stores the file name in the "path" field
	 * and the full text in the "contents" field. Failures are printed to
	 * standard error and do not propagate to the caller.
	 */
	private static void indexDirectory() {
		// List the input first: if the sample directory is missing or unreadable
		// we return before touching (and wiping) the existing index.
		File[] files = new File(DOCS_DIR).listFiles();
		if (files == null) {
			System.err.println("cannot list files in " + DOCS_DIR);
			return;
		}

		Path indexPath = Paths.get(INDEX_DIR);
		// try-with-resources closes the writer (which commits), the directory and
		// the analyzer in reverse order, even when indexing fails part-way.
		try (SimpleAnalyzer analyzer = new SimpleAnalyzer();
				Directory directory = FSDirectory.open(indexPath);
				IndexWriter indexWriter = new IndexWriter(directory, new IndexWriterConfig(analyzer))) {
			// Start from an empty index so re-running does not duplicate documents.
			indexWriter.deleteAll();
			for (File file : files) {
				if (!file.isFile()) {
					// Sub-directories cannot be opened as a stream; skip them
					// instead of aborting the whole run.
					continue;
				}
				System.out.println("indexed " + file.getCanonicalPath());
				Document doc = new Document();
				doc.add(new TextField("path", file.getName(), Store.YES));
				doc.add(new TextField("contents", readContents(file), Store.YES));
				indexWriter.addDocument(doc);
			}
		} catch (Exception e) {
			e.printStackTrace();
		}
	}

	/**
	 * Reads a whole text file into a string, terminating every line with "\n".
	 *
	 * @param file the file to read
	 * @return the file content with normalized line endings
	 * @throws IOException if the file cannot be opened or read
	 */
	private static String readContents(File file) throws IOException {
		StringBuilder contents = new StringBuilder();
		// NOTE(review): decodes with the platform default charset, as before;
		// pass an explicit charset here if the sample files have a known encoding.
		try (BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(file)))) {
			String line;
			while ((line = reader.readLine()) != null) {
				contents.append(line).append("\n");
			}
		}
		return contents.toString();
	}

	/**
	 * Parses {@code text} as a query on the "contents" field, prints the total
	 * hit count and, for up to 10 top hits, the stored "path" and "contents"
	 * values. Failures are printed to standard error and do not propagate.
	 *
	 * @param text query string in the classic Lucene query parser syntax
	 */
	private static void search(String text) {
		Path indexPath = Paths.get(INDEX_DIR);
		// The query must be analyzed with the same analyzer that built the index,
		// otherwise query terms may be tokenized differently from indexed terms.
		try (SimpleAnalyzer analyzer = new SimpleAnalyzer();
				Directory directory = FSDirectory.open(indexPath);
				IndexReader indexReader = DirectoryReader.open(directory)) {
			IndexSearcher indexSearcher = new IndexSearcher(indexReader);
			QueryParser queryParser = new QueryParser("contents", analyzer);
			Query query = queryParser.parse(text);
			TopDocs topDocs = indexSearcher.search(query, 10);
			System.out.println("totalHits " + topDocs.totalHits);
			for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
				Document document = indexSearcher.doc(scoreDoc.doc);
				System.out.println("path " + document.get("path"));
				System.out.println("content " + document.get("contents"));
			}
		} catch (Exception e) {
			e.printStackTrace();
		}
	}
}

output

indexed C:\Users\Tuna\Desktop\sample\New Text Document (2).txt
indexed C:\Users\Tuna\Desktop\sample\New Text Document (3).txt
indexed C:\Users\Tuna\Desktop\sample\New Text Document (4).txt
indexed C:\Users\Tuna\Desktop\sample\New Text Document (5).txt
indexed C:\Users\Tuna\Desktop\sample\New Text Document.txt
totalHits 1
path New Text Document.txt
content package nl.kameroom.emailservice;

import java.util.List;
import java.util.Locale;
import java.util.Map;

import javax.mail.internet.MimeMessage;
import javax.sql.DataSource;

import nl.kameroom.user.UserManager;

import org.apache.velocity.app.VelocityEngine;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.support.ReloadableResourceBundleMessageSource;
import org.springframework.context.support.ResourceBundleMessageSource;
import org.springframework.jdbc.core.JdbcTemplate;
import org.springframework.mail.javamail.JavaMailSender;
import org.springframework.mail.javamail.MimeMessageHelper;
import org.springframework.mail.javamail.MimeMessagePreparator;
import org.springframework.ui.velocity.VelocityEngineUtils;
import org.springframework.util.Assert;

public class EmailService {

/* Email From param */
public static final String FROM = “from”;

/* Email To param */
public static final String TO = “to”;

/* Email Subject param */
public static final String SUBJECT = “subject”;

/* Email CC param */
public static final String CC_LIST = “ccList”;
public static final String KEY = “key”;

@Autowired
private JavaMailSender mailSender;

@Autowired
private VelocityEngine velocityEngine;

@Autowired
private ReloadableResourceBundleMessageSource messageSourceVM = null;
public void setResource(ReloadableResourceBundleMessageSource resource){
this.messageSourceVM = resource;
}
public boolean send(final String templateName, final Map model) {
boolean r = false;
try {

MimeMessagePreparator preparator = new MimeMessagePreparator() {

@SuppressWarnings(“unchecked”)
public void prepare(MimeMessage mimeMessage) throws Exception {
String from = (String) model.get(FROM);
String to = (String) model.get(TO);
String subject = (String) model.get(SUBJECT);
String key = (String) model.get(KEY);
Assert.notNull(from);
Assert.notNull(to);
Assert.notNull(subject);
List ccList = (List) model.get(CC_LIST);
MimeMessageHelper message = new MimeMessageHelper(mimeMessage);
message.setFrom(from);
message.setTo(to);
message.setSubject(subject);
if (ccList != null) {
for (String cc : ccList) {
//message.addCc(cc);
message.addBcc(cc);
}
}
model.put(“messageSource”,messageSourceVM);
model.put(“noArgs”, new Object[]{});
model.put(“key”, key);

model.put(“locale”, Locale.ENGLISH);

String text = VelocityEngineUtils.mergeTemplateIntoString(
velocityEngine, templateName,”utf-8″, model);
message.setText(text, true);
}
};

mailSender.send(preparator);
r = true;
}catch(Exception e) {
e.printStackTrace();
}
return r;
}
}

Advertisements

Apache Lucene Installation and example


Apache Lucene is a high-performance, open-source, full-featured text search engine library.

It is written in Java and is cross-platform.

It provides high-performance indexing together with high-performance searching.

Apache Lucene runs on Java 7 or greater

You can check the system requirements at the following link:
https://lucene.apache.org/core/5_1_0/SYSTEM_REQUIREMENTS.html

Apache Lucene’s documentation
https://lucene.apache.org/core/5_1_0/index.html

This tutorial is based on version 5.1.0 of lucene-core.
Go to http://apache.mirror1.spango.com/lucene/java/5.1.0/

Download lucene-5.1.0.zip

In order to use Lucene we need to put the following jars on the CLASSPATH:
lucene-core-5.1.0.jar
lucene-queryparser-5.1.0.jar
lucene-analyzers-common-5.1.0.jar
lucene-demo-5.1.0.jar

set LUCENE=C:\Users\tunato\Desktop\lucene-5.1.0\lucene-5.1.0
java -cp %LUCENE%\core\lucene-core-5.1.0.jar;%LUCENE%\demo\lucene-demo-5.1.0.jar;%LUCENE%\queryparser\lucene-queryparser-5.1.0.jar;%LUCENE%\analysis\common\lucene-analyzers-common-5.1.0.jar org.apache.lucene.demo.IndexFiles -docs C:\Users\tunato\Desktop\lucene-5.1.0\lucene-5.1.0

adding C:\Users\tunato\Desktop\lucene-5.1.0\lucene-5.1.0\replicator\lib\httpcore-4.3.jar
adding C:\Users\tunato\Desktop\lucene-5.1.0\lucene-5.1.0\replicator\lib\javax.servlet-3.0.0.v201112011016.jar
adding C:\Users\tunato\Desktop\lucene-5.1.0\lucene-5.1.0\replicator\lib\jetty-continuation-8.1.10.v20130312.ja
r
adding C:\Users\tunato\Desktop\lucene-5.1.0\lucene-5.1.0\replicator\lib\jetty-http-8.1.10.v20130312.jar
adding C:\Users\tunato\Desktop\lucene-5.1.0\lucene-5.1.0\replicator\lib\jetty-io-8.1.10.v20130312.jar
adding C:\Users\tunato\Desktop\lucene-5.1.0\lucene-5.1.0\replicator\lib\jetty-server-8.1.10.v20130312.jar
adding C:\Users\tunato\Desktop\lucene-5.1.0\lucene-5.1.0\replicator\lib\jetty-servlet-8.1.10.v20130312.jar
adding C:\Users\tunato\Desktop\lucene-5.1.0\lucene-5.1.0\replicator\lib\jetty-util-8.1.10.v20130312.jar
adding C:\Users\tunato\Desktop\lucene-5.1.0\lucene-5.1.0\replicator\lucene-replicator-5.1.0.jar
adding C:\Users\tunato\Desktop\lucene-5.1.0\lucene-5.1.0\sandbox\lib\jakarta-regexp-1.4.jar
adding C:\Users\tunato\Desktop\lucene-5.1.0\lucene-5.1.0\sandbox\lucene-sandbox-5.1.0.jar
adding C:\Users\tunato\Desktop\lucene-5.1.0\lucene-5.1.0\spatial\lib\spatial4j-0.4.1.jar
adding C:\Users\tunato\Desktop\lucene-5.1.0\lucene-5.1.0\spatial\lucene-spatial-5.1.0.jar
adding C:\Users\tunato\Desktop\lucene-5.1.0\lucene-5.1.0\suggest\lucene-suggest-5.1.0.jar
adding C:\Users\tunato\Desktop\lucene-5.1.0\lucene-5.1.0\SYSTEM_REQUIREMENTS.txt
adding C:\Users\tunato\Desktop\lucene-5.1.0\lucene-5.1.0\test-framework\lib\ant-1.8.2.jar
adding C:\Users\tunato\Desktop\lucene-5.1.0\lucene-5.1.0\test-framework\lib\junit-4.10.jar
adding C:\Users\tunato\Desktop\lucene-5.1.0\lucene-5.1.0\test-framework\lib\junit4-ant-2.1.12.jar
adding C:\Users\tunato\Desktop\lucene-5.1.0\lucene-5.1.0\test-framework\lib\randomizedtesting-runner-2.1.12.ja
r
adding C:\Users\tunato\Desktop\lucene-5.1.0\lucene-5.1.0\test-framework\lucene-test-framework-5.1.0.jar
48688 total milliseconds

After indexing with Lucene you can use the following command in order to search:
java -cp %LUCENE%\core\lucene-core-5.1.0.jar;%LUCENE%\demo\lucene-demo-5.1.0.jar;%LUCENE%\queryparser\lucene-queryparser-5.1.0.jar;%LUCENE%\analysis\common\lucene-analyzers-common-5.1.0.jar org.apache.lucene.demo.SearchFiles

C:\Users\tunato\Desktop\lucene-5.1.0\lucene-5.1.0\demo>java -cp %LUCENE%\core\lucene-core-5.1.0.jar;%LUCENE%\d
emo\lucene-demo-5.1.0.jar;%LUCENE%\queryparser\lucene-queryparser-5.1.0.jar;%LUCENE%\analysis\common\lucene-an
alyzers-common-5.1.0.jar org.apache.lucene.demo.SearchFiles
Enter query:
test
Searching for: test
186 total matching documents
1. C:\Users\tunato\Desktop\lucene-5.1.0\lucene-5.1.0\licenses\commons-codec-NOTICE.txt
2. C:\Users\tunato\Desktop\lucene-5.1.0\lucene-5.1.0\analysis\README.txt
3. C:\Users\tunato\Desktop\lucene-5.1.0\lucene-5.1.0\queryparser\docs\xml\README.htm
4. C:\Users\tunato\Desktop\lucene-5.1.0\lucene-5.1.0\benchmark\README.enwiki
5. C:\Users\tunato\Desktop\lucene-5.1.0\lucene-5.1.0\docs\benchmark\org\apache\lucene\benchmark\byTask\program
matic\package-summary.html
6. C:\Users\tunato\Desktop\lucene-5.1.0\lucene-5.1.0\docs\test-framework\org\apache\lucene\util\BaseBitSetTest
Case.html
7. C:\Users\tunato\Desktop\lucene-5.1.0\lucene-5.1.0\docs\test-framework\org\apache\lucene\util\package-summar
y.html
8. C:\Users\tunato\Desktop\lucene-5.1.0\lucene-5.1.0\queryparser\docs\surround\README.txt
9. C:\Users\tunato\Desktop\lucene-5.1.0\lucene-5.1.0\docs\classification\org\apache\lucene\classification\util
s\DatasetSplitter.html
10. C:\Users\tunato\Desktop\lucene-5.1.0\lucene-5.1.0\docs\test-framework\overview-summary.html
Press (n)ext page, (q)uit or enter number to jump to a page.

Apache Lucene exception java.lang.NoClassDefFoundError: org/apache/lucene/analysis/standard/StandardAnalyzer


C:\Users\tunato\Desktop\lucene-5.1.0\lucene-5.1.0\demo>java -cp C:\Users\tunato\Desktop\lucene-5.1.0\lucene-5.
1.0\core\lucene-core-5.1.0.jar;C:\Users\tunato\Desktop\lucene-5.1.0\lucene-5.1.0\demo\lucene-demo-5.1.0.jar or
g.apache.lucene.demo.IndexFiles C:\Users\tunato\Desktop\lucene-5.1.0\lucene-5.1.0
Exception in thread "main" java.lang.NoClassDefFoundError: org/apache/lucene/analysis/standard/StandardAnalyze
r
        at java.lang.Class.getDeclaredMethods0(Native Method)
        at java.lang.Class.privateGetDeclaredMethods(Unknown Source)
        at java.lang.Class.getMethod0(Unknown Source)
        at java.lang.Class.getMethod(Unknown Source)
        at sun.launcher.LauncherHelper.getMainMethod(Unknown Source)
        at sun.launcher.LauncherHelper.checkAndLoadMain(Unknown Source)
Caused by: java.lang.ClassNotFoundException: org.apache.lucene.analysis.standard.StandardAnalyzer
        at java.net.URLClassLoader$1.run(Unknown Source)
        at java.net.URLClassLoader$1.run(Unknown Source)
        at java.security.AccessController.doPrivileged(Native Method)
        at java.net.URLClassLoader.findClass(Unknown Source)
        at java.lang.ClassLoader.loadClass(Unknown Source)
        at sun.misc.Launcher$AppClassLoader.loadClass(Unknown Source)
        at java.lang.ClassLoader.loadClass(Unknown Source)
        ... 6 more

C:\Users\tunato\Desktop\lucene-5.1.0\lucene-5.1.0\demo>

To fix this error, add the following jar to the CLASSPATH:
lucene-analyzers-common-5.1.0.jar
C:\Users\tunato\Desktop\lucene-5.1.0\lucene-5.1.0\analysis\common\lucene-analyzers-common-5.1.0.jar