Lucene note 26 - using Lucene - Custom QueryParser to solve some query performance problems

Keywords: Big Data Apache Java

I. reasons for using custom QueryParser

  1. Some query types (fuzzy queries, wildcard queries) are expensive and noticeably reduce search performance, so consider disabling them.
  2. Some applications have specific requirements that the stock QueryParser does not cover, for example a range query whose bounds are numbers; in that case the original QueryParser must be extended to support them.

II. Implementation method

Create a custom parser class that extends QueryParser, override the corresponding methods, and implement your own logic in them. Here is an example that disables fuzzy queries and wildcard queries: when a fuzzy or wildcard query is encountered, an exception is thrown immediately to inform the user.

III. implementation code

package com.wsy;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

import java.io.File;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Date;

/**
 * A {@link QueryParser} that rejects wildcard and fuzzy queries for
 * performance reasons, plus a small console demo that runs queries against
 * an on-disk index.
 *
 * <p>Note: a term that ends in a single trailing {@code *} (for example
 * {@code java*}) is dispatched by {@code QueryParser} to
 * {@code getPrefixQuery}, not {@code getWildcardQuery}, so it is not rejected
 * by this class.
 *
 * <p>Like {@code QueryParser} itself, instances are not safe for concurrent
 * use by multiple threads.
 */
public class CustomParser extends QueryParser {
    /** Maximum number of hits fetched and printed per search. */
    private static final int MAX_HITS = 100;

    private static Directory directory;
    private static IndexReader indexReader;
    private static final Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_35);

    static {
        try {
            directory = FSDirectory.open(new File("E:\\Lucene\\IndexLibrary"));
            indexReader = IndexReader.open(directory);
        } catch (IOException e) {
            // The index could not be opened. indexReader stays null and
            // searchByQuery reports that instead of failing with an NPE.
            e.printStackTrace();
        }
    }

    /**
     * Creates a parser with wildcard and fuzzy queries disabled.
     *
     * @param matchVersion Lucene compatibility version
     * @param f            default field for query terms
     * @param a            analyzer used to tokenize query text
     */
    public CustomParser(Version matchVersion, String f, Analyzer a) {
        super(matchVersion, f, a);
    }

    /**
     * Always rejects wildcard queries.
     *
     * @throws ParseException always, with a message intended for the end user
     */
    @Override
    protected Query getWildcardQuery(String field, String termStr) throws ParseException {
        throw new ParseException("Wildcard Queries have been disabled for performance reasons. Please enter more precise information to query");
    }

    /**
     * Always rejects fuzzy queries.
     *
     * @throws ParseException always, with a message intended for the end user
     */
    @Override
    protected Query getFuzzyQuery(String field, String termStr, float minSimilarity) throws ParseException {
        throw new ParseException("Due to performance reasons, fuzzy query has been disabled. Please enter more accurate information for query");
    }

    /**
     * Parses {@code value} with this parser, runs it against the shared index
     * and prints up to {@value #MAX_HITS} hits to standard output. Parse errors
     * (including the disabled query types) are reported on standard error.
     *
     * @param value the query string to parse and execute
     */
    public void searchByQuery(String value) {
        if (indexReader == null) {
            System.err.println("Index is not available; cannot execute query: " + value);
            return;
        }
        IndexSearcher indexSearcher = new IndexSearcher(indexReader);
        try {
            // Parse with this instance so its configured default field and
            // analyzer are honored instead of building a throwaway parser.
            Query query = parse(value);
            TopDocs topDocs = indexSearcher.search(query, MAX_HITS);
            SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                Document document = indexSearcher.doc(scoreDoc.doc);
                // Output document id, document score, document name, document path, document size and document modification time in sequence
                System.out.println(scoreDoc.doc + "-->" + scoreDoc.score + "-->" + document.get("fileName") + "-->" + document.get("path") + "-->" + document.get("size") + "-->" + formatDate(document.get("date"), simpleDateFormat));
            }
        } catch (IOException e) {
            e.printStackTrace();
        } catch (ParseException e) {
            System.err.println(e.getMessage());
        } finally {
            // Release the searcher on every path, including failures above.
            // The shared IndexReader is left open for subsequent searches.
            try {
                indexSearcher.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Formats a stored epoch-millisecond string; if the value is missing or
     * not a number, returns it unformatted rather than aborting the listing.
     */
    private static String formatDate(String rawMillis, SimpleDateFormat format) {
        try {
            return format.format(new Date(Long.parseLong(rawMillis)));
        } catch (NumberFormatException e) {
            return String.valueOf(rawMillis);
        }
    }

    /** Closes the shared index reader and directory, if they were opened. */
    private static void closeIndex() {
        try {
            if (indexReader != null) {
                indexReader.close();
            }
            if (directory != null) {
                directory.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Demo entry point: both queries are expected to be rejected. */
    public static void main(String[] args) {
        try {
            CustomParser customParser = new CustomParser(Version.LUCENE_35, "content", analyzer);
            // Fuzzy query
            customParser.searchByQuery("java~");
            // Wildcard query
            customParser.searchByQuery("*java");
        } finally {
            closeIndex();
        }
    }
}

 

Posted by CorkyMcDoogle on Thu, 12 Dec 2019 13:46:11 -0800