lucene学习-3 - 代码重构

发布于 2019-09-26 作者 风铃 369次 浏览 版块 前端

本节内容如标题所示:重构上一节的代码。大体上按如下的思路:

  • 功能拆分;
  • 创建必要的工具类;

两个工具类StringUtils和TxtUtils。

StringUtils,主要是获取当前系统的换行符:

package com.zhyea.util;
                
/**
 * Holder for string-related constants.
 */
public class StringUtils {

	/**
	 * Value of the {@code line.separator} system property, read once when
	 * this class is initialized.
	 */
	public static final String NEWLINE;

	static {
		NEWLINE = System.getProperty("line.separator");
	}
}

TxtUtils,主要用于读取txt文件。这里使用了一个自定义类FileCharsetDetector来检测文件编码(该类的实现见原文此处的超链接):

package com.zhyea.util;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;

/**
 * Utility methods for reading plain-text (txt) files.
 *
 * @author robin
 */
public class TxtUtils {

    /**
     * Determines the charset name to use when decoding a txt file.
     * <p>
     * Delegates to {@code FileCharsetDetector.checkEncoding(File)}. A detected
     * value of {@code "windows-1252"} is replaced by {@code "Unicode"}; any
     * other value is returned unchanged.
     *
     * @param file
     *            the txt file to inspect
     * @return the charset name to decode the file with
     * @throws IOException
     *             propagated from the charset detector
     */
    public static String checkEncode(File file) throws IOException {
        String encode = FileCharsetDetector.checkEncoding(file);
        return (encode.equals("windows-1252") ? "Unicode" : encode);
    }

    /**
     * Reads the whole content of a txt file into a string.
     * <p>
     * The file is decoded with the charset returned by
     * {@link #checkEncode(File)} and read line by line. Every line is
     * followed by {@code StringUtils.NEWLINE} in the result, so line
     * terminators are normalized and the result of a non-empty file always
     * ends with a line separator.
     *
     * @param file
     *            the txt file to read
     * @return the decoded file content
     * @throws IOException
     *             if the encoding cannot be determined, the file cannot be
     *             opened, or reading fails
     */
    public static String readTxt(File file) throws IOException {
        String encode = checkEncode(file);
        // try-with-resources closes only what was actually opened. The input
        // stream is declared separately so it is released even when the
        // reader cannot be created (e.g. unsupported charset name), and a
        // failure while opening is no longer masked by a NullPointerException
        // from closing a reader that was never assigned.
        try (FileInputStream in = new FileInputStream(file);
                BufferedReader reader = new BufferedReader(
                        new InputStreamReader(in, encode))) {
            StringBuilder builder = new StringBuilder();
            String line;
            while (null != (line = reader.readLine())) {
                builder.append(line).append(StringUtils.NEWLINE);
            }
            return builder.toString();
        }
    }

}

然后是拆分后的Lucene操作类:

package com.zhyea.doggie;

import java.io.File;
import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

import com.zhyea.util.TxtUtils;

/**
 * Static helpers wrapping the basic Lucene operations used by the examples:
 * creating an analyzer, writing an index from local text files, opening a
 * reader/searcher, running a query and printing the hits.
 */
public class DoggieLucene {

    /**
     * Maximum number of hits requested from the searcher by
     * {@link #executeSearch(IndexSearcher, Query)}.
     */
    private static final int MAX_HITS = 10000;

    /**
     * Most recently created analyzer, reused by
     * {@link #createAnalyzer(Class)} when the same class is requested again.
     * Plain static field: access is not synchronized.
     */
    private static Analyzer analyzer;

    /**
     * Returns an analyzer of the given class.
     * <p>
     * If the previously created analyzer is of exactly this class it is
     * returned as is; otherwise a new instance is created through the class's
     * no-argument constructor and remembered for later calls.
     *
     * @param clazz
     *            the analyzer class to instantiate
     * @return an analyzer instance of the given class
     * @throws InstantiationException
     *             if the class cannot be instantiated
     * @throws IllegalAccessException
     *             if the class or its no-argument constructor is not
     *             accessible
     */
    public static Analyzer createAnalyzer(Class<?> clazz)
            throws InstantiationException, IllegalAccessException {
        if (null != analyzer && analyzer.getClass().equals(clazz)) {
            return analyzer;
        }
        return analyzer = (Analyzer) clazz.newInstance();
    }

    /**
     * Creates an index writer for an index stored on the file system.
     *
     * @param analyzer
     *            the analyzer used when indexing
     * @param indexPath
     *            the directory in which the index is stored
     * @return a new index writer; the caller is responsible for closing it
     * @throws IOException
     *             if the index directory cannot be opened or the writer
     *             cannot be created
     */
    public static IndexWriter createIndexWriter(Analyzer analyzer,
            String indexPath) throws IOException {
        // Directory that stores the index
        Directory dir = FSDirectory.open(new File(indexPath));
        // Writer configuration
        IndexWriterConfig config = new IndexWriterConfig(Version.LATEST,
                analyzer);
        return new IndexWriter(dir, config);
    }

    /**
     * Indexes every regular file directly inside a local directory.
     * <p>
     * For each file one document is added with two stored fields:
     * {@code path} (the canonical path, as a {@code StringField}) and
     * {@code content} (the file text read via {@code TxtUtils.readTxt}, as a
     * {@code TextField}). Sub-directories are skipped. The writer is
     * committed once after all files have been added.
     *
     * @param writer
     *            the index writer to add documents to
     * @param localDocPath
     *            the directory containing the text files
     * @throws IOException
     *             if the directory cannot be listed, a file cannot be read,
     *             or writing to the index fails
     */
    public static void addLocalDocument(IndexWriter writer, String localDocPath)
            throws IOException {
        File directory = new File(localDocPath);
        // listFiles() returns null when the path is not a directory or cannot
        // be read; report that instead of failing with a NullPointerException.
        File[] files = directory.listFiles();
        if (null == files) {
            throw new IOException("Cannot list files in directory: "
                    + localDocPath);
        }
        for (File tmp : files) {
            if (!tmp.isFile()) {
                // Only regular files can be read as text.
                continue;
            }
            Document doc = new Document();
            doc.add(new StringField("path", tmp.getCanonicalPath(),
                    Field.Store.YES));
            doc.add(new TextField("content", TxtUtils.readTxt(tmp),
                    Field.Store.YES));
            writer.addDocument(doc);
        }
        // Commit once for the whole batch rather than once per file.
        writer.commit();
    }

    /**
     * Opens an index reader on an index stored on the file system.
     *
     * @param indexPath
     *            the directory in which the index is stored
     * @return a new index reader; the caller is responsible for closing it
     * @throws IOException
     *             if the index cannot be opened
     */
    public static IndexReader createIndexReader(String indexPath)
            throws IOException {
        return DirectoryReader.open(FSDirectory.open(new File(indexPath)));
    }

    /**
     * Creates an index searcher on top of a reader.
     *
     * @param reader
     *            the index reader to search
     * @return a new index searcher
     */
    public static IndexSearcher createIndexSearcher(IndexReader reader) {
        return new IndexSearcher(reader);
    }

    /**
     * Runs a query and returns the top hits.
     *
     * @param searcher
     *            the searcher to run the query with
     * @param query
     *            the query to execute
     * @return the top hits, limited to 10000 entries
     * @throws IOException
     *             if reading the index fails
     */
    public static TopDocs executeSearch(IndexSearcher searcher, Query query)
            throws IOException {
        return searcher.search(query, MAX_HITS);
    }

    /**
     * Prints the score and stored {@code path} field of every hit to
     * standard output, one hit per line.
     *
     * @param docs
     *            the search result to display
     * @param reader
     *            the index reader used to load the hit documents
     * @throws IOException
     *             if a document cannot be loaded from the index
     */
    public static void showResult(TopDocs docs, IndexReader reader)
            throws IOException {
        for (ScoreDoc tmp : docs.scoreDocs) {
            Document doc = reader.document(tmp.doc);
            System.out.println(tmp.score + "  " + doc.get("path"));
        }
    }
}

拆的比较琐碎了,凑合看吧。

创建索引的类:

package com.zhyea.doggie;

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.index.IndexWriter;

import com.zhyea.util.FileUtil;

/**
 * Runnable example that builds the index from the local document directory.
 */
public class IndexTest {

    /** Directory in which the index is stored. */
    String indexPath = "D:\\\\aqsiqDevelop\\\\workspace3\\\\doggie\\\\WebContent\\\\index";
    /** Directory containing the documents to index. */
    String docPath = "D:\\\\aqsiqDevelop\\\\workspace3\\\\doggie\\\\WebContent\\\\docs";

    /**
     * Builds the index; any failure is printed as a stack trace.
     *
     * @param args
     *            unused
     */
    public static void main(String[] args) {
        try {
            IndexTest indexer = new IndexTest();
            indexer.createIndex();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Creates an index writer backed by a {@code SmartChineseAnalyzer},
     * indexes the files under {@code docPath} into {@code indexPath}, and
     * closes the writer afterwards.
     *
     * @throws IOException
     *             if indexing fails
     * @throws InstantiationException
     *             if the analyzer cannot be instantiated
     * @throws IllegalAccessException
     *             if the analyzer constructor is not accessible
     */
    private void createIndex()
            throws IOException, InstantiationException, IllegalAccessException {
        IndexWriter indexWriter = null;
        try {
            Analyzer chineseAnalyzer =
                    DoggieLucene.createAnalyzer(SmartChineseAnalyzer.class);
            indexWriter = DoggieLucene.createIndexWriter(chineseAnalyzer, indexPath);
            DoggieLucene.addLocalDocument(indexWriter, docPath);
        } finally {
            // The writer is null when its creation failed.
            if (indexWriter != null) {
                indexWriter.close();
            }
        }
    }
}

执行搜索的类:

package com.zhyea.doggie;

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;

/**
 * Runnable example that searches the index and prints the hits.
 */
public class SearchTest {

    /** Directory in which the index is stored. */
    String indexPath = "D:\\\\aqsiqDevelop\\\\workspace3\\\\doggie\\\\WebContent\\\\index";

    /**
     * Runs the search; any failure is printed as a stack trace.
     *
     * @param args
     *            unused
     */
    public static void main(String[] args) {
        try {
            SearchTest search = new SearchTest();
            search.executeSearch();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Opens the index at {@code indexPath}, parses the fixed query against
     * the {@code content} field with a {@code SmartChineseAnalyzer}, runs it
     * and prints the result. The reader is closed afterwards.
     *
     * @throws ParseException
     *             if the query string cannot be parsed
     * @throws IOException
     *             if the index cannot be opened or read
     * @throws InstantiationException
     *             if the analyzer cannot be instantiated
     * @throws IllegalAccessException
     *             if the analyzer constructor is not accessible
     */
    public void executeSearch()
            throws ParseException, IOException, InstantiationException, IllegalAccessException {
        IndexReader indexReader = null;
        try {
            indexReader = DoggieLucene.createIndexReader(indexPath);
            IndexSearcher indexSearcher = DoggieLucene.createIndexSearcher(indexReader);
            Analyzer chineseAnalyzer =
                    DoggieLucene.createAnalyzer(SmartChineseAnalyzer.class);
            QueryParser parser = new QueryParser("content", chineseAnalyzer);
            Query parsedQuery = parser.parse("杨过");
            TopDocs hits = DoggieLucene.executeSearch(indexSearcher, parsedQuery);
            DoggieLucene.showResult(hits, indexReader);
        } finally {
            // The reader is null when opening the index failed.
            if (indexReader != null) {
                indexReader.close();
            }
        }
    }
}

OK。

收藏
暂无回复