Lucene(2):Springboot整合全文检索引擎TermInSetQuery应用实例附源码

发布于:2024-11-27 ⋅ 阅读:(6) ⋅ 点赞:(0)

前言

本章代码已分享至Gitee: https://gitee.com/lengcz/springbootlucene01

接上文。Lucene(1):Springboot整合全文检索引擎Lucene常规入门附源码

如何在指定范围内查询。从lucene 7 开始,filter 被弃用,导致无法进行调节过滤。

TermInSetQuery 指定集合条件过滤

如图,想要设定fromType为CSDN和小米,不需要查询其他来源的文字该怎么办?
在这里插入图片描述

前文提到的TermRangeQuery 属于数值范围的条件,这里显然不适用。

TermRangeQuery query2  = new TermRangeQuery("id", new BytesRef("1001".getBytes()), new BytesRef("1005".getBytes()), true, true);
        builder.add(query2, BooleanClause.Occur.MUST);

我们需要使用TermInSetQuery

        List<BytesRef> bytesRefList = Arrays.asList(new BytesRef("CSDN".getBytes()),new BytesRef("小米".getBytes()));
        TermInSetQuery query3 = new TermInSetQuery("fromType",bytesRefList);
        builder.add(query3, BooleanClause.Occur.MUST);

多关键词在多字段中搜索

//多条件查询构造
        BooleanQuery.Builder builder = new BooleanQuery.Builder();

//        // 条件一
//        MultiFieldQueryParser parser = new MultiFieldQueryParser(str, new IKAnalyzer());
        // 创建查询对象
//        Query query = parser.parse(text);
//        builder.add(query, BooleanClause.Occur.MUST);

        BooleanQuery.Builder builder2 = new BooleanQuery.Builder();//这里很重要,必须单独构建一个query,相当于预设一个括号,把几个关键词放到括号里
        for (String key : text.split(",")) {
            String fields[] = {"title", "description"};//在标题和描述中搜索
            String kws[] = {key, key};
            BooleanClause.Occur[] flags = new BooleanClause.Occur[]{BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD};
            Query queryKey = MultiFieldQueryParser.parse(kws, fields, flags, new IKAnalyzer()); //通常就是关键词搜索
            if (rule.equals("and")) { // and 或者 or
                builder2.add(queryKey, BooleanClause.Occur.MUST); //相当于各关键词之间的关系是AND
            } else {
                builder2.add(queryKey, BooleanClause.Occur.SHOULD); /// 相当于各关键词之间的关系是OR
            }
        }
        builder.add(builder2.build(), BooleanClause.Occur.MUST);

完整示例

 /**
     *
     * @param text  关键词,多关键词逗号分割
     * @param rule 规则, 多关键词之间的关系是and 还是or
     * @return
     * @throws IOException
     * @throws ParseException
     * @throws InvalidTokenOffsetsException
     */
    @GetMapping("/searchTextMoreParam")
    public List<BlogTitle> searchTextMoreParam(String text,String rule) throws IOException, ParseException, InvalidTokenOffsetsException {
        String[] str = {"title", "description"};
        Directory directory = FSDirectory.open(FileSystems.getDefault().getPath("d:\\indexDir"));
        // 索引读取工具
        IndexReader reader = DirectoryReader.open(directory);
        // 索引搜索工具
        IndexSearcher searcher = new IndexSearcher(reader);

        //多条件查询构造
        BooleanQuery.Builder builder = new BooleanQuery.Builder();

//        // 条件一
//        MultiFieldQueryParser parser = new MultiFieldQueryParser(str, new IKAnalyzer());
        // 创建查询对象
//        Query query = parser.parse(text);
//        builder.add(query, BooleanClause.Occur.MUST);

        BooleanQuery.Builder builder2 = new BooleanQuery.Builder();//这里很重要,必须单独构建一个query,相当于预设一个括号,把几个关键词放到括号里
        for (String key : text.split(",")) {
            String fields[] = {"title", "description"};
            String kws[] = {key, key};
            BooleanClause.Occur[] flags = new BooleanClause.Occur[]{BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD};
            Query queryKey = MultiFieldQueryParser.parse(kws, fields, flags, new IKAnalyzer()); //通常就是关键词搜索
            if (rule.equals("and")) { //
                builder2.add(queryKey, BooleanClause.Occur.MUST); //相当于各关键词之间的关系是AND
            } else {
                builder2.add(queryKey, BooleanClause.Occur.SHOULD); /// 相当于各关键词之间的关系是OR
            }
        }
        builder.add(builder2.build(), BooleanClause.Occur.MUST);
        // 条件二
        // TermQuery不使用分析器所以建议匹配不分词的Field域(StringField, )查询,比如价格、分类ID号等。这里只能演示个ID了。。。
//        Query termQuery = new TermQuery(new Term("id", "1001"));
//        builder.add(termQuery, BooleanClause.Occur.MUST);

//        TermRangeQuery query2  = new TermRangeQuery("id", new BytesRef("1001".getBytes()), new BytesRef("1005".getBytes()), true, true);
//        builder.add(query2, BooleanClause.Occur.MUST);

        List<BytesRef> bytesRefList = Arrays.asList(new BytesRef("CSDN".getBytes()),new BytesRef("小米".getBytes()));
        TermInSetQuery query3 = new TermInSetQuery("fromType",bytesRefList);
        builder.add(query3, BooleanClause.Occur.MUST);

        // 获取前十条记录
        TopDocs topDocs = searcher.search(builder.build(), 100);
        // 获取总条数
        log.info("本次搜索共找到" + topDocs.totalHits + "条数据");
        //高亮显示
        SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<span style='color:red'>", "</span>");
        Highlighter highlighter = new Highlighter(simpleHTMLFormatter, new QueryScorer(builder2.build()));//高亮只是关键词,其他属于过滤条件
        //高亮后的段落范围在100字内
        Fragmenter fragmenter = new SimpleFragmenter(100);
        highlighter.setTextFragmenter(fragmenter);

        // 获取得分文档对象(ScoreDoc)数组.SocreDoc中包含:文档的编号、文档的得分
        ScoreDoc[] scoreDocs = topDocs.scoreDocs;
        List<BlogTitle> list = new ArrayList<>();
        for (ScoreDoc scoreDoc : scoreDocs) {
            // 取出文档编号
            int docId = scoreDoc.doc;
            // 根据编号去找文档
            Document doc = reader.document(docId);
            BlogTitle content = selectById(doc.get("id"));
            //处理高亮字段显示
            String title = highlighter.getBestFragment(new IKAnalyzer(), "title", doc.get("title"));
            if (title == null) {
                title = content.getTitle();
            }
            String description = highlighter.getBestFragment(new IKAnalyzer(), "description", content.getDescription());
            content.setDescription(description);
            content.setTitle(title);
            list.add(content);
        }
        return list;
    }