其他分享
首页 > 其他分享> > 记一次springboot2.3.*项目整合elasticsearch7.6.2实现中文拼音分词搜索

记一次springboot2.3.*项目整合elasticsearch7.6.2实现中文拼音分词搜索

作者:互联网

一、elasticsearch官网下载:Elasticsearch 7.6.2 | Elastic

二、拼音、ik、繁简体转换插件安装

ik分词:GitHub - medcl/elasticsearch-analysis-ik: The IK Analysis plugin integrates Lucene IK analyzer into elasticsearch, support customized dictionary.

拼音分词:GitHub - medcl/elasticsearch-analysis-pinyin: This Pinyin Analysis plugin is used to do conversion between Chinese characters and Pinyin.

繁简体转换:GitHub - medcl/elasticsearch-analysis-stconvert: STConvert is analyzer that convert chinese characters between traditional and simplified.中文简繁體互相转换.

安装过程:从github上下载源码到本地,idea打开项目,修改对应项目中的pom.xml将

<elasticsearch.version>7.6.2</elasticsearch.version>修改为对应的elasticsearch版本

,alt+f12打开cmd命令界面,输入mvn install,项目编译成功后会在对应目录中生成对应zip包,效果如图:

将对应zip包解压到elasticsearch存放目录的plugins下:

然后启动elasticsearch.bat,

这样对应插件就算安装成功了 

三、springboot项目集成elasticsearch,pom.xml修改如下

<properties>
        
        <!-- Elasticsearch version property; presumably overrides the version managed by the Spring Boot parent POM (TODO confirm). -->
        <elasticsearch.version>7.6.2</elasticsearch.version>
      
        <!-- NOTE(review): this property is not referenced anywhere else in this article; avoid keeping a real password in pom.xml. -->
        <remote-passwd>123456</remote-passwd>
 </properties> 

 <dependencies>
        <!-- Spring Boot starter for Spring Data Elasticsearch; no explicit version here, so it is presumably inherited from the Spring Boot parent (TODO confirm). -->
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-data-elasticsearch</artifactId>
        </dependency>
</dependencies>

 application.yml新增elasticsearch配置:

# Elasticsearch connection settings for Spring Data Elasticsearch.
# NOTE(review): cluster-name / cluster-nodes with port 9300 look like transport-client
# settings, while the controller in this article injects ElasticsearchRestTemplate
# (REST, normally port 9200). TODO confirm against the Spring Boot 2.3 documentation
# whether spring.elasticsearch.rest.uris should be configured instead.
spring:
    data:
        elasticsearch:
             cluster-name: elasticsearch
             cluster-nodes: 127.0.0.1:9300
             # Turns on Spring Data Elasticsearch repository support.
             repositories:
                enabled: true

document查询对象:

 

import lombok.*;
import org.springframework.data.annotation.Id;
import org.springframework.data.elasticsearch.annotations.*;

import java.io.Serializable;

 

/**
 * Elasticsearch document stored in the {@code chin_new_index} index.
 *
 * <p>Index settings are read from {@code /json/esTest_setting.json} and the field mapping
 * from {@code /json/esTest_mapping.json} (both on the classpath).
 *
 * <p>Usage, as described by the original author:
 * <ol>
 *   <li>Create the index, its settings and its mapping in Elasticsearch first:
 *     <ol>
 *       <li>Create the index with its settings:
 *           {@code PUT http://localhost:9200/chin_new_index}, body = the settings JSON.</li>
 *       <li>Apply the mapping. In 7.x the type defaults to {@code _doc} and
 *           {@code include_type_name=true} has to be added (the author notes that this
 *           parameter goes away in 8.x):
 *           {@code PUT http://localhost:9200/chin_new_index/_mapping/_doc?include_type_name=true},
 *           body = the mapping JSON.</li>
 *       <li>To inspect how a stored document was tokenized:
 *           {@code POST http://localhost:9200/chin_new_index/_doc/<document id>/_termvectors?fields=<field name>}.</li>
 *     </ol>
 *   </li>
 *   <li>Start the application, initialize Elasticsearch with the rows from the database,
 *       then use the search endpoints.</li>
 * </ol>
 */
@Data
@Builder
@Setting(settingPath = "/json/esTest_setting.json")
@Mapping(mappingPath = "/json/esTest_mapping.json")
@Document(indexName = "chin_new_index")
public class EsTest implements Serializable {

    private static final long serialVersionUID = 1L;

    /** Document identifier. */
    @Id
    private String id;

    /** Title text; one of the fields queried by the search endpoints. */
    private String title;

    /** Body text; one of the fields queried by the search endpoints. */
    private String content;

    /** Creates an empty document; fields are populated through the setters. */
    public EsTest() {
    }

    /**
     * Creates a fully populated document.
     *
     * @param id      document identifier
     * @param title   title text
     * @param content body text
     */
    public EsTest(String id, String title, String content) {
        this.id = id;
        this.title = title;
        this.content = content;
    }
}

 在src/main/resources目录下新建json文件夹,存放setting、mapping文件

esTest_setting.json(对应@Setting中的settingPath)

{
  "settings": {
    "index": {
      "analysis": {
        "filter": {
          "edge_ngram_filter": {
            "type": "edge_ngram",
            "min_gram": 1,
            "max_gram": 50
          },
          "pinyin_simple_filter": {
            "type": "pinyin",
            "keep_separate_first_letter": true,
            "keep_full_pinyin": true,
            "keep_original": true,
            "limit_first_letter_length": 16,
            "lowercase": true,
            "remove_duplicated_term": true
          }
        },
        "char_filter": {
          "tsconvert": {
            "type": "stconvert",
            "convert_type": "t2s"
          }
        },
        "analyzer": {
          "ikSearchAnalyzer": {
            "type": "custom",
            "tokenizer": "ik_max_word",
            "char_filter": [
              "tsconvert"
            ]
          },
          "pinyinSimpleIndexAnalyzer": {
            "type": "custom",
            "tokenizer": "ik_max_word",
            "filter": [
              "pinyin_simple_filter",
              "edge_ngram_filter",
              "lowercase"
            ]
          }
        }
      }
    }
  }
}

esTest_mapping.json(对应@Mapping中的mappingPath)

{
  "properties": {
    "id": {
      "type": "text"
    },
    "content": {
      "type": "text",
      "analyzer": "ik_max_word",
      "search_analyzer": "ik_max_word",
      "fields": {
        "pinyin": {
          "type": "text",
          "analyzer": "pinyinSimpleIndexAnalyzer",
          "search_analyzer": "pinyinSimpleIndexAnalyzer"
        }
      }
    },
    "title": {
      "type": "text",
      "analyzer": "ik_max_word",
      "search_analyzer": "ik_max_word",
      "fields": {
        "pinyin": {
          "type": "text",
          "analyzer": "pinyinSimpleIndexAnalyzer",
          "search_analyzer": "pinyinSimpleIndexAnalyzer"
        }
      }
    }
  }
}

CRUD接口

/**
 * Spring Data repository for {@link EsTest} documents with {@code String} ids.
 *
 * <p>Declares no methods of its own; all operations are inherited from
 * {@code ElasticsearchRepository}.
 */
@Repository
public interface EsTestRepository extends ElasticsearchRepository<EsTest, String> {
}

controller:

 
@RestController
@RequestMapping("testEs")
@Api(tags = "elasticsearch测试")
public class EsMaintenanceController {

    //这里用你自己的连接数据库的services用来取数据
    @Autowired
    private YourServices yourService;

    @Autowired
    private EsTestRepository searchService;

    @Autowired
    private ElasticsearchRestTemplate searchTemp;

 

 



        /**
         * @return   
         */
    @GetMapping("initEs")
    @ApiOperation(value = "初始化es", notes = "")
    public void save() {

        searchService.deleteAll();

        searchTemp.putMapping(EsMaintenance.class);

 
        List<SqlEntity> dataList =  yourService.selectAll();;

        List<EsTest> esList = new ArrayList<>();

        dataList.forEach(item ->{
            esList.add(new EsMaintenance(item.getId() , item.getName(), item.getContent()));
        });
         

        searchService.saveAll(esList);



    }

 


 
    @GetMapping("searchByKey")
    @ApiOperation(value = "根据关键字搜索", notes = "")
    public R searchByKey(@RequestParam String key ){
        /**
         * 不指定具体字段,在所有field中进行查询
         */
        //QueryStringQueryBuilder query = QueryBuilders.queryStringQuery(key);

        /**
         * 这里使用了多字段匹配方法,multiMatchQuery (String key , String... fieldName)
         * withPageable 采用 分页查询,  elasticsearch分页是从第0页开始的
         */

         /* MultiMatchQueryBuilder queryBuilder2 = QueryBuilders.multiMatchQuery(key, "title", "content", "title.pinyin" , "content.pinyin");

      NativeSearchQuery query = new NativeSearchQueryBuilder()
                .withQuery(    queryBuilder2   )
                .withPageable(PageRequest.of(0, 10)).build();*/


        DisMaxQueryBuilder disMaxQuery = QueryBuilders.disMaxQuery();
        MatchQueryBuilder title = QueryBuilders.matchQuery("title", key);
        MatchQueryBuilder content = QueryBuilders.matchQuery("content", key);
        MatchQueryBuilder pinyContent = QueryBuilders.matchQuery("content.pinyin", key);
        MatchQueryBuilder pinyTitle = QueryBuilders.matchQuery("title.pinyin", key);

        disMaxQuery.add(title);
        disMaxQuery.add(content);
        disMaxQuery.add(pinyContent);
        disMaxQuery.add(pinyTitle);

        NativeSearchQueryBuilder nativBuilder = new NativeSearchQueryBuilder().withQuery(disMaxQuery);
        NativeSearchQuery query = nativBuilder.build();


        //单个字段模糊匹配查询
        /*NativeSearchQuery query = new NativeSearchQueryBuilder().withQuery(QueryBuilders.matchQuery("title", key))
                .withQuery(QueryBuilders.matchQuery("content", key))
                .build();*/
        
        Iterable<EsMaintenance> search = searchService.search(query).getContent();
       JSON.toJSON(search));

        return R.ok().put("data", search);
    }


    @GetMapping("searchHighValue")
    @ApiOperation(value = "根据关键字搜索高亮显示", notes = "")
    public R searchHighValue(@RequestParam String key){
        //设置查询条件
        BoolQueryBuilder queryBuilder = QueryBuilders.boolQuery().should(QueryBuilders.matchQuery("title", key))
                .should(QueryBuilders.matchQuery("content", key))
                .should(QueryBuilders.matchQuery("title.pinyin", key))
                .should(QueryBuilders.matchQuery("content.pinyin", key));

        NativeSearchQuery query = new NativeSearchQueryBuilder()
                .withQuery(queryBuilder)
                //设定高亮显示字段
                .withHighlightFields(new HighlightBuilder.Field("title"), new HighlightBuilder.Field("content")
                ,new HighlightBuilder.Field("title.pinyin"), new HighlightBuilder.Field("content.pinyin")  )
                //设置高亮显示内容样式
                .withHighlightBuilder(new HighlightBuilder().preTags(" <p style='color:red'>").postTags("</p>"))
                .build();

        SearchHits<EsMaintenance> search = searchTemp.search(query, EsMaintenance.class);
        List<SearchHit<EsMaintenance>> datas = search.getSearchHits();
        ArrayList<EsMaintenance> returnList = new ArrayList<>();
        //处理高亮显示数据
        for (SearchHit<EsMaintenance> item : datas) {
            Map<String, List<String>> fields = item.getHighlightFields();
            item.getContent().setTitle(fields.get("title") == null ? item.getContent().getTitle() : fields.get("title").get(0));
            item.getContent().setContent(fields.get("content") == null ? item.getContent().getContent() : fields.get("content").get(0));
            item.getContent().setTitle(fields.get("title.pinyin") == null ? item.getContent().getTitle() : fields.get("title.pinyin").get(0));
            item.getContent().setContent(fields.get("content.pinyin") == null ? item.getContent().getContent() : fields.get("content.pinyin").get(0));
            returnList.add(item.getContent());
        }
        return R.ok().put("data", returnList);
    }




}

标签:title,elasticsearch7.6,pinyin,springboot2.3,content,item,elasticsearch,new,分词
来源: https://blog.csdn.net/limChin/article/details/120095701