0


SpringBoot2.3.x整合ElasticSearch7.6.2 实现PDF,WORD全文检索

文章目录

1、下载安装,只下载elasticSearch、Kibana即可

  • 下载安装参考Springboot/Springcloud整合ELK平台,(Filebeat方式)日志采集及管理(Elasticsearch+Logstash+Filebeat+Kibana)
  • elastic中文社区 下载地址

这里我使用 7.6.2 版本的 Elasticsearch。因为项目使用的是 Spring Boot 2.3.x,为了避免出现“低版本客户端连接高版本索引库”的兼容问题,这里先退回使用与客户端匹配的低版本索引库。
在这里插入图片描述
在这里插入图片描述

插件安装

  • ik 分词器

在这里插入图片描述

  • ingest-attachment 这里将链接修改为自己的版本即可

插件下载完成之后,将压缩包解压到 elasticsearch的plugins目录, 之后重启elasticsearch
在这里插入图片描述
在这里插入图片描述

定义文本抽取管道

PUT /_ingest/pipeline/attachment
{"description":"Extract attachment information","processors":[{"attachment":{"field":"data","indexed_chars":-1,"ignore_missing":true}},{"remove":{"field":"data"}}]}

2、SpringBoot整合ElasticSearch

<dependencies>
    <!-- Spring MVC / embedded web server -->
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-web</artifactId>
    </dependency>
    <!-- Spring Boot test support -->
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-test</artifactId>
    </dependency>
    <!-- Spring Data Elasticsearch (also brings in the REST high-level client) -->
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-data-elasticsearch</artifactId>
    </dependency>
    <!-- JSON serialization of the document sent to Elasticsearch -->
    <dependency>
        <groupId>com.alibaba</groupId>
        <artifactId>fastjson</artifactId>
        <version>1.2.58</version>
    </dependency>
    <!-- Generates getters/setters for the entity class -->
    <dependency>
        <groupId>org.projectlombok</groupId>
        <artifactId>lombok</artifactId>
        <version>1.18.20</version>
    </dependency>
</dependencies>

application.yml

# HTTP port the service listens on
server:
  port: 9090

spring:
  application:
    name: elasticsearch-service
  # Address of the Elasticsearch node used by the REST client
  elasticsearch:
    rest:
      uris: http://127.0.0.1:9200

实体类

packagetop.fate.entity;importlombok.Data;importorg.springframework.data.elasticsearch.annotations.Document;importorg.springframework.data.elasticsearch.annotations.Field;importorg.springframework.data.elasticsearch.annotations.FieldType;/**
 * @auther:Wangxl
 * @Emile:[email protected]
 * @Time:2020/11/2 14:15
 */@Data@Document(indexName ="filedata")publicclassFileData{@Field(type =FieldType.Keyword)privateString filePk;@Field(type =FieldType.Keyword)privateString fileName;@Field(type =FieldType.Keyword)privateInteger page;@Field(type =FieldType.Keyword)privateString departmentId;@Field(type =FieldType.Keyword)privateString ljdm;@Field(type =FieldType.Text, analyzer ="ik_max_word")privateString data;@Field(type =FieldType.Keyword)privateString realName;@Field(type =FieldType.Keyword)privateString url;@Field(type =FieldType.Keyword)privateString type;}

接口类

package top.fate.controller;

import com.alibaba.fastjson.JSON;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.action.index.IndexResponse;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightField;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.data.elasticsearch.core.ElasticsearchRestTemplate;
import org.springframework.data.elasticsearch.core.IndexOperations;
import org.springframework.data.elasticsearch.core.document.Document;
import org.springframework.data.elasticsearch.core.mapping.IndexCoordinates;
import org.springframework.util.Base64Utils;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.RestController;
import top.fate.entity.FileData;

import java.io.File;
import java.io.FileInputStream;
import java.lang.reflect.Method;
import java.nio.file.Files;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
 * Demo endpoints for full-text search over PDF/Word files with Elasticsearch.
 *
 * <p>Files are sent Base64-encoded through the {@code attachment} ingest pipeline, which
 * extracts their text into {@code attachment.content}; searches run against that field.
 * Every endpoint works on the same index, {@value #INDEX_NAME}.
 *
 * <p>Email: [email protected]<br>
 * Time: 2022/6/1 16:33
 *
 * @author Wangxl
 */
@RestController
@RequestMapping(value = "fullTextSearch")
public class FullTextSearchController {

    private static final Logger LOG = Logger.getLogger(FullTextSearchController.class.getName());

    /** Index used by every endpoint of this controller. */
    private static final String INDEX_NAME = "testindex";

    /** Name of the ingest pipeline that extracts text from the uploaded file. */
    private static final String PIPELINE_NAME = "attachment";

    /** Field queried and highlighted by {@link #search(String)}. */
    private static final String CONTENT_FIELD = "attachment.content";

    /**
     * Sample file indexed by {@link #uploadFileToEs()}. A PDF can be used the same way,
     * e.g. the same name with a {@code .pdf} extension.
     */
    private static final String SAMPLE_FILE_PATH = "D:\\desktop\\Java开发工程师-4年-王晓龙-2022-05.docx";

    @Autowired
    private ElasticsearchRestTemplate elasticsearchRestTemplate;

    @Autowired
    private RestHighLevelClient restHighLevelClient;

    /**
     * Creates the index (if it does not exist yet) and applies the mapping derived from
     * {@link FileData}.
     */
    @GetMapping("createIndex")
    public void add() {
        IndexOperations indexOperations = elasticsearchRestTemplate.indexOps(IndexCoordinates.of(INDEX_NAME));
        // Creating an index that already exists is an error, so only create it when missing.
        if (!indexOperations.exists()) {
            indexOperations.create();
        }
        Document mapping = indexOperations.createMapping(FileData.class);
        indexOperations.putMapping(mapping);
    }

    /**
     * Deletes the index created by {@link #add()}.
     *
     * <p>This targets {@value #INDEX_NAME} explicitly; resolving the index from
     * {@code FileData.class} would address the entity's own index name instead of the one
     * the other endpoints use.
     */
    @GetMapping("deleteIndex")
    public void deleteIndex() {
        IndexOperations indexOperations = elasticsearchRestTemplate.indexOps(IndexCoordinates.of(INDEX_NAME));
        indexOperations.delete();
    }

    /**
     * Reads the sample file, Base64-encodes it and indexes it through the ingest pipeline.
     * The file name is used both as the document id and as the stored file name.
     * Failures are logged and not propagated to the caller.
     */
    @GetMapping("uploadFileToEs")
    public void uploadFileToEs() {
        try {
            File file = new File(SAMPLE_FILE_PATH);
            // readAllBytes reads the complete file and closes it even on failure; a single
            // InputStream.read call is not guaranteed to fill the buffer.
            byte[] content = Files.readAllBytes(file.toPath());

            FileData fileData = new FileData();
            fileData.setFileName(file.getName());
            fileData.setFilePk(file.getName());
            // The pipeline expects the file as Base64 text in the "data" field.
            fileData.setData(Base64Utils.encodeToString(content));

            IndexRequest indexRequest = new IndexRequest(INDEX_NAME).id(fileData.getFilePk());
            indexRequest.source(JSON.toJSONString(fileData), XContentType.JSON);
            indexRequest.setPipeline(PIPELINE_NAME);
            restHighLevelClient.index(indexRequest, RequestOptions.DEFAULT);
        } catch (Exception e) {
            LOG.log(Level.SEVERE, "Failed to index file " + SAMPLE_FILE_PATH, e);
        }
    }

    /**
     * Searches the extracted file content for the given text.
     *
     * @param txt text to search for; analyzed with {@code ik_max_word}
     * @return a list of {@link FileData}; when a hit has highlight fragments, its
     *         {@code data} property holds the concatenated highlighted fragments.
     *         The list is empty when nothing matches or the search fails.
     */
    @GetMapping("search")
    public Object search(@RequestParam("txt") String txt) {
        List<FileData> results = new ArrayList<>();
        try {
            HighlightBuilder highlightBuilder = new HighlightBuilder();
            highlightBuilder.field(CONTENT_FIELD);
            // Original note: turn off multi-field highlighting.
            highlightBuilder.requireFieldMatch(false);
            highlightBuilder.preTags("<span style='color:red'>");
            highlightBuilder.postTags("</span>");

            SearchSourceBuilder builder = new SearchSourceBuilder();
            builder.query(QueryBuilders.matchQuery(CONTENT_FIELD, txt).analyzer("ik_max_word"));
            // Report the real number of hits.
            builder.trackTotalHits(true);
            builder.highlighter(highlightBuilder);

            SearchRequest searchRequest = new SearchRequest(INDEX_NAME);
            searchRequest.source(builder);

            SearchResponse response = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);
            if (response.getHits() != null) {
                for (SearchHit hit : response.getHits().getHits()) {
                    Map<String, Object> source = hit.getSourceAsMap();
                    HighlightField highlighted = hit.getHighlightFields().get(CONTENT_FIELD);
                    if (highlighted != null) {
                        StringBuilder snippet = new StringBuilder();
                        for (Text fragment : highlighted.fragments()) {
                            snippet.append(fragment);
                        }
                        source.put("data", snippet.toString());
                    }
                    FileData item = dealObject(source, FileData.class);
                    // dealObject returns null when mapping fails; keep nulls out of the response.
                    if (item != null) {
                        results.add(item);
                    }
                }
            }
        } catch (Exception e) {
            LOG.log(Level.SEVERE, "Search failed for text: " + txt, e);
        }
        return results;
    }

    /**
     * Copies map entries onto a new instance of {@code clazz} by calling matching public
     * setters ({@code set} + key with its first letter upper-cased, taking the value's
     * runtime class as the single parameter).
     *
     * <p>Entries with a null value, an empty key, or no matching setter are skipped.
     *
     * @param sourceAsMap property names mapped to values
     * @param clazz       target type; must have an accessible no-argument constructor
     * @param <T>         target type
     * @return the populated instance, or {@code null} if the instance could not be created
     *         or a setter call failed
     */
    public static <T> T dealObject(Map<String, Object> sourceAsMap, Class<T> clazz) {
        try {
            T target = clazz.getDeclaredConstructor().newInstance();
            for (Map.Entry<String, Object> entry : sourceAsMap.entrySet()) {
                String key = entry.getKey();
                Object value = entry.getValue();
                // A null value has no runtime class to resolve a setter with; skip it rather
                // than failing the whole object.
                if (key == null || key.isEmpty() || value == null) {
                    continue;
                }
                // Plain string operations: the key must not be interpreted as a regex, and
                // Locale.ROOT keeps the setter name independent of the default locale.
                String setterName = "set" + key.substring(0, 1).toUpperCase(Locale.ROOT) + key.substring(1);
                Method setter;
                try {
                    setter = clazz.getMethod(setterName, value.getClass());
                } catch (NoSuchMethodException ignored) {
                    // The target type has no such property; ignore the entry.
                    continue;
                }
                setter.invoke(target, value);
            }
            return target;
        } catch (Exception e) {
            LOG.log(Level.SEVERE, "Failed to map search hit to " + clazz, e);
        }
        return null;
    }
}

测试

创建索引

 localhost:9090/fullTextSearch/createIndex

在这里插入图片描述

上传文档

localhost:9090/fullTextSearch/uploadFileToEs

在这里插入图片描述

搜索

localhost:9090/fullTextSearch/search?txt=索引库

在这里插入图片描述


本文转载自: https://blog.csdn.net/weixin_43627706/article/details/125080637
版权归原作者 fate急速出击 所有, 如有侵权,请联系我们删除。

“SpringBoot2.3.x整合ElasticSearch7.6.2 实现PDF,WORD全文检索”的评论:

还没有评论