0


elasticsearch 7.6.2 - 索引管理

小伙伴们,你们好呀,我是老寇,跟我一起学习es 7.6.2

注:请点击我,获取源码

一.创建索引及参数解析

1.创建索引 (语法及例子)

1).新建索引语法

PUT 索引名称
{
   "settings" : {  ... },
   "mappings" : {
      "properties" : {
          "field1" : {
             "type" : "keyword"
          },
          "field2" : {
             "type" : "keyword"
          }
          ...
          ...
          ...
       }        
   }
}

2).新建消息索引例子

PUT msg_202203
{
    "settings": {
        "index": {
            "refresh_interval": "-1",
            "number_of_shards": "5",
            "analysis": {
                "filter": {
                    "laokou-remote-synonym": {
                        "dynamic_reload": "true",
                        "interval": "30000",
                        "type": "dynamic_synonym",
                        "synonyms_path": "http://localhost:9048/laokou-service/synonym"
                    },
                    "laokou-pinyin": {
                        "lowercase": "true",
                        "keep_original": "true",
                        "remove_duplicated_term": "true",
                        "keep_first_letter": "true",
                        "keep_separate_first_letter": "false",
                        "type": "pinyin",
                        "limit_first_letter_length": "16",
                        "keep_full_pinyin": "true"
                    }
                },
                "analyzer": {
                    "ik-search-pinyin": {
                        "filter": ["laokou-pinyin", "word_delimiter"],
                        "char_filter": ["html_strip"],
                        "type": "custom",
                        "tokenizer": "ik_smart"
                    },
                    "ik-search-synonym": {
                        "filter": ["laokou-remote-synonym", "lowercase", "asciifolding"],
                        "char_filter": ["html_strip"],
                        "type": "custom",
                        "tokenizer": "ik_smart"
                    },
                    "ik-index-synonym": {
                        "filter": ["laokou-remote-synonym", "lowercase", "asciifolding"],
                        "char_filter": ["html_strip"],
                        "type": "custom",
                        "tokenizer": "ik_max_word"
                    }
                }
            },
            "number_of_replicas": "1"
        }
    },
    "aliases": {
      "msg": {}
    }, 
    "mappings": {
        "dynamic": "true",
        "properties": {
            "sendId": {
                "type": "long"
            },
            "data": {
                "eager_global_ordinals": true,
                "search_analyzer": "ik-search-synonym",
                "fielddata": true,
                "analyzer": "ik-index-synonym",
                "boost": 100,
                "type": "text",
                "fields": {
                    "data-pinyin": {
                        "analyzer": "ik-search-pinyin",
                        "term_vector": "with_positions_offsets",
                        "boost": 100,
                        "type": "text"
                    }
                }
            },
            "type": {
                "type": "integer"
            },
            "remark": {
                "type": "keyword"
            },
            "fromId": {
                "type": "long"
            },
            "createDate": {
                "format": "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||epoch_millis",
                "type": "date"
            },
            "username": {
                "type": "keyword"
            }
        }
    }
}

2.索引参数解析 (着重解析例子中参数含义)

1).settings

settings是索引的配置属性,例如分片数、副本数、刷新频率和自定义分词器

文章参考:Elasticsearch 拼音分词器(上)

{    
    "settings": { #settings配置
        "index": {
            "refresh_interval": "-1", #表示索引刷新频率,频繁刷新索引会降低性能,一般设置为30s~60s;-1表示禁用刷新
            "number_of_shards": "5", #index分片数,需要注意的是es7.0默认索引分片调整为1
            "analysis": { #定制分词器,包含filter、analyzer
                "filter": {
                    "laokou-remote-synonym": { #同义词分词过滤器名称
                        "dynamic_reload": "true", #开启动态加载同义词库
                        "interval": "60", #同步频率,单位为秒,默认值60(注意:上文例子中写的是30000,按秒计算即30000秒,请按实际需要调整)
                        "type": "dynamic_synonym", #同义词分词
                        "synonyms_path": "http://localhost:9048/laokou-service/synonym" #远程同义词库地址
                    },
                    "laokou-pinyin": { #拼音分词过滤器名称
                        "lowercase": "true", #开启小写
                        "keep_original": "true", #是否保留原始输入 默认值false
                        "remove_duplicated_term": "true", #是否删除重复词项以节省索引空间,默认值false
                        "keep_first_letter": "true", #是否开启首字母匹配 例如:寇申海 > ksh 默认值true
                        "keep_separate_first_letter": "false", #保留第一个字母分开,例如:寇申海> k,s,h,默认:false
                        "type": "pinyin", #拼音分词
                        "limit_first_letter_length": "16", #设置first_letter结果的最大长度,默认值:16
                        "keep_full_pinyin": "true" #是否开启全拼 例如寇申海 > kou shen hai 默认值true
                    }
                },
                "analyzer": { #分词器配置
                    "ik-search-pinyin": { #自定义查询拼音分词器
                        "filter": ["laokou-pinyin", "word_delimiter"], #拼音过滤器,word_delimiter 词元过滤器(按非字母数字字符、大小写变化、字母与数字的交界把词元拆成子词,例如kou-shen-hai 分成kou shen hai)
                        "char_filter": ["html_strip"], #字符过滤器,html_strip字符过滤器可删除所有html标签,例如<p>
                        "type": "custom", #自定义
                        "tokenizer": "ik_smart" #ik分词中的简单分词器,支持自定义字典,远程字典
                    },
                    "ik-search-synonym": { #自定义查询同义词分词器
                        "filter": ["laokou-remote-synonym", "lowercase", "asciifolding"],  #同义词过滤器,单词小写过滤器,asciifolding词元过滤器(将不在前127个ASCII字符(“基本拉丁文”Unicode块)中的字母,数字和符号Unicode字符转换为ASCII等效项)
                        "char_filter": ["html_strip"], #字符过滤器,html_strip字符过滤器删除所有html标签
                        "type": "custom", #自定义
                        "tokenizer": "ik_smart" #ik分词的简单分词,支持自定义字典、远程字典
                    },
                    "ik-index-synonym": { #自定义索引同义词分词器
                        "filter": ["laokou-remote-synonym", "lowercase", "asciifolding"], #同义词过滤器,单词小写过滤器,asciifolding词元过滤器
                        "char_filter": ["html_strip"], #字符过滤器,html_strip字符过滤器删除所有html标签
                        "type": "custom", #自定义
                        "tokenizer": "ik_max_word" #ik分词中的最大分词器,支持自定义字典,远程字典,例如我是中国人,分成我是,中国,中国人,我是中国人
                    }
                }
            },
            "number_of_replicas": "1" #索引分片的备份数
        }
    }
}

2).mappings

mappings用于指定索引中文档各字段的类型及分词方式

"mappings": {
        "dynamic": "true", #动态映射:文档中出现mappings未定义的新字段时,自动添加到mappings
        "properties": { #属性
            "sendId": { 
                "type": "long"
            },
            "data": {
                "eager_global_ordinals": true, #开启后,每次refresh以后即可更新字典,字典常驻内存,减少查询时构建字典的耗时
                "search_analyzer": "ik-search-synonym", #查询时 ik-smart分词器
                "fielddata": true,
                "analyzer": "ik-index-synonym", #建立索引-ik-max分词器
                "boost": 100,
                "type": "text",
                "fields": {
                    "data-pinyin": { #因进行拼音查询,需要设置属性字段
                        "analyzer": "ik-search-pinyin", #自定义拼音查询分词器
                        "term_vector": "with_positions_offsets", #存储词项向量(包含词项的位置和偏移量),常用于快速高亮
                        "boost": 100, #权重
                        "type": "text" 
                    }
                }
            },
            "type": {
                "type": "integer"
            },
            "remark":{
                "type": "keyword"
            },
            "fromId": {
                "type": "long"
            },
            "createDate": {
                "format": "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||epoch_millis", #存入date类型,需要进行格式化
                "type": "date"
            },
            "username": {
                "type": "keyword"
            }
        }
    }

3).aliases

aliases是给索引取别名

"aliases": {
  "msg":{} #别名名称
}

4).执行命令(使用kibana执行,没有就安装kibana)

补充: 获取远程同义词库核心代码(详细代码已上传到码云)


/**
 * HTTP endpoint that serves the synonym dictionary stored in the database as
 * plain text, one synonym entry per line.
 *
 * <p>The response carries {@code Last-Modified} and {@code ETag} headers so that
 * a polling client can tell whether the dictionary changed.
 */
@RestController
@RequestMapping("/synonym")
public class SynonymController {

    private static final Logger log = LoggerFactory.getLogger(SynonymController.class);

    @Autowired
    private SynonymDao synonymDao;

    /**
     * SimpleDateFormat is not thread-safe, so every thread gets its own instance.
     */
    private static final ThreadLocal<DateFormat> df = ThreadLocal.withInitial(() -> new SimpleDateFormat(DateUtil.DATE_TIME));

    /**
     * Returns all synonym entries, each terminated by a line feed.
     *
     * @param request  incoming request; its {@code If-None-Match} and
     *                 {@code If-Modified-Since} headers are only logged
     * @param response response that receives the {@code Last-Modified},
     *                 {@code ETag} and {@code Content-Type} headers
     * @return the synonym entries joined by {@code "\n"} (with a trailing
     *         {@code "\n"}), or an empty string when no entries exist; in the
     *         empty case no headers are set
     */
    @GetMapping
    @CrossOrigin
    public String text(HttpServletRequest request, HttpServletResponse response) {
        String eTag = request.getHeader("If-None-Match");
        String modified = request.getHeader("If-Modified-Since");
        String currentDate = df.get().format(new Date());
        List<SynonymEntity> list = synonymDao.selectList(new QueryWrapper<>());
        if (CollectionUtils.isEmpty(list)) {
            // Nothing to serve: answer with an empty body instead of null.
            return "";
        }
        log.info("加载ik同义词,上次同义词:{},上次修改时间:{},当前日期:{}",eTag,modified,currentDate);
        // Each entry may use the "=>" form or the plain comma-separated form;
        // the stored value is emitted unchanged.
        StringBuilder words = new StringBuilder();
        for (SynonymEntity entity : list) {
            words.append(entity.getValue()).append("\n");
        }
        // The list is non-empty here, so the modification time is always "now".
        response.setHeader("Last-Modified", currentDate);
        // The ETag is the number of entries; reuse the rows already loaded
        // instead of issuing a second count query.
        // NOTE(review): a count-based ETag does not change when an entry is edited in place.
        response.setHeader("ETag", String.valueOf(list.size()));
        response.setHeader("Content-Type", "text/plain");
        return words.toString();
    }

}
标签: elasticsearch

本文转载自: https://blog.csdn.net/qq_39893313/article/details/123311487
版权归原作者 k↑ 所有, 如有侵权,请联系我们删除。

“elasticsearch 7.6.2 - 索引管理”的评论:

还没有评论