小伙伴们,你们好呀,我是老寇,跟我一起学习es 7.6.2
注:请点击我,获取源码
一.创建索引及参数解析
1.创建索引 (语法及例子)
1).新建索引语法
PUT 索引名称
{
"settings" : { ... },
"mappings" : {
"properties" : {
"field1" : {
"type" : "keyword"
},
"field2" : {
"type" : "keyword"
}
...
...
...
}
}
}
2).新建消息索引例子
PUT msg_202203
{
  "settings": {
    "index": {
      "refresh_interval": "-1",
      "number_of_shards": "5",
      "analysis": {
        "filter": {
          "laokou-remote-synonym": {
            "dynamic_reload": "true",
            "interval": "30000",
            "type": "dynamic_synonym",
            "synonyms_path": "http://localhost:9048/laokou-service/synonym"
          },
          "laokou-pinyin": {
            "lowercase": "true",
            "keep_original": "true",
            "remove_duplicated_term": "true",
            "keep_first_letter": "true",
            "keep_separate_first_letter": "false",
            "type": "pinyin",
            "limit_first_letter_length": "16",
            "keep_full_pinyin": "true"
          }
        },
        "analyzer": {
          "ik-search-pinyin": {
            "filter": ["laokou-pinyin", "word_delimiter"],
            "char_filter": ["html_strip"],
            "type": "custom",
            "tokenizer": "ik_smart"
          },
          "ik-search-synonym": {
            "filter": ["laokou-remote-synonym", "lowercase", "asciifolding"],
            "char_filter": ["html_strip"],
            "type": "custom",
            "tokenizer": "ik_smart"
          },
          "ik-index-synonym": {
            "filter": ["laokou-remote-synonym", "lowercase", "asciifolding"],
            "char_filter": ["html_strip"],
            "type": "custom",
            "tokenizer": "ik_max_word"
          }
        }
      },
      "number_of_replicas": "1"
    }
  },
  "aliases": {
    "msg": {}
  },
  "mappings": {
    "dynamic": "true",
    "properties": {
      "sendId": {
        "type": "long"
      },
      "data": {
        "eager_global_ordinals": true,
        "search_analyzer": "ik-search-synonym",
        "fielddata": true,
        "analyzer": "ik-index-synonym",
        "boost": 100,
        "type": "text",
        "fields": {
          "data-pinyin": {
            "analyzer": "ik-search-pinyin",
            "term_vector": "with_positions_offsets",
            "boost": 100,
            "type": "text"
          }
        }
      },
      "type": {
        "type": "integer"
      },
      "remark": {
        "type": "keyword"
      },
      "fromId": {
        "type": "long"
      },
      "createDate": {
        "format": "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||epoch_millis",
        "type": "date"
      },
      "username": {
        "type": "keyword"
      }
    }
  }
}
2.索引参数解析 (着重解析例子中参数含义)
1).setting
setting是索引的配置属性
文章参考:Elasticsearch 拼音分词器(上)
{
"settings": { #settings配置
"index": {
"refresh_interval": "-1", #表示索引刷新频率,频繁刷新索引会降低性能,一般设置为30s~60s;-1表示禁用刷新
"number_of_shards": "5", #index分片数,需要注意的是es7.0默认索引分片调整为1
"analysis": { #定制分词器,包含filter、analyzer
"filter": {
"laokou-remote-synonym": { #同义词分词过滤器名称
"dynamic_reload": "true", #开启动态加载同义词库
"interval": "30000", #同步频率,单位为秒(插件默认值为60)
"type": "dynamic_synonym", #同义词分词
"synonyms_path": "http://localhost:9048/laokou-service/synonym" #远程同义词库地址
},
"laokou-pinyin": { #拼音分词过滤器名称
"lowercase": "true", #开启小写
"keep_original": "true", #是否保留原始输入 默认值false
"remove_duplicated_term": "true", #是否删除重复的词项以节省索引空间,默认值false
"keep_first_letter": "true", #是否开启首字母匹配 例如:寇申海 > ksh 默认值true
"keep_separate_first_letter": "false", #保留第一个字母分开,例如:寇申海> k,s,h,默认:false
"type": "pinyin", #拼音分词
"limit_first_letter_length": "16", #设置first_letter结果的最大长度,默认值:16
"keep_full_pinyin": "true" #是否开启全拼 例如寇申海 > kou shen hai 默认值true
}
},
"analyzer": { #分词器配置
"ik-search-pinyin": { #自定义查询拼音分词器
"filter": ["laokou-pinyin", "word_delimiter"], #拼音过滤器;word_delimiter词元过滤器(按非字母数字字符、大小写变化、字母与数字的交界处拆分单词,例如PowerShot 分成Power、Shot)
"char_filter": ["html_strip"], #字符过滤器,html_strip字符过滤器可删除所有html标签,例如<p>
"type": "custom", #自定义
"tokenizer": "ik_smart" #ik分词中的简单分词器,支持自定义字典,远程字典
},
"ik-search-synonym": { #自定义查询同义词分词器
"filter": ["laokou-remote-synonym", "lowercase", "asciifolding"], #同义词过滤器,单词小写过滤器,asciifolding词元过滤器(将不在前127个ASCII字符(“基本拉丁文”Unicode块)中的字母,数字和符号Unicode字符转换为ASCII等效项)
"char_filter": ["html_strip"], #字符过滤器,html_strip字符过滤器删除所有html标签
"type": "custom", #自定义
"tokenizer": "ik_smart" #ik分词的简单分词,支持自定义字典、远程字典
},
"ik-index-synonym": { #自定义索引同义词分词器
"filter": ["laokou-remote-synonym", "lowercase", "asciifolding"], #同义词过滤器,单词小写过滤器,asciifolding词元过滤器
"char_filter": ["html_strip"], #字符过滤器,html_strip字符过滤器删除所有html标签
"type": "custom", #自定义
"tokenizer": "ik_max_word" #ik分词中的最大分词器,支持自定义字典,远程字典,例如我是中国人,分成我是,中国,中国人,我是中国人
}
}
},
"number_of_replicas": "1" #索引分片的备份数
}
}
}
2).mapping
mapping是指定索引存储文档的字段类型
"mappings": {
"dynamic": "true", #动态映射:文档中出现的新字段会自动添加到mapping中
"properties": { #属性
"sendId": {
"type": "long"
},
"data": {
"eager_global_ordinals": true, #开启后,每次refresh以后即可更新字典,字典常驻内存,减少查询时构建字典的耗时
"search_analyzer": "ik-search-synonym", #查询时 ik-smart分词器
"fielddata": true,
"analyzer": "ik-index-synonym", #建立索引-ik-max分词器
"boost": 100,
"type": "text",
"fields": {
"data-pinyin": { #拼音子字段(multi-fields),用于拼音查询
"analyzer": "ik-search-pinyin", #自定义拼音查询分词器
"term_vector": "with_positions_offsets", #存储词项向量(词项、位置及字符偏移量)
"boost": 100, #权重
"type": "text"
}
}
},
"type": {
"type": "integer"
},
"remark":{
"type": "keyword"
},
"fromId": {
"type": "long"
},
"createDate": {
"format": "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||epoch_millis", #存入date类型,需要进行格式化
"type": "date"
},
"username": {
"type": "keyword"
}
}
}
3).aliases
aliases是给索引取别名
"aliases": {
"msg":{} #别名名称
}
4).执行命令(使用kibana执行,没有就安装kibana)
补充: 获取远程同义词库核心代码(详细代码已上传到码云)
/**
 * Serves the synonym dictionary over HTTP as plain text, one entry per line.
 *
 * <p>Mapped to {@code /synonym}. Every non-empty response carries {@code Last-Modified}
 * and {@code ETag} headers so that a polling client can decide whether to reload.
 */
@RestController
@RequestMapping("/synonym")
public class SynonymController {

    private static final Logger log = LoggerFactory.getLogger(SynonymController.class);

    @Autowired
    private SynonymDao synonymDao;

    /**
     * {@link SimpleDateFormat} is not thread-safe, so every thread gets its own instance.
     */
    private static final ThreadLocal<DateFormat> df = ThreadLocal.withInitial(() -> new SimpleDateFormat(DateUtil.DATE_TIME));

    /**
     * Returns all stored synonym entries, each followed by a newline.
     *
     * @param request  incoming request; its {@code If-None-Match} and
     *                 {@code If-Modified-Since} headers are only logged
     * @param response receives the {@code Last-Modified}, {@code ETag} and
     *                 {@code Content-Type} headers
     * @return the synonym entries joined by {@code "\n"}, or {@code null} (with no
     *         headers set) when no entries are stored
     */
    @GetMapping
    @CrossOrigin
    public String text(HttpServletRequest request, HttpServletResponse response) {
        String eTag = request.getHeader("If-None-Match");
        String modified = request.getHeader("If-Modified-Since");
        String currentDate = df.get().format(new Date());

        List<SynonymEntity> list = synonymDao.selectList(new QueryWrapper<>());
        if (CollectionUtils.isEmpty(list)) {
            return null;
        }
        log.info("加载ik同义词,上次同义词:{},上次修改时间:{},当前日期:{}", eTag, modified, currentDate);

        // Past this point the list is known to be non-empty, so no further emptiness check is needed.
        StringBuilder words = new StringBuilder();
        for (SynonymEntity entity : list) {
            // Entries may use the "=>" mapping form or the plain comma-separated form,
            // depending on the use case; they are passed through unchanged.
            words.append(entity.getValue()).append("\n");
        }

        // NOTE(review): Last-Modified is always the current time, so a client that compares it
        // with the previously seen value observes a change on every poll - confirm this is intended.
        response.setHeader("Last-Modified", currentDate);
        // The ETag is the number of entries. It is derived from the rows already loaded rather
        // than from a second count query, which saves a database round trip and keeps the
        // ETag consistent with the body being returned.
        response.setHeader("ETag", String.valueOf(list.size()));
        response.setHeader("Content-Type", "text/plain");
        return words.toString();
    }
}
版权归原作者 k↑ 所有, 如有侵权,请联系我们删除。