Elasticsearch-edge_ngramとngramの違い
16796 ワード
はじめに
本稿はElasticsearch 7.3.0に基づいています。
説明
edge_ngramとngramは、Elasticsearchに組み込まれている2種類のtokenizerおよびtoken filterです。
実例
ステップ
edge_ngramとngramは、Elasticsearchに組み込まれている2種類のtokenizerおよびtoken filterです。
実例
ステップ
テストインデックスの作成
PUT analyzer_test
{
  "settings": {
    "refresh_interval": "1s",
    "index": { "max_ngram_diff": 10 },
    "analysis": {
      "analyzer": {
        "edge_ngram_analyzer": {
          "type": "custom",
          "char_filter": [],
          "tokenizer": "keyword",
          "filter": ["edge_ngram_filter"]
        },
        "ngram_analyzer": {
          "type": "custom",
          "char_filter": [],
          "tokenizer": "keyword",
          "filter": ["ngram_filter"]
        }
      },
      "filter": {
        "edge_ngram_filter": {
          "type": "edge_ngram",
          "min_gram": 1,
          "max_gram": 11
        },
        "ngram_filter": {
          "type": "ngram",
          "min_gram": 2,
          "max_gram": 5
        }
      }
    }
  }
}
edge_ngram_analyzerアナライザのテスト
POST /analyzer_test/_analyze
{
  "text": "東京大学",
  "analyzer": "edge_ngram_analyzer"
}
{
  "tokens" : [
    {
      "token" : "東",
      "start_offset" : 0,
      "end_offset" : 4,
      "type" : "word",
      "position" : 0
    },
    {
      "token" : "東京",
      "start_offset" : 0,
      "end_offset" : 4,
      "type" : "word",
      "position" : 0
    },
    {
      "token" : "東京大",
      "start_offset" : 0,
      "end_offset" : 4,
      "type" : "word",
      "position" : 0
    },
    {
      "token" : "東京大学",
      "start_offset" : 0,
      "end_offset" : 4,
      "type" : "word",
      "position" : 0
    }
  ]
}
ngram_analyzerアナライザのテスト
POST /analyzer_test/_analyze
{
  "text": "東京大学",
  "analyzer": "ngram_analyzer"
}
{
  "tokens" : [
    {
      "token" : "東京",
      "start_offset" : 0,
      "end_offset" : 4,
      "type" : "word",
      "position" : 0
    },
    {
      "token" : "東京大",
      "start_offset" : 0,
      "end_offset" : 4,
      "type" : "word",
      "position" : 0
    },
    {
      "token" : "東京大学",
      "start_offset" : 0,
      "end_offset" : 4,
      "type" : "word",
      "position" : 0
    },
    {
      "token" : "京大",
      "start_offset" : 0,
      "end_offset" : 4,
      "type" : "word",
      "position" : 0
    },
    {
      "token" : "京大学",
      "start_offset" : 0,
      "end_offset" : 4,
      "type" : "word",
      "position" : 0
    },
    {
      "token" : "大学",
      "start_offset" : 0,
      "end_offset" : 4,
      "type" : "word",
      "position" : 0
    }
  ]
}