Kun*_*the 7 camelcasing analyzer elasticsearch
我有一个驼峰式命名(camel case)的标签,如 #teamIndia。现在当点击这个标签时,它应该获取所有包含"#teamIndia"的结果,并且应首先显示带有"#teamIndia"的结果,然后是"teamIndia",然后是"team India",然后是"team"或"India"等。
我在做什么:
搜索文字: "#teamIndia","#NEWYORK","#jobis","#2016"
POST /clip
{
"settings": {
"analysis": {
"char_filter" : {
"space_hashtags" : {
"type" : "mapping",
"mappings" : ["#=>|#"]
}
},
"filter": {
"substring": {
"max_gram": "20",
"type": "nGram",
"min_gram": "1",
"token_chars": [
"whitespace"
]
},
"camelcase": {
"type": "word_delimiter",
"type_table": ["# => ALPHANUM", "@ => ALPHANUM"]
},
"stopword": {
"type": "stop",
"stopwords": ["and", "is", "the"]
}
},
"analyzer": {
"substring_analyzer": {
"filter": [
"lowercase",
"substring"
],
"tokenizer": "standard"
},
"camelcase_analyzer": {
"type" : "custom",
"char_filter" : "space_hashtags",
"tokenizer" : "whitespace",
"filter": [
"camelcase",
"lowercase",
"stopword"
]
}
}
}
},
"mappings": {
"Clip": {
"properties": {
"description": {
"type": "multi_field",
"fields": {
"description": {
"type": "string",
"analyzer": "substring_analyzer",
"search_analyzer": "standard"
},
"raw": {
"type": "string",
"index": "not_analyzed"
},
"hashtag": {
"type": "string",
"index": "analyzed",
"analyzer": "camelcase_analyzer"
}
}
},
....
}
}
}
}
Run Code Online (Sandbox Code Playgroud)
文档示例: -
POST /clip/Clip/2 {"id" : 1, "description" : "TheBestAndTheBeast"}
POST /clip/Clip/3 {"id" : 2, "description" : "bikes in DUBAI TheBestAndTheBeast profession"}
POST /clip/Clip/3 {"id" : 2, "description" : "Know how a software engineer surprised his wife! <a href="search/clips?q=%23theProvider&source=hashtag" ng-click="handleModalClick()"> #theProvider </a> rioOlympic <a href="search/clips?q=%23DUBAI&source=hashtag" ng-click="handleModalClick()"> #DUBAI </a> <a href="search/clips?q=%23TheBestAndTheBeast&source=hashtag" ng-click="handleModalClick()"> #TheBestAndTheBeast </a> <a href="search/clips?q=%23rioOlympic&source=hashtag" ng-click="handleModalClick()"> #rioOlympic </a>"}
Run Code Online (Sandbox Code Playgroud)
**搜索查询**
GET clip/_search
{
"size": 100,
"query": {
"filtered": {
"query": {
"bool": {
"must":
{
"query_string": {
"fields": [
"description.hashtag"
],
"query": "teamIndia"
}
},
"should": {
"match":
{ "description.raw": "#teamIndia"}
}
}
}
}
}
}
Run Code Online (Sandbox Code Playgroud)
预期结果: "#teamIndia","teamIndia","team India","team","India",
其他测试关键字也类似。
原始帖子中的查询无法按预期工作的原因之一,是 description.raw 字段为 not_analyzed。
因此,#teamIndia 永远不会匹配 description 为 "Animals and Pets and #teamIndia" 的文档,因为该文档的 description.raw 中保存的是未经分析的整个词项 "Animals and Pets and #teamIndia",而不是 #teamIndia。
假设您的文档与原帖(OP)中的第二个示例类似。
\n\n例子:
\n\n{"id" : 2, "description" : "Animals and Pets and #teamIndia"}\n
Run Code Online (Sandbox Code Playgroud)\n\n或者
\n\n{"id":7,"description":"This <a href="search/clips?q=%23teamIndia&source=hashtag">#teamIndia</a>"}\n
Run Code Online (Sandbox Code Playgroud)\n\n那么您应该能够按以下顺序对文档进行排名:
\n\n1) 包含“#teamIndia”的描述,
\n2) 包含“teamIndia”的描述
\n3) 包含“team India”的描述
\n4) 包含“India”的描述
做法是在 word_delimiter(单词分隔符)过滤器中启用 preserve_original 和 catenate_words,如下例所示。
例子:
\n\n索引文件
\n\n PUT clip\n{\n "settings": {\n "analysis": {\n "char_filter": {\n "zwsp_normalize": {\n "type": "mapping",\n "mappings": [\n "\\\\u200B=>",\n "\\\\u200C=>",\n "\\\\u200D=>"\n ]\n },\n "html_decoder": {\n "type": "mapping",\n "mappings": [\n "<=> <",\n ">=> >"\n ]\n }\n },\n "filter": {\n "camelcase": {\n "type": "word_delimiter",\n "preserve_original": "true",\n "catenate_all": "true"\n },\n "stopword": {\n "type": "stop",\n "stopwords": [\n "and",\n "is",\n "the"\n ]\n }\n },\n "analyzer": {\n "camelcase_analyzer": {\n "type": "custom",\n "tokenizer": "whitespace",\n "filter": [\n "camelcase",\n "lowercase",\n "stopword"\n ],\n "char_filter": [\n "zwsp_normalize",\n "html_decoder",\n "html_strip"\n ]\n }\n }\n }\n },\n "mappings": {\n "Clip": {\n "properties": {\n "description": {\n "type": "multi_field",\n "fields": {\n "hashtag": {\n "type": "string",\n "index": "analyzed",\n "analyzer": "camelcase_analyzer",\n "norms": {\n "enabled": false\n }\n }\n }\n }\n }\n }\n }\n}\n\n\n\nPOST /clip/Clip/1\n{\n "id": 1,\n "description": "Animals and Pets and #teamIndia"\n}\n\nPOST /clip/Clip/2 \n{\n "id": 2,\n "description": "Animals and Pets and teamIndia"\n}\n\n\nPOST /clip/Clip/3\n{\n "id": 3,\n "description": "Animals and Pets and team India"\n}\n\n\nPOST /clip/Clip/4 \n{\n "id": 4,\n "description": "Animals and Pets and India"\n}\n\n\n\n POST /clip/Clip/7\n {\n "id": 7,\n "description": "This <a href="search/clips?q=%23teamIndia&source=hashtag">#teamIndia</a>"\n }\n
Run Code Online (Sandbox Code Playgroud)\n\n查询结果:
\n\nPOST clip/_search?search_type=dfs_query_then_fetch\n{\n "size": 100,\n "query": {\n "filtered": {\n "query": {\n "bool": {\n "must": [\n {\n "query_string": {\n "fields": [\n "description.hashtag"\n ],\n "query": "#teamIndia"\n }\n }\n ]\n }\n }\n }\n }\n}\n
Run Code Online (Sandbox Code Playgroud)\n\n结果:
\n\n "hits": {\n "total": 5,\n "max_score": 1.4969246,\n "hits": [\n {\n "_index": "clip",\n "_type": "Clip",\n "_id": "7",\n "_score": 1.4969246,\n "_source": {\n "id": 7,\n "description": "This <a href="search/clips?q=%23teamIndia&source=hashtag">#teamIndia</a>"\n }\n },\n {\n "_index": "clip",\n "_type": "Clip",\n "_id": "1",\n "_score": 1.4969246,\n "_source": {\n "id": 1,\n "description": "Animals and Pets and #teamIndia"\n }\n },\n {\n "_index": "clip",\n "_type": "Clip",\n "_id": "2",\n "_score": 1.0952718,\n "_source": {\n "id": 2,\n "description": "Animals and Pets and teamIndia"\n }\n },\n {\n "_index": "clip",\n "_type": "Clip",\n "_id": "3",\n "_score": 0.5207714,\n "_source": {\n "id": 3,\n "description": "Animals and Pets and team India"\n }\n },\n {\n "_index": "clip",\n "_type": "Clip",\n "_id": "4",\n "_score": 0.11123338,\n "_source": {\n "id": 4,\n "description": "Animals and Pets and India"\n }\n }\n ]\n }\n
Run Code Online (Sandbox Code Playgroud)\n\n示例 #dubai:
\n\nPOST /clip/Clip/5\n{\n "id": 5,\n "description": "#dubai is hot"\n}\n\nPOST /clip/Clip/6\n{\n "id": 6,\n "description": "dubai airport is huge"\n}\n\nPOST clip/_search?search_type=dfs_query_then_fetch\n{\n "size": 100,\n "query": {\n "filtered": {\n "query": {\n "bool": {\n "must": [\n {\n "query_string": {\n "fields": [\n "description.hashtag"\n ],\n "query": "#dubai"\n }\n }\n ]\n }\n }\n }\n }\n}\n\n "hits": {\n "total": 2,\n "max_score": 1.820827,\n "hits": [\n {\n "_index": "clip",\n "_type": "Clip",\n "_id": "5",\n "_score": 1.820827,\n "_source": {\n "id": 5,\n "description": "#dubai is hot"\n }\n },\n {\n "_index": "clip",\n "_type": "Clip",\n "_id": "6",\n "_score": 0.5856731,\n "_source": {\n "id": 6,\n "description": "dubai airport is huge"\n }\n }\n ]\n }\n
Run Code Online (Sandbox Code Playgroud)\n\n示例#professionalAndPunctual:
\n\nPOST /clip/Clip/7\n{\n "id": 7,\n "description": "professionalAndPunctual"\n}\nPOST clip/_search?search_type=dfs_query_then_fetch\n{\n "size": 100,\n "query": {\n "filtered": {\n "query": {\n "bool": {\n "must": [\n {\n "query_string": {\n "fields": [\n "description.hashtag"\n ],\n "query": "#professionalAndPunctual"\n }\n }\n ]\n }\n }\n }\n }\n} \n\n "hits": [\n {\n "_index": "clip",\n "_type": "Clip",\n "_id": "7",\n "_score": 2.2149992,\n "_source": {\n "id": 7,\n "description": "professionalAndPunctual"\n }\n }\n ]\n
Run Code Online (Sandbox Code Playgroud)\n\n示例:#TheBestAndTheBeast
\n\n POST /clip/Clip/10\n{"id" : 10, "description" : "TheBestAndTheBeast"}\n\nPOST /clip/Clip/11\n{"id" :11, "description" : "bikes in DUBAI TheBestAndTheBeast profession"}\n\nPOST /clip/Clip/12\n{"id" : 12, "description" : "Know how a software engineer surprised his wife! <a href=\\"search/clips?q=%23theProvider&source=hashtag\\" ng-click=\\"handleModalClick()\\"> #theProvider </a> rioOlympic <a href=\\"search/clips?q=%23DUBAI&source=hashtag\\" ng-click=\\"handleModalClick()\\"> #DUBAI </a> <a href=\\"search/clips?q=%23TheBestAndTheBeast&source=hashtag\\" ng-click=\\"handleModalClick()\\"> #TheBestAndTheBeast </a> <a href=\\"search/clips?q=%23rioOlympic&source=hashtag\\" ng-click=\\"handleModalClick()\\"> #rioOlympic </a>"}\n\nPOST clip/_search?search_type=dfs_query_then_fetch\n{\n "size": 100,\n "query": {\n "filtered": {\n "query": {\n "bool": {\n "must": [\n {\n "query_string": {\n "fields": [\n "description.hashtag"\n ],\n "query": "#TheBestAndTheBeast"\n }\n }\n ]\n }\n }\n }\n }\n}\n
Run Code Online (Sandbox Code Playgroud)\n\n#结果
\n\n "hits": [\n {\n "_index": "clip",\n "_type": "Clip",\n "_id": "12",\n "_score": 1.8701664,\n "_source": {\n "id": 12,\n "description": "Know how a software engineer surprised his wife! <a href=\\"search/clips?q=%23theProvider&source=hashtag\\" ng-click=\\"handleModalClick()\\"> #theProvider </a> rioOlympic <a href=\\"search/clips?q=%23DUBAI&source=hashtag\\" ng-click=\\"handleModalClick()\\"> #DUBAI </a> <a href=\\"search/clips?q=%23TheBestAndTheBeast&source=hashtag\\" ng-click=\\"handleModalClick()\\"> #TheBestAndTheBeast </a> <a href=\\"search/clips?q=%23rioOlympic&source=hashtag\\" ng-click=\\"handleModalClick()\\"> #rioOlympic </a>"\n }\n },\n {\n "_index": "clip",\n "_type": "Clip",\n "_id": "10",\n "_score": 0.9263139,\n "_source": {\n "id": 10,\n "description": "TheBestAndTheBeast"\n }\n },\n {\n "_index": "clip",\n "_type": "Clip",\n "_id": "11",\n "_score": 0.9263139,\n "_source": {\n "id": 11,\n "description": "bikes in DUBAI TheBestAndTheBeast profession"\n }\n }\n ]\n
Run Code Online (Sandbox Code Playgroud)\n\n分析器示例:
\n\nget clip/_analyze?analyzer=camelcase_analyzer&text=%23DUBAI\n\n {\n "tokens": [\n {\n "token": "#dubai",\n "start_offset": 0,\n "end_offset": 6,\n "type": "word",\n "position": 0\n },\n {\n "token": "dubai",\n "start_offset": 1,\n "end_offset": 6,\n "type": "word",\n "position": 0\n }\n ]\n}\n\nget clip/_analyze?analyzer=camelcase_analyzer&text=This%20%26lt%3Ba%20href%3D%26quot%3Bsearch%2Fclips%3Fq%3D%2523teamIndia%26amp%3Bsource%3Dhashtag%26quot%3B%26gt%3B%23teamIndia%26lt%3B%2Fa%26gt%3B\n\n {\n "tokens": [\n {\n "token": "this",\n "start_offset": 0,\n "end_offset": 4,\n "type": "word",\n "position": 0\n },\n {\n "token": "#teamindia",\n "start_offset": 78,\n "end_offset": 98,\n "type": "word",\n "position": 1\n },\n {\n "token": "india",\n "start_offset": 78,\n "end_offset": 98,\n "type": "word",\n "position": 2\n },\n {\n "token": "team",\n "start_offset": 78,\n "end_offset": 98,\n "type": "word",\n "position": 2\n },\n {\n "token": "teamindia",\n "start_offset": 78,\n "end_offset": 98,\n "type": "word",\n "position": 2\n }\n ]\n}\n
Run Code Online (Sandbox Code Playgroud)\n
归档时间: |
|
查看次数: |
1341 次 |
最近记录: |