Ant*_*han 6 .net c# full-text-search elasticsearch nest
我有一个搜索页面,其中包含两种搜索结果类型:汇总结果和具体结果.
要获取"摘要"页面,我使用请求:
var searchDescriptor = new SearchDescriptor<ElasticType>();
searchDescriptor.Index("index_name")
.Query(q =>
q.MultiMatch(m => m
.Fields(fs => fs
.Field(f => f.Content1, 3)
.Field(f => f.Content2, 2)
.Field(f => f.Content3, 1))
.Fuzziness(Fuzziness.EditDistance(1))
.Query(query)
.Boost(1.1)
.Slop(2)
.PrefixLength(1)
.MaxExpansions(100)
.Operator(Operator.Or)
.MinimumShouldMatch(2)
.FuzzyRewrite(RewriteMultiTerm.ConstantScoreBoolean)
.TieBreaker(1.0)
.CutoffFrequency(0.5)
.Lenient()
.ZeroTermsQuery(ZeroTermsQuery.All))
&& (q.Terms(t => t.Field(f => f.LanguageId).Terms(1)) || q.Terms(t => t.Field(f => f.LanguageId).Terms(0))))
.Aggregations(a => a
.Terms("category", tagd => tagd
.Field(f => f.Category)
.Size(10)
.Aggregations(aggs => aggs.TopHits("top_tag_hits", t => t.Size(3)))))
.FielddataFields(fs => fs
.Field(p => p.Content1, 3)
.Field(p => p.Content2, 2)
.Field(p => p.Content3, 1));
var elasticResult = _elasticClient.Search<ElasticType>(_ => searchDescriptor);
Run Code Online (Sandbox Code Playgroud)
例如,我得到了结果
{
"aggregations": {
"category": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{
"key": "category1",
"doc_count": 40,
"top_tag_hits": {
"hits": {
"total": 40,
"max_score": 5.4,
"hits": [{
"_index": "...",
"_type": "...",
"_id": "...",
"_score": 5.4,
"_source": {
"id": 1
}
},
{
"_index": "...",
"_type": "...",
"_id": "...",
"_score": 4.3,
"_source": {
"id": 3 // FAIL!
}
},
{
"_index": "...",
"_type": "...",
"_id": "...",
"_score": 4.3,
"_source": {
"id": 2
}
}]
}
}
}]
}
}
}
Run Code Online (Sandbox Code Playgroud)
所以我得到了相同的点击率_score.
要获得具体结果(按类别),我使用请求:
var searchDescriptor = new SearchDescriptor<ElasticType>();
searchDescriptor.Index("index_name")
.Size(perPage <= 0 ? 100 : perPage)
.From(page * perPage)
.Query(q => q
.MultiMatch(m => m
.Fields(fs => fs
.Field(f => f.Content1, 3)
.Field(f => f.Content2, 2)
.Field(f => f.Content3, 1)
.Field(f => f.Category))
.Fuzziness(Fuzziness.EditDistance(1))
.Query(searchRequest.Query)
.Boost(1.1)
.Slop(2)
.PrefixLength(1)
.MaxExpansions(100)
.Operator(Operator.Or)
.MinimumShouldMatch(2)
.FuzzyRewrite(RewriteMultiTerm.ConstantScoreBoolean)
.TieBreaker(1.0)
.CutoffFrequency(0.5)
.Lenient()
.ZeroTermsQuery(ZeroTermsQuery.All))
&& q.Term(t => t.Field(f => f.Category).Value(searchRequest.Category))
&& (q.Terms(t => t.Field(f => f.LanguageId).Terms(1)) || q.Terms(t => t.Field(f => f.LanguageId).Terms(0))))
.FielddataFields(fs => fs
.Field(p => p.Content1, 3)
.Field(p => p.Content2, 2)
.Field(p => p.Content3, 1))
.Aggregations(a => a
.Terms("category", tagd => tagd
.Field(f => f.Category)));
Run Code Online (Sandbox Code Playgroud)
结果是这样的:
{
"hits": {
"total": 40,
"max_score": 7.816723,
"hits": [{
"_index": "...",
"_type": "...",
"_id": "...",
"_score": 7.816723,
"_source": {
"id": 1
}
},
{
"_index": "...",
"_type": "...",
"_id": "...",
"_score": 6.514713,
"_source": {
"id": 2
}
},
{
"_index": "...",
"_type": "...",
"_id": "...",
"_score": 6.514709,
"_source": {
"id": 3
}
}]
}
}
Run Code Online (Sandbox Code Playgroud)
所以在第二种情况下,对于特定的类别,我得到了具有高精度和弹性的_score,可以很容易地正确地对结果进行排序.但是在聚合的情况下,结果是相同的_score,并且在这种情况下,排序不清楚它是如何工作的.
有人可以指引我走正确的道路如何解决这个问题?或者我如何在结果中实现相同的顺序?也许我可以提高汇总结果的准确性?
我使用elasticsearch服务器版本"5.3.0"和NEST库版本"5.0.0".
更新: 聚合请求的本机查询:
{
"fielddata_fields": [
"content1^3",
"content2^2",
"content3^1"
],
"aggs": {
"category": {
"terms": {
"field": "category",
"size": 10
},
"aggs": {
"top_tag_hits": {
"top_hits": {
"size": 3
}
}
}
}
},
"query": {
"bool": {
"must": [
{
"multi_match": {
"boost": 1.1,
"query": "sparta",
"fuzzy_rewrite": "constant_score_boolean",
"fuzziness": 1,
"cutoff_frequency": 0.5,
"prefix_length": 1,
"max_expansions": 100,
"slop": 2,
"lenient": true,
"tie_breaker": 1.0,
"minimum_should_match": 2,
"operator": "or",
"fields": [
"content1^3",
"content2^2",
"content3^1"
],
"zero_terms_query": "all"
}
},
{
"bool": {
"should": [
{
"terms": {
"languageId": [
1
]
}
},
{
"terms": {
"languageId": [
0
]
}
}
]
}
}
]
}
}
}
Run Code Online (Sandbox Code Playgroud)
具体请求的本机查询:
{
"from": 0,
"size": 100,
"fielddata_fields": [
"content1^3",
"content2^2",
"content3^1"
],
"aggs": {
"category": {
"terms": {
"field": "category"
}
}
},
"query": {
"bool": {
"must": [
{
"bool": {
"must": [
{
"multi_match": {
"boost": 1.1,
"query": ".....",
"fuzzy_rewrite": "constant_score_boolean",
"fuzziness": 1,
"cutoff_frequency": 0.5,
"prefix_length": 1,
"max_expansions": 100,
"slop": 2,
"lenient": true,
"tie_breaker": 1.0,
"minimum_should_match": 2,
"operator": "or",
"fields": [
"content1^3",
"content2^2",
"content3^1",
"category"
],
"zero_terms_query": "all"
}
},
{
"term": {
"category": {
"value": "category1"
}
}
}
]
}
},
{
"bool": {
"should": [
{
"terms": {
"languageId": [
1
]
}
},
{
"terms": {
"languageId": [
0
]
}
}
]
}
}
]
}
}
}
Run Code Online (Sandbox Code Playgroud)
我还使用下一个映射来创建索引:
var descriptor = new CreateIndexDescriptor(indexName)
.Mappings(ms => ms
.Map<ElasticType>(m => m
.Properties(ps => ps
.Keyword(s => s.Name(ecp => ecp.Title))
.Text(s => s.Name(ecp => ecp.Content1))
.Text(s => s.Name(ecp => ecp.Content2))
.Text(s => s.Name(ecp => ecp.Content3))
.Date(s => s.Name(ecp => ecp.Date))
.Number(s => s.Name(ecp => ecp.LanguageId).Type(NumberType.Integer))
.Keyword(s => s.Name(ecp => ecp.Category))
.Text(s => s.Name(ecp => ecp.PreviewImageUrl).Index(false))
.Text(s => s.Name(ecp => ecp.OptionalContent).Index(false))
.Text(s => s.Name(ecp => ecp.Url).Index(false)))));
_elasticClient.CreateIndex(indexName, _ => descriptor);
Run Code Online (Sandbox Code Playgroud)
您的查询有问题。
您使用的是must和的组合作为查询的should一部分。mustbool
因此,如果您在此链接中阅读更多内容,您可以看到must
子句(查询)必须出现在匹配的文档中,并将有助于得分。
因此,您所有符合条件的文件都会获得同等的 5 分。与条件不匹配的任何其他条件甚至不会出现在评分结果中。
你应该做什么使用should查询但在查询之外must,这样 Elasticsearch 将能够正确地对你的文档进行评分
欲了解更多信息作为此问题的一部分
有人可以指导我如何解决这个问题的正确路径吗?
您应该传递'explain': true查询。您可以在此链接中阅读有关解释查询以及如何解释结果的更多信息。
你对这个问题的回答是
我怎样才能在结果中达到相同的顺序?
由于每个分数都是相同的,因此 Elasticsearch 可以通过从其节点获取响应的任何方式对结果进行排序。
可能的解决方案:
您应该重新组织查询以真正利用should查询及其增强功能。您可以在此处阅读有关提升的更多信息。
我尝试了两个与您类似的查询,但使用正确,should并且它们给了我与预期相同的顺序。您的两个查询应按如下方式构建:
{
"from": 0,
"size": 10,
"_source": [
"content1^3",
"content2^2",
"content3^1"
],
"query": {
"bool": {
"should": [
{
"match": {
"languageId": 1
}
},
{
"match": {
"languageId": 0
}
}
],
"must": [
{
"multi_match": {
"boost": 1.1,
"query": ".....",
"fuzzy_rewrite": "constant_score_boolean",
"fuzziness": 1,
"cutoff_frequency": 0.5,
"prefix_length": 1,
"max_expansions": 100,
"slop": 2,
"lenient": true,
"tie_breaker": 1,
"minimum_should_match": 2,
"operator": "or",
"fields": [
"content1^3",
"content2^2",
"content3^1",
"category"
],
"zero_terms_query": "all"
}
}
]
}
}
}
Run Code Online (Sandbox Code Playgroud)
第二个查询为
{
"size": 0,
"query": {
"bool": {
"should": [
{
"match": {
"languageId": 1
}
},
{
"match": {
"languageId": 0
}
}
],
"must": [
{
"multi_match": {
"boost": 1.1,
"query": ".....",
"fuzzy_rewrite": "constant_score_boolean",
"fuzziness": 1,
"cutoff_frequency": 0.5,
"prefix_length": 1,
"max_expansions": 100,
"slop": 2,
"lenient": true,
"tie_breaker": 1,
"minimum_should_match": 2,
"operator": "or",
"fields": [
"content1^3",
"content2^2",
"content3^1",
"category"
],
"zero_terms_query": "all"
}
}
]
}
},
"aggs": {
"categories": {
"terms": {
"field": "category",
"size": 10
},
"aggs": {
"produdtcs": {
"top_hits": {
"_source": [
"content1^3",
"content2^2",
"content3^1"
],
"size": 3
}
}
}
}
}
}
Run Code Online (Sandbox Code Playgroud)