【问题标题】:Elasticsearch range bucket aggregation based on doc_count(基于 doc_count 的 Elasticsearch 范围桶聚合)
【发布时间】:2016-12-02 09:49:25
【问题描述】:

我有一个这样的 Elasticsearch 聚合查询。

{
    "aggs": {
        "customer": {
            "aggs": {
                "Total_Sale": {
                    "sum": {
                        "field": "amount"
                    }
                }
            },
            "terms": {
                "field": "org",
                "size": 50000
            }
        }
    }
}

它会返回如下所示的桶聚合结果:

{
    "aggregations": {
        "customer": {
            "buckets": [
                {
                    "Total_Sale": { "value": 9999 },
                    "doc_count": 8,
                    "key": "cats"
                },
                {
                    "Total_Sale": { "value": 8888 },
                    "doc_count": 6,
                    "key": "tigers"
                },
                {
                    "Total_Sale": { "value": 444},
                    "doc_count": 5,
                    "key": "lions"
                },
                {
                    "Total_Sale": { "value": 555 },
                    "doc_count": 2,
                    "key": "wolves"
                }
           ]
       }
    }
}

我想要另一个基于 doc_count 的范围桶聚合。所以,需要的最终结果是

{
    "buckets": [    
        {               
            "Sum_of_Total_Sale": 555, // If I can form bucket, I can get this using sum_bucket. So, getting bucket is important.
            "Sum_of_doc_count": 2, 
            "doc_count": 1, 
            "key": "*-3",   
            "to": 3.0       
        },              
        {               
            "Sum_of_Total_Sale": 9332,
            "Sum_of_doc_count": 11,
            "doc_count": 2, 
            "from": 4.0,    
            "key": "4-6",   
            "to": 6.0       
        },                  
        {               
            "Sum_of_Total_Sale": 9999,
            "Sum_of_doc_count": 8,
            "doc_count": 1, 
            "from": 7.0,    
            "key": "7-*"    
        }                   
    ]                   
}  
  • 先使用 bucket_selector(桶选择器)聚合、再使用 sum_bucket(桶求和)聚合的做法行不通,因为范围有多个键。
  • 桶脚本聚合在桶内进行计算。
  • 我可以为每个文档添加一个脚本字段(scripted field),以帮助我创建这些桶吗?

【问题讨论】:

    标签: elasticsearch elasticsearch-dsl


    【解决方案1】:

    据我所知,没有任何聚合可以让您一次性完成此操作。但是,我不时使用一种技术来克服这个限制。这个想法是重复相同的 terms/sum 聚合,然后对您感兴趣的每个范围使用 bucket_selector 管道聚合。

    POST index/_search
    {
      "size": 0,
      "aggs": {
        "*-3": {
          "terms": {
            "field": "org",
            "size": 1000
          },
          "aggs": {
            "Total_Sale": {
              "sum": {
                "field": "amount"
              }
            },
            "*-3": {
              "bucket_selector": {
                "buckets_path": {
                  "docCount": "_count"
                },
                "script": "params.docCount <= 3"
              }
            }
          }
        },
        "*-3_Total_Sales": {
          "sum_bucket": {
            "buckets_path": "*-3>Total_Sale"
          }
        },
        "*-3_Total_Docs": {
          "sum_bucket": {
            "buckets_path": "*-3>_count"
          }
        },
        "4-6": {
          "terms": {
            "field": "org",
            "size": 1000
          },
          "aggs": {
            "Total_Sale": {
              "sum": {
                "field": "amount"
              }
            },
            "4-6": {
              "bucket_selector": {
                "buckets_path": {
                  "docCount": "_count"
                },
                "script": "params.docCount >= 4 && params.docCount <= 6"
              }
            }
          }
        },
        "4-6_Total_Sales": {
          "sum_bucket": {
            "buckets_path": "4-6>Total_Sale"
          }
        },
        "4-6_Total_Docs": {
          "sum_bucket": {
            "buckets_path": "4-6>_count"
          }
        },
        "7-*": {
          "terms": {
            "field": "org",
            "size": 1000
          },
          "aggs": {
            "Total_Sale": {
              "sum": {
                "field": "amount"
              }
            },
            "7-*": {
              "bucket_selector": {
                "buckets_path": {
                  "docCount": "_count"
                },
                "script": "params.docCount >= 7"
              }
            }
          }
        },
        "7-*_Total_Sales": {
          "sum_bucket": {
            "buckets_path": "7-*>Total_Sale"
          }
        },
        "7_*_Total_Docs": {
          "sum_bucket": {
            "buckets_path": "7-*>_count"
          }
        }
      }
    }
    

    您将得到如下所示的响应,其中 xyz_Total_Sales 和 xyz_Total_Docs 结果中包含了您要找的数字:

      "aggregations": {
        "*-3": {
          "doc_count_error_upper_bound": 0,
          "sum_other_doc_count": 0,
          "buckets": [
            {
              "key": "wolves",
              "doc_count": 2,
              "Total_Sale": {
                "value": 555
              }
            }
          ]
        },
        "7-*": {
          "doc_count_error_upper_bound": 0,
          "sum_other_doc_count": 0,
          "buckets": [
            {
              "key": "cats",
              "doc_count": 8,
              "Total_Sale": {
                "value": 9999
              }
            }
          ]
        },
        "4-6": {
          "doc_count_error_upper_bound": 0,
          "sum_other_doc_count": 0,
          "buckets": [
            {
              "key": "tigers",
              "doc_count": 6,
              "Total_Sale": {
                "value": 8888
              }
            },
            {
              "key": "lions",
              "doc_count": 5,
              "Total_Sale": {
                "value": 444
              }
            }
          ]
        },
        "*-3_Total_Sales": {
          "value": 555
        },
        "*-3_Total_Docs": {
          "value": 2
        },
        "4-6_Total_Sales": {
          "value": 9332
        },
        "4-6_Total_Docs": {
          "value": 11
        },
        "7-*_Total_Sales": {
          "value": 9999
        },
        "7_*_Total_Docs": {
          "value": 8
        }
      }
    

    【讨论】:

    • 如何在 Kibana 中将其可视化?我想 x 轴是 [*-3_Total_Sales, 4-6_Total_Sales, 7-*_Total_Sales],y 轴是它们对应的值。
    猜你喜欢
    • 1970-01-01
    • 1970-01-01
    • 2021-09-19
    • 2014-03-28
    • 2018-11-28
    • 1970-01-01
    • 1970-01-01
    • 2019-10-09
    • 1970-01-01
    相关资源
    最近更新 更多