ES聚合排序


theme: scrolls-light

聚合排序

根据之前的博客可知,ES对于聚合结果的默认排序规则有时并非是我们希望的。可以使用ES聚合提供的order子句进行自定义排序,有多种排序方式可供选择:

  • 按照聚合后的文档计数的大小进行排序
  • 按照聚合后的某个指标进行排序
  • 按照每个组的名称进行排序

1.1 按文档计数排序

在聚合排序时,业务需求可能有按照每个组聚合后的文档数量进行排序的场景。此时可以使用_count来引用每组聚合的文档计数进行排序。

以下DSL演示了按照城市的酒店平均价格进行聚合,并按照聚合后的文档计数进行升序排列的请求:

# Order buckets by document count
# Groups hotels by city (terms aggregation) and computes the average price per city.
GET hotel_poly/_search
{
  "aggs": {
    "group_city": {
      "terms": {
        "field": "city",
        "order": {// sort the city buckets by their document count, ascending
          "_count": "asc"
        }
      },
      "aggs": {
        "my_avg": {
          "avg": {// average price is the metric computed per bucket; "missing" supplies 200 for documents without a price
            "field": "price",
            "missing": 200
          }
        }
      }
    }
  }
}

在Java中使用文档计数进行聚合排序的逻辑如下:

/**
 * Groups the documents of the {@code hotel_poly} index by {@code city}, orders the
 * buckets by ascending document count, and logs each bucket key together with the
 * average {@code price} of that bucket.
 *
 * <p>The avg sub-aggregation substitutes 200 for documents that have no {@code price},
 * matching the {@code "missing": 200} setting of the equivalent DSL request.
 *
 * @throws IOException propagated from {@code client.search}
 */
public void getAddDocCountOrderSearch() throws IOException{
    // Build the search request
    SearchRequest searchRequest = new SearchRequest("hotel_poly");
    SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
    String termsAggName="my_terms"; // name of the terms aggregation
    // Terms aggregation on the city field
    TermsAggregationBuilder termsAggregationBuilder = AggregationBuilders.terms(termsAggName).field("city");
    // true => ascending order by bucket document count ("_count": "asc")
    BucketOrder bucketOrder = BucketOrder.count(true);
    termsAggregationBuilder.order(bucketOrder);
    String avgAggName="my_avg"; // name of the avg aggregation
    // Avg aggregation on the price field
    AvgAggregationBuilder avgAgg=AggregationBuilders.avg(avgAggName).field("price");
    // Treat documents without a price as 200, as the DSL request does
    avgAgg.missing(200);
    // Nest the avg aggregation under the terms aggregation
    termsAggregationBuilder.subAggregation(avgAgg);
    searchSourceBuilder.aggregation(termsAggregationBuilder);   // attach the aggregation
    searchRequest.source(searchSourceBuilder);                  // attach the source to the request
    SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT); // execute the search
    // Read the aggregation results
    Aggregations aggregations = searchResponse.getAggregations();
    Terms terms = aggregations.get(termsAggName);
    for (Terms.Bucket bucket : terms.getBuckets()) {
        String bucketKey = bucket.getKey().toString();
        log.info("termsKey={}",bucketKey);
        Avg avg=bucket.getAggregations().get(avgAggName);
        String key = avg.getName();       // name of the avg aggregation
        double avgVal = avg.getValue();   // average price of this bucket
        log.info("key={},avgVal={}",key,avgVal);
    }
}

1.2 按聚合指标排序

在聚合排序时,业务需求可能有按照每个组聚合后的指标值进行排序的场景。此时可以使用指标的聚合名称来引用每组聚合的指标值。

以下DSL演示了按照城市的酒店平均价格进行聚合,并按照聚合后的平均价格进行升序排列:

# Order buckets by an aggregated metric
# Groups hotels by city and sorts the city buckets by their average price.
GET /hotel_poly/_search
{
  "aggs": {
    "group_city": {
      "terms": {
        "field": "city",
        "order": {// sort the buckets by the "my_avg" sub-aggregation, ascending
          "my_avg": "asc"
        }
      },
      "aggs": {
        "my_avg": {// the metric referenced by "order" above
          "avg": {
            "field": "price",
            "missing": 200
          }
        }
      }
    }
  }
}

在Java中按照聚合指标进行聚合排序的逻辑如下:

/**
 * Groups the documents of the {@code hotel_poly} index by {@code city}, orders the
 * buckets by the ascending value of the nested avg aggregation on {@code price}
 * (documents without a price are counted as 200), and logs every bucket key with
 * its average price.
 *
 * @throws IOException propagated from {@code client.search}
 */
public void getAggMetricsOrderSearch() throws IOException{
    final String cityTermsName = "my_terms";
    final String priceAvgName = "my_avg";

    // Metric: average price per bucket, with 200 substituted for a missing price
    AvgAggregationBuilder priceAvg = AggregationBuilders.avg(priceAvgName)
            .field("price")
            .missing(200);

    // Buckets: one per city, sorted ascending by the metric defined above
    TermsAggregationBuilder cityTerms = AggregationBuilders.terms(cityTermsName)
            .field("city")
            .order(BucketOrder.aggregation(priceAvgName, true))
            .subAggregation(priceAvg);

    SearchRequest request = new SearchRequest("hotel_poly")
            .source(new SearchSourceBuilder().aggregation(cityTerms));
    SearchResponse response = client.search(request, RequestOptions.DEFAULT);

    // Walk the city buckets and report each one's average price
    Terms cityBuckets = response.getAggregations().get(cityTermsName);
    for (Terms.Bucket cityBucket : cityBuckets.getBuckets()) {
        log.info("termsKey={}", cityBucket.getKey().toString());
        Avg bucketAvg = cityBucket.getAggregations().get(priceAvgName);
        log.info("key={},avgVal={}", bucketAvg.getName(), bucketAvg.getValue());
    }
}

image-20240423104637171

1.3 按分组key排序

在聚合排序时,业务需求可能有按照每个分组的组名称排序的场景,此时可以使用_key来引用分组名称。

以下DSL演示了按照城市的酒店平均价格进行聚合,并按照聚合后的分组名称进行升序排列的请求:

# Order buckets by group key
# Groups hotels by city, computes the average price per city ("missing" supplies 200
# for documents without a price), and sorts the buckets by city name via "_key", ascending.
GET /hotel_poly/_search
{
  "aggs": {
    "group_city": {
      "terms": {
        "field": "city",
        "order": {
          "_key": "asc"
        }
      },
      "aggs": {
        "my_avg": {
          "avg": {
            "field": "price",
            "missing": 200
          }
        }
      }
    }
  }
}

在Java中按照分组key进行聚合排序的逻辑如下:

/**
 * Groups the documents of the {@code hotel_poly} index by {@code city}, orders the
 * buckets by ascending bucket key, and logs every bucket key with the average
 * {@code price} of that bucket (documents without a price are counted as 200).
 *
 * @throws IOException propagated from {@code client.search}
 */
public void getAggKeyOrderSearch() throws IOException{
    final String groupName = "my_terms";
    final String metricName = "my_avg";

    // Bucket definition: terms on city, true => ascending by bucket key
    TermsAggregationBuilder groupByCity = AggregationBuilders.terms(groupName).field("city");
    groupByCity.order(BucketOrder.key(true));

    // Metric definition: average price, nested under the city buckets
    AvgAggregationBuilder avgPrice = AggregationBuilders.avg(metricName).field("price");
    avgPrice.missing(200);
    groupByCity.subAggregation(avgPrice);

    // Assemble and execute the request
    SearchSourceBuilder source = new SearchSourceBuilder();
    source.aggregation(groupByCity);
    SearchRequest request = new SearchRequest("hotel_poly");
    request.source(source);
    SearchResponse response = client.search(request, RequestOptions.DEFAULT);

    // Report each bucket's key and average price
    Terms cityGroups = response.getAggregations().get(groupName);
    cityGroups.getBuckets().forEach(group -> {
        log.info("termsKey={}", group.getKey().toString());
        Avg groupAvg = group.getAggregations().get(metricName);
        log.info("key={},avgVal={}", groupAvg.getName(), groupAvg.getValue());
    });
}

数据源

索引结构

# Creates the hotel_poly index (single shard) used by the aggregation examples.
# create_time declares the yyyyMMddHHmmss format because the sample documents store
# timestamps such as "20200418120000"; without an explicit format the default
# (strict_date_optional_time||epoch_millis) would read those digits as epoch milliseconds.
PUT /hotel_poly
{
  "settings": {
    "number_of_shards": 1
  },
  "mappings": {
    "properties": {
      "title":{
        "type": "text"
      },
      "city":{
        "type": "keyword"
      },
      "price":{
        "type": "double"
      },
      "create_time":{
        "type": "date",
        "format": "yyyyMMddHHmmss"
      },
      "full_room":{
        "type": "boolean"
      },
      "location":{
        "type": "geo_point"
      },
      "tags":{
        "type": "keyword"
      },
      "comment_info":{
        "properties": {
          "favourable_comment":{
            "type":"integer"
          },
          "negative_comment":{
            "type":"integer"
          }
        }
      }
    }
  }
}

酒店数据

POST /_bulk
{"index":{"_index":"hotel_poly","_id":"001"}}
{"title":"文雅假日酒店","city":"北京","price":556.00,"create_time":"20200418120000","full_room":true,"location":{"lat":39.938838,"lon":106.449112},"tags":["wifi","小型电影院"],"comment_info":{"favourable_comment":20,"negative_comment":10}}
{"index":{"_index":"hotel_poly","_id":"002"}}
{"title":"金都嘉怡假日酒店","city":"北京","create_time":"20210315200000","full_room":false,"location":{"lat":39.915153,"lon":116.4030},"tags":["wifi","免费早餐"],"comment_info":{"favourable_comment":20,"negative_comment":10}}
{"index":{"_index":"hotel_poly","_id":"003"}}
{"title":"金都假日酒店","city":"北京","price":200.00,"create_time":"20210509160000","full_room":true,"location":{"lat":40.002096,"lon":116.386673},"comment_info":{"favourable_comment":20,"negative_comment":10}}
{"index":{"_index":"hotel_poly","_id":"004"}}
{"title":"金都假日酒店","city":"天津","price":500.00,"create_time":"20210218080000","full_room":false,"location":{"lat":39.155004,"lon":117.203976},"tags":["wifi","免费车位"]}
{"index":{"_index":"hotel_poly","_id":"005"}}
{"title":"文雅精选酒店","city":"天津","price":800.00,"create_time":"20210101080000","full_room":true,"location":{"lat":39.178447,"lon":117.219999},"tags":["wifi","充电车位"],"comment_info":{"favourable_comment":20,"negative_comment":10}}

这是一个从 https://juejin.cn/post/7369052013152485428 下的原始话题分离的讨论话题