test data
index structure
PUT /employees/ { "mappings" : { "properties" : { "age" : { "type" : "integer" }, "gender" : { "type" : "keyword" }, "job" : { "type" : "text", "fields" : { "keyword" : { "type" : "keyword", "ignore_above" : 50 } } }, "name" : { "type" : "keyword" }, "salary" : { "type" : "integer" } } } }
Insert 20 documents
PUT /employees/_bulk { "index" : { "_id" : "1" } } { "name" : "Emma","age":32,"job":"Product Manager","gender":"female","salary":35000 } { "index" : { "_id" : "2" } } { "name" : "Underwood","age":41,"job":"Dev Manager","gender":"male","salary": 50000} { "index" : { "_id" : "3" } } { "name" : "Tran","age":25,"job":"Web Designer","gender":"male","salary":18000 } { "index" : { "_id" : "4" } } { "name" : "Rivera","age":26,"job":"Web Designer","gender":"female","salary": 22000} { "index" : { "_id" : "5" } } { "name" : "Rose","age":25,"job":"QA","gender":"female","salary":18000 } { "index" : { "_id" : "6" } } { "name" : "Lucy","age":31,"job":"QA","gender":"female","salary": 25000} { "index" : { "_id" : "7" } } { "name" : "Byrd","age":27,"job":"QA","gender":"male","salary":20000 } { "index" : { "_id" : "8" } } { "name" : "Foster","age":27,"job":"Java Programmer","gender":"male","salary": 20000} { "index" : { "_id" : "9" } } { "name" : "Gregory","age":32,"job":"Java Programmer","gender":"male","salary":22000 } { "index" : { "_id" : "10" } } { "name" : "Bryant","age":20,"job":"Java Programmer","gender":"male","salary": 9000} { "index" : { "_id" : "11" } } { "name" : "Jenny","age":36,"job":"Java Programmer","gender":"female","salary":38000 } { "index" : { "_id" : "12" } } { "name" : "Mcdonald","age":31,"job":"Java Programmer","gender":"male","salary": 32000} { "index" : { "_id" : "13" } } { "name" : "Jonthna","age":30,"job":"Java Programmer","gender":"female","salary":30000 } { "index" : { "_id" : "14" } } { "name" : "Marshall","age":32,"job":"Javascript Programmer","gender":"male","salary": 25000} { "index" : { "_id" : "15" } } { "name" : "King","age":33,"job":"Java Programmer","gender":"male","salary":28000 } { "index" : { "_id" : "16" } } { "name" : "Mccarthy","age":21,"job":"Javascript Programmer","gender":"male","salary": 16000} { "index" : { "_id" : "17" } } { "name" : "Goodwin","age":25,"job":"Javascript Programmer","gender":"male","salary": 16000} { "index" : { "_id" 
: "18" } } { "name" : "Catherine","age":29,"job":"Javascript Programmer","gender":"female","salary": 20000} { "index" : { "_id" : "19" } } { "name" : "Boone","age":30,"job":"DBA","gender":"male","salary": 30000} { "index" : { "_id" : "20" } } { "name" : "Kathy","age":29,"job":"DBA","gender":"female","salary": 20000}
query
By default, an Elasticsearch aggregation is scoped to the result set of the query; that is, the aggregation runs only over the documents that the query matches.
# Query employees aged 30 or older and group the matching employees by job title
POST /employees/_search { "size": 3, "query": { "range": { "age": { "gte": 30 } } }, "aggs": { "jobs": { "terms": { "field": "job.keyword" } } } }
Return result
{ "took" : 0, "timed_out" : false, "_shards" : { "total" : 1, "successful" : 1, "skipped" : 0, "failed" : 0 }, "hits" : { "total" : { "value" : 10, "relation" : "eq" }, "max_score" : 1.0, "hits" : [ { "_index" : "employees", "_type" : "_doc", "_id" : "1", "_score" : 1.0, "_source" : { "name" : "Emma", "age" : 32, "job" : "Product Manager", "gender" : "female", "salary" : 35000 } }, { "_index" : "employees", "_type" : "_doc", "_id" : "2", "_score" : 1.0, "_source" : { "name" : "Underwood", "age" : 41, "job" : "Dev Manager", "gender" : "male", "salary" : 50000 } }, { "_index" : "employees", "_type" : "_doc", "_id" : "6", "_score" : 1.0, "_source" : { "name" : "Lucy", "age" : 31, "job" : "QA", "gender" : "female", "salary" : 25000 } } ] }, "aggregations" : { "jobs" : { "doc_count_error_upper_bound" : 0, "sum_other_doc_count" : 0, "buckets" : [ { "key" : "Java Programmer", "doc_count" : 5 }, { "key" : "DBA", "doc_count" : 1 }, { "key" : "Dev Manager", "doc_count" : 1 }, { "key" : "Javascript Programmer", "doc_count" : 1 }, { "key" : "Product Manager", "doc_count" : 1 }, { "key" : "QA", "doc_count" : 1 } ] } } }
filter
If we want to restrict only the documents that feed an aggregation without affecting the query results (the hits), or restrict a single aggregation without affecting the other aggregations, we can use a filter aggregation.
POST employees/_search { "size": 3, "query": { "range": { "age": { "gte": 30 } } }, "aggs": { "older_person": { "filter": { "range": { "age": { "from": 35 } } }, "aggs": { "jobs": { "terms": { "field": "job.keyword" } } } }, "all_jobs": { "terms": { "field": "job.keyword" } } } }
The results are as follows:
{ "took" : 1, "timed_out" : false, "_shards" : { "total" : 1, "successful" : 1, "skipped" : 0, "failed" : 0 }, "hits" : { "total" : { "value" : 10, "relation" : "eq" }, "max_score" : 1.0, "hits" : [ { "_index" : "employees", "_type" : "_doc", "_id" : "1", "_score" : 1.0, "_source" : { "name" : "Emma", "age" : 32, "job" : "Product Manager", "gender" : "female", "salary" : 35000 } }, { "_index" : "employees", "_type" : "_doc", "_id" : "2", "_score" : 1.0, "_source" : { "name" : "Underwood", "age" : 41, "job" : "Dev Manager", "gender" : "male", "salary" : 50000 } }, { "_index" : "employees", "_type" : "_doc", "_id" : "6", "_score" : 1.0, "_source" : { "name" : "Lucy", "age" : 31, "job" : "QA", "gender" : "female", "salary" : 25000 } } ] }, "aggregations" : { "older_person" : { "doc_count" : 2, "jobs" : { "doc_count_error_upper_bound" : 0, "sum_other_doc_count" : 0, "buckets" : [ { "key" : "Dev Manager", "doc_count" : 1 }, { "key" : "Java Programmer", "doc_count" : 1 } ] } }, "all_jobs" : { "doc_count_error_upper_bound" : 0, "sum_other_doc_count" : 0, "buckets" : [ { "key" : "Java Programmer", "doc_count" : 5 }, { "key" : "DBA", "doc_count" : 1 }, { "key" : "Dev Manager", "doc_count" : 1 }, { "key" : "Javascript Programmer", "doc_count" : 1 }, { "key" : "Product Manager", "doc_count" : 1 }, { "key" : "QA", "doc_count" : 1 } ] } } }
We can see that the hits are the same as in the previous query. The older_person aggregation applies its own filter condition (age >= 35) on top of the query, while all_jobs is scoped only by the query and therefore returns the same buckets as the jobs aggregation in the previous example.
post_filter
What if you want a filter to apply only to the returned hits, not to the aggregations? Use post_filter.
For example, I want to select employees aged 23 or younger and group them by job title, and separately return the first five hits among employees aged 35 or younger.
POST employees/_search { "size": 5, "aggs": { "young_person": { "filter": { "range": { "age": { "lte": 23 } } }, "aggs": { "jobs": { "terms": { "field": "job.keyword" } } } } }, "post_filter": { "range": { "age": { "lte": 35 } } } }
The results are as follows:
{ "took" : 1, "timed_out" : false, "_shards" : { "total" : 1, "successful" : 1, "skipped" : 0, "failed" : 0 }, "hits" : { "total" : { "value" : 18, "relation" : "eq" }, "max_score" : 1.0, "hits" : [ { "_index" : "employees", "_type" : "_doc", "_id" : "1", "_score" : 1.0, "_source" : { "name" : "Emma", "age" : 32, "job" : "Product Manager", "gender" : "female", "salary" : 35000 } }, { "_index" : "employees", "_type" : "_doc", "_id" : "3", "_score" : 1.0, "_source" : { "name" : "Tran", "age" : 25, "job" : "Web Designer", "gender" : "male", "salary" : 18000 } }, { "_index" : "employees", "_type" : "_doc", "_id" : "4", "_score" : 1.0, "_source" : { "name" : "Rivera", "age" : 26, "job" : "Web Designer", "gender" : "female", "salary" : 22000 } }, { "_index" : "employees", "_type" : "_doc", "_id" : "5", "_score" : 1.0, "_source" : { "name" : "Rose", "age" : 25, "job" : "QA", "gender" : "female", "salary" : 18000 } }, { "_index" : "employees", "_type" : "_doc", "_id" : "6", "_score" : 1.0, "_source" : { "name" : "Lucy", "age" : 31, "job" : "QA", "gender" : "female", "salary" : 25000 } } ] }, "aggregations" : { "young_person" : { "doc_count" : 2, "jobs" : { "doc_count_error_upper_bound" : 0, "sum_other_doc_count" : 0, "buckets" : [ { "key" : "Java Programmer", "doc_count" : 1 }, { "key" : "Javascript Programmer", "doc_count" : 1 } ] } } } }
In this example, the post_filter condition is not applied to the aggregation, so you can think of post_filter and query as opposites: a query narrows the documents first and the aggregations run on that narrowed set, whereas post_filter is applied to the hits only after the aggregations have been computed, so post_filter and the aggregations are independent of each other.
global
The last one is global, which ignores the scope of the query. For example, we want to select employees aged 35 or older and group them by job title, and we also want the average salary of all employees.
POST /employees/_search { "size": 0, "query": { "range": { "age": { "gte": 35 } } }, "aggs": { "jobs": { "terms": { "field": "job.keyword" } }, "all": { "global": {}, "aggs": { "salary_avg": { "avg": { "field": "salary" } } } } } }
The results are as follows:
{ "took" : 1, "timed_out" : false, "_shards" : { "total" : 1, "successful" : 1, "skipped" : 0, "failed" : 0 }, "hits" : { "total" : { "value" : 2, "relation" : "eq" }, "max_score" : null, "hits" : [ ] }, "aggregations" : { "all" : { "doc_count" : 20, "salary_avg" : { "value" : 24700.0 } }, "jobs" : { "doc_count_error_upper_bound" : 0, "sum_other_doc_count" : 0, "buckets" : [ { "key" : "Dev Manager", "doc_count" : 1 }, { "key" : "Java Programmer", "doc_count" : 1 } ] } } }
As we can see above, 24700.0 is the average salary of all 20 employees, while jobs contains only the job titles of employees aged 35 or older, namely Dev Manager and Java Programmer.
Of course, there are other ways to achieve the same result; the example above is only meant to show how global is used.
For example, the following request produces the same numbers without global:
POST /employees/_search { "size": 0, "aggs": { "old_persons": { "filter": { "range": { "age": { "gte": 35 } } }, "aggs": { "jobs": { "terms": { "field": "job.keyword" } } } }, "avg_salary": { "avg": { "field": "salary" } } } }
The results are as follows:
{ "took" : 1, "timed_out" : false, "_shards" : { "total" : 1, "successful" : 1, "skipped" : 0, "failed" : 0 }, "hits" : { "total" : { "value" : 20, "relation" : "eq" }, "max_score" : null, "hits" : [ ] }, "aggregations" : { "old_persons" : { "doc_count" : 2, "jobs" : { "doc_count_error_upper_bound" : 0, "sum_other_doc_count" : 0, "buckets" : [ { "key" : "Dev Manager", "doc_count" : 1 }, { "key" : "Java Programmer", "doc_count" : 1 } ] } }, "avg_salary" : { "value" : 24700.0 } } }
I came across this on someone else's blog and am reprinting it here.