Elasticsearch profile API来定位慢查询-天翼云开发者社区

有时在发起一个查询时，他的执行过程会延迟，或者响应时间很慢。查询缓慢可能会有多种原因范围包括 shard 问题，或者计算查询中的某些元素。从 elasticsearch 2.2版本开始提供 Profile API 供用户检查查询执行时间和其他详细信息

在官网中，profile API的首页，有一句警告：The Profile API is a debugging tool and adds significant overhead to search execution. 意思是，查看执行计划，会增加系统的资源开销。大家在生产环境的时候，还是要慎用这个功能。

执行一个基础查询请求，只要在请求参数中加入 "profile":true，即可查看ES的执行计划

GET /my-index-000001/_search
{
  "profile": true,
  "query" : {
    "match" : { "message" : "GET /search" }
  }
}

查看执行结果：

{
  "took": 25,
  "timed_out": false,
  "_shards": {
    "total": 1,
    "successful": 1,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 5,
      "relation": "eq"
    },
    "max_score": 0.17402273,
    "hits": [...] 
  },
  "profile": {
    "shards": [
      {
        "id": "[2aE02wS1R8q_QFnYu6vDVQ][my-index-000001][0]",
        "searches": [
          {
            "query": [
              {
                "type": "BooleanQuery",
                "description": "message:get message:search",
                "time_in_nanos" : 11972972,
                "breakdown" : {
                  "set_min_competitive_score_count": 0,
                  "match_count": 5,
                  "shallow_advance_count": 0,
                  "set_min_competitive_score": 0,
                  "next_doc": 39022,
                  "match": 4456,
                  "next_doc_count": 5,
                  "score_count": 5,
                  "compute_max_score_count": 0,
                  "compute_max_score": 0,
                  "advance": 84525,
                  "advance_count": 1,
                  "score": 37779,
                  "build_scorer_count": 2,
                  "create_weight": 4694895,
                  "shallow_advance": 0,
                  "create_weight_count": 1,
                  "build_scorer": 7112295,
                  "count_weight": 0,
                  "count_weight_count": 0
                },
                "children": [
                  {
                    "type": "TermQuery",
                    "description": "message:get",
                    "time_in_nanos": 3801935,
                    "breakdown": {
                      "set_min_competitive_score_count": 0,
                      "match_count": 0,
                      "shallow_advance_count": 3,
                      "set_min_competitive_score": 0,
                      "next_doc": 0,
                      "match": 0,
                      "next_doc_count": 0,
                      "score_count": 5,
                      "compute_max_score_count": 3,
                      "compute_max_score": 32487,
                      "advance": 5749,
                      "advance_count": 6,
                      "score": 16219,
                      "build_scorer_count": 3,
                      "create_weight": 2382719,
                      "shallow_advance": 9754,
                      "create_weight_count": 1,
                      "build_scorer": 1355007,
                      "count_weight": 0,
                      "count_weight_count": 0
                    }
                  },
                  {
                    "type": "TermQuery",
                    "description": "message:search",
                    "time_in_nanos": 205654,
                    "breakdown": {
                      "set_min_competitive_score_count": 0,
                      "match_count": 0,
                      "shallow_advance_count": 3,
                      "set_min_competitive_score": 0,
                      "next_doc": 0,
                      "match": 0,
                      "next_doc_count": 0,
                      "score_count": 5,
                      "compute_max_score_count": 3,
                      "compute_max_score": 6678,
                      "advance": 12733,
                      "advance_count": 6,
                      "score": 6627,
                      "build_scorer_count": 3,
                      "create_weight": 130951,
                      "shallow_advance": 2512,
                      "create_weight_count": 1,
                      "build_scorer": 46153,
                      "count_weight": 0,
                      "count_weight_count": 0
                    }
                  }
                ]
              }
            ],
            "rewrite_time": 451233,
            "collector": [
              {
                "name": "SimpleTopScoreDocCollector",
                "reason": "search_top_hits",
                "time_in_nanos": 775274
              }
            ]
          }
        ],
        "aggregations": [],
        "fetch": {
          "type": "fetch",
          "description": "",
          "time_in_nanos": 660555,
          "breakdown": {
            "next_reader": 7292,
            "next_reader_count": 1,
            "load_stored_fields": 299325,
            "load_stored_fields_count": 5,
            "load_source": 3863,
            "load_source_count": 5
          },
          "debug": {
            "stored_fields": ["_id", "_routing", "_source"]
          },
          "children": [
            {
              "type": "FetchSourcePhase",
              "description": "",
              "time_in_nanos": 20443,
              "breakdown": {
                "next_reader": 745,
                "next_reader_count": 1,
                "process": 19698,
                "process_count": 5
              },
              "debug": {
                "fast_path": 5
              }
            },
            {
              "type": "StoredFieldsPhase",
              "description": "",
              "time_in_nanos": 5310,
              "breakdown": {
                "next_reader": 745,
                "next_reader_count": 1,
                "process": 4445,
                "process_count": 5
              }
            }
          ]
        }
      }
    ]
  }
}

由于一个搜索请求可能针对索引中的某个或多个分片执行，而一个搜索可能涵盖多个索引，因此profile响应的高层次元素是多个分片对象数组。每个分片对象列出其ID，该ID唯一标识该分片。ID格式为[节点ID][索引名称][分片ID]。
整个profile可能包括一个或多个“搜索”，其中搜索是对 underlying Lucene 索引的执行查询。大部分用户提交的请求只会针对Lucene索引执行一次搜索。但偶尔多个搜索将执行，例如包括全球聚合(这需要执行第二个“匹配全部”查询以获取全球上下文)。

Query阶段：

query部分包含Lucene在指定分片上执行的查询树详细的时间信息。

"query": [
    {
       "type": "BooleanQuery",
       "description": "message:get message:search",
       "time_in_nanos": "11972972",
       "breakdown": {...},               
       "children": [
          {
             "type": "TermQuery",
             "description": "message:get",
             "time_in_nanos": "3801935",
             "breakdown": {...}
          },
          {
             "type": "TermQuery",
             "description": "message:search",
             "time_in_nanos": "205654",
             "breakdown": {...}
          }
       ]
    }
]

根据Profile 结构，我们可以发现我们的匹配查询被 Lucene 改写成了一个布尔值查询(其中包含两个子查询：一个TermQuery)。

type字段显示的是 Lucene 类名，通常与 Elasticsearch 中相应的名称对齐。

description字段显示的是查询的解释文本，以便于区分您的查询部分(例如，"message:get"和"message:search"都是TermQuery，它们看起来非常相似)。
time_in_nanos 字段显示，这个查询整个布尔值查询执行花费了大约 11.9 毫秒。记录的时间包括所有子查询。
breakdown 字段将提供有关时间如何花费的详细信息。我们将稍后查看它。最后，children 数组列出可能存在的子查询。由于我们搜索了两个值(“get search”)，我们的布尔查询包含两个子Term queries。它们具有相同的信息(type、time、 breakdown、等等)。子查询可以有自己的子查询。

Collector阶段：

响应的collector部分显示高级执行详细信息。 Lucene 的工作方式是定义一个“Collector”，负责协调匹配文档的遍历、评分和收集。

"collector": [
  {
    "name": "SimpleTopScoreDocCollector",
    "reason": "search_top_hits",
    "time_in_nanos": 775274
  }
]

我们看到一个名为SimpleTopScoreDocCollector的收集器被包装为可取消的收集器。

SimpleTopScoreDocCollector是Elasticsearch默认的“得分和排序”收集器。

reason字段试图用简单的英语描述类名。

time_in_nanos类似于查询树的时间：包括所有子收集器的 wall-clock 时间。类似地，子集列表了所有子收集器。

将SimpleTopScoreDocCollector包裹在可取消的收集器中的收集器是用于检测当前搜索是否被取消，并在发生时立即停止收集文档的Elasticsearch工具。
需要指出的是，收集器时间与查询时间无关。它们计算、组合并标准化独立于查询时间！由于Lucene的执行本质，不可能将收集器时间“合并”到查询部分中，因此它们显示在独立的部分中。

rewrite阶段：

Lucene中的所有查询都经历着一个“rewrite”过程。一个查询(及其子查询)可能多次被改写，并且过程会继续直到查询不再发生变化。

这个过程允许Lucene进行优化，例如删除冗余条件句，以更高效的执行路径替换一个查询等等。例如，Boolean → Boolean → TermQuery可以改写为TermQuery，因为在这种情况下所有布尔查询都是不必要的。

改写过程非常复杂且难以显示，因为查询可能会发生剧烈的变化。 Rather than显示中间结果，总改写时间 simply 显示为值(in nanoseconds)。这个值是累加的，它包含了所有正在改写的查询的总时间。

{ "took": 25, "timed_out": false, "_shards": { "total": 1, "successful": 1, "skipped": 0, "failed": 0 }, "hits": { "total": { "value": 5, "relation": "eq" }, "max_score": 0.17402273, "hits": [...] }, "profile": { "shards": [ { "id": "[2aE02wS1R8q_QFnYu6vDVQ][my-index-000001][0]", "searches": [ { "query": [ { "type": "BooleanQuery", "description": "message:get message:search", "time_in_nanos" : 11972972, "breakdown" : { "set_min_competitive_score_count": 0, "match_count": 5, "shallow_advance_count": 0, "set_min_competitive_score": 0, "next_doc": 39022, "match": 4456, "next_doc_count": 5, "score_count": 5, "compute_max_score_count": 0, "compute_max_score": 0, "advance": 84525, "advance_count": 1, "score": 37779, "build_scorer_count": 2, "create_weight": 4694895, "shallow_advance": 0, "create_weight_count": 1, "build_scorer": 7112295, "count_weight": 0, "count_weight_count": 0 }, "children": [ { "type": "TermQuery", "description": "message:get", "time_in_nanos": 3801935, "breakdown": { "set_min_competitive_score_count": 0, "match_count": 0, "shallow_advance_count": 3, "set_min_competitive_score": 0, "next_doc": 0, "match": 0, "next_doc_count": 0, "score_count": 5, "compute_max_score_count": 3, "compute_max_score": 32487, "advance": 5749, "advance_count": 6, "score": 16219, "build_scorer_count": 3, "create_weight": 2382719, "shallow_advance": 9754, "create_weight_count": 1, "build_scorer": 1355007, "count_weight": 0, "count_weight_count": 0 } }, { "type": "TermQuery", "description": "message:search", "time_in_nanos": 205654, "breakdown": { "set_min_competitive_score_count": 0, "match_count": 0, "shallow_advance_count": 3, "set_min_competitive_score": 0, "next_doc": 0, "match": 0, "next_doc_count": 0, "score_count": 5, "compute_max_score_count": 3, "compute_max_score": 6678, "advance": 12733, "advance_count": 6, "score": 6627, "build_scorer_count": 3, "create_weight": 130951, "shallow_advance": 2512, "create_weight_count": 1, "build_scorer": 46153, "count_weight": 0, "count_weight_count": 0 } } ] } ], "rewrite_time": 451233, "collector": [ { "name": "SimpleTopScoreDocCollector", "reason": "search_top_hits", "time_in_nanos": 775274 } ] } ], "aggregations": [], "fetch": { "type": "fetch", "description": "", "time_in_nanos": 660555, "breakdown": { "next_reader": 7292, "next_reader_count": 1, "load_stored_fields": 299325, "load_stored_fields_count": 5, "load_source": 3863, "load_source_count": 5 }, "debug": { "stored_fields": ["_id", "_routing", "_source"] }, "children": [ { "type": "FetchSourcePhase", "description": "", "time_in_nanos": 20443, "breakdown": { "next_reader": 745, "next_reader_count": 1, "process": 19698, "process_count": 5 }, "debug": { "fast_path": 5 } }, { "type": "StoredFieldsPhase", "description": "", "time_in_nanos": 5310, "breakdown": { "next_reader": 745, "next_reader_count": 1, "process": 4445, "process_count": 5 } } ] } } ] } }

"query": [ { "type": "BooleanQuery", "description": "message:get message:search", "time_in_nanos": "11972972", "breakdown": {...}, "children": [ { "type": "TermQuery", "description": "message:get", "time_in_nanos": "3801935", "breakdown": {...} }, { "type": "TermQuery", "description": "message:search", "time_in_nanos": "205654", "breakdown": {...} } ] } ]

智算服务

应用商城

合作伙伴

开发者

支持与服务

了解天翼云

Elasticsearch profile API来定位慢查询

Elasticsearch profile API来定位慢查询

活动

智算服务

应用商城

合作伙伴

开发者

支持与服务

了解天翼云

Elasticsearch profile API来定位慢查询

Elasticsearch profile API来定位慢查询