背景
使用索引字段进行筛选数据时,explain查询语句发现MySQL居然没有使用索引,产生疑问,因此决定调查清楚为什么会不用索引,而是走全表扫描
原因调查出的结果是当你要查询的数据量是连续又占整个表五分之一以上那就不会走索引了,因为顺序读取更快,这个五分之一不是绝对的,而是一个经验值,实际上还是需要SQL优化时计算走这个索引耗时和不走这个索引的耗时去比较,会使用最快的一种方式查询数据。
原因是大概知道了,但是还是不够信服,想进一步看一下分别是多少耗时,最后判断使用了索引或者不走索引,因此我们使用优化器跟踪来看一看。
试验
- 链接Mysql数据库并选择好数据库
- 设置上记录trace
SET optimizer_trace="enabled=on";
- 执行要分析的SQL语句
select * from PKSessionHistory where sessionCreateTime >= '2022-11-28 16:00:00'
- 查询优化跟踪记录
select * from information_schema.optimizer_trace;
查询结果
建议使用json工具来查看json,例如utools里的json插件
{
"steps": [
{
"join_preparation": {
"select#": 1,
"steps": [
{
"expanded_query": "/* select#1 */ select `CohabitPKSessionHistory`.`id` AS `id`,`CohabitPKSessionHistory`.`pkSessionId` AS `pkSessionId`,`CohabitPKSessionHistory`.`cohabitSessionId` AS `cohabitSessionId`,`CohabitPKSessionHistory`.`userId` AS `userId`,`CohabitPKSessionHistory`.`objId` AS `objId`,`CohabitPKSessionHistory`.`createTime` AS `createTime`,`CohabitPKSessionHistory`.`ext` AS `ext`,`CohabitPKSessionHistory`.`userScore` AS `userScore`,`CohabitPKSessionHistory`.`objScore` AS `objScore`,`CohabitPKSessionHistory`.`sessionCreateTime` AS `sessionCreateTime`,`CohabitPKSessionHistory`.`sessionEndTime` AS `sessionEndTime`,`CohabitPKSessionHistory`.`expectedSessionEndTime` AS `expectedSessionEndTime`,`CohabitPKSessionHistory`.`expectedPKEndTime` AS `expectedPKEndTime`,`CohabitPKSessionHistory`.`endSource` AS `endSource`,`CohabitPKSessionHistory`.`totalScore` AS `totalScore`,`CohabitPKSessionHistory`.`cohabitType` AS `cohabitType`,`CohabitPKSessionHistory`.`winnerId` AS `winnerId`,`CohabitPKSessionHistory`.`loserId` AS `loserId`,`CohabitPKSessionHistory`.`userRoseScore` AS `userRoseScore`,`CohabitPKSessionHistory`.`objRoseScore` AS `objRoseScore` from `CohabitPKSessionHistory` where (`CohabitPKSessionHistory`.`sessionCreateTime` >= '2022-11-28 16:00:00')"
}
]
}
},
{
"join_optimization": {
"select#": 1,
"steps": [
{
"condition_processing": {
"condition": "WHERE",
"original_condition": "(`CohabitPKSessionHistory`.`sessionCreateTime` >= '2022-11-28 16:00:00')",
"steps": [
{
"transformation": "equality_propagation",
"resulting_condition": "(`CohabitPKSessionHistory`.`sessionCreateTime` >= '2022-11-28 16:00:00')"
},
{
"transformation": "constant_propagation",
"resulting_condition": "(`CohabitPKSessionHistory`.`sessionCreateTime` >= '2022-11-28 16:00:00')"
},
{
"transformation": "trivial_condition_removal",
"resulting_condition": "(`CohabitPKSessionHistory`.`sessionCreateTime` >= '2022-11-28 16:00:00')"
}
]
}
},
{
"substitute_generated_columns": {}
},
{
"table_dependencies": [
{
"table": "`CohabitPKSessionHistory`",
"row_may_be_null": false,
"map_bit": 0,
"depends_on_map_bits": []
}
]
},
{
"ref_optimizer_key_uses": []
},
{
"rows_estimation": [
{
"table": "`CohabitPKSessionHistory`",
"range_analysis": {
"table_scan": {
"rows": 2084,
"cost": 433.9
},
"potential_range_indexes": [
{
"index": "PRIMARY",
"usable": false,
"cause": "not_applicable"
},
{
"index": "idx_sessionCreateTime",
"usable": true,
"key_parts": [
"sessionCreateTime",
"id"
]
},
{
"index": "idx_sessionCreateTime_totalScore",
"usable": true,
"key_parts": [
"sessionCreateTime",
"totalScore",
"id"
]
}
],
"setup_range_conditions": [],
"group_index_range": {
"chosen": false,
"cause": "not_group_by_or_distinct"
},
"analyzing_range_alternatives": {
"range_scan_alternatives": [
{
"index": "idx_sessionCreateTime",
"ranges": [
"0x99ae790000 <= sessionCreateTime"
],
"index_dives_for_eq_ranges": true,
"rowid_ordered": false,
"using_mrr": false,
"index_only": false,
"rows": 182,
"cost": 219.41,
"chosen": true
},
{
"index": "idx_sessionCreateTime_totalScore",
"ranges": [
"0x99ae790000 <= sessionCreateTime"
],
"index_dives_for_eq_ranges": true,
"rowid_ordered": false,
"using_mrr": false,
"index_only": false,
"rows": 182,
"cost": 219.41,
"chosen": false,
"cause": "cost"
}
],
"analyzing_roworder_intersect": {
"usable": false,
"cause": "too_few_roworder_scans"
}
},
"chosen_range_access_summary": {
"range_access_plan": {
"type": "range_scan",
"index": "idx_sessionCreateTime",
"rows": 182,
"ranges": [
"0x99ae790000 <= sessionCreateTime"
]
},
"rows_for_plan": 182,
"cost_for_plan": 219.41,
"chosen": true
}
}
}
]
},
{
"considered_execution_plans": [
{
"plan_prefix": [],
"table": "`CohabitPKSessionHistory`",
"best_access_path": {
"considered_access_paths": [
{
"rows_to_scan": 182,
"access_type": "range",
"range_details": {
"used_index": "idx_sessionCreateTime"
},
"resulting_rows": 182,
"cost": 255.81,
"chosen": true
}
]
},
"condition_filtering_pct": 100,
"rows_for_plan": 182,
"cost_for_plan": 255.81,
"chosen": true
}
]
},
{
"attaching_conditions_to_tables": {
"original_condition": "(`CohabitPKSessionHistory`.`sessionCreateTime` >= '2022-11-28 16:00:00')",
"attached_conditions_computation": [],
"attached_conditions_summary": [
{
"table": "`CohabitPKSessionHistory`",
"attached": "(`CohabitPKSessionHistory`.`sessionCreateTime` >= '2022-11-28 16:00:00')"
}
]
}
},
{
"refine_plan": [
{
"table": "`CohabitPKSessionHistory`",
"pushed_index_condition": "(`CohabitPKSessionHistory`.`sessionCreateTime` >= '2022-11-28 16:00:00')",
"table_condition_attached": null
}
]
}
]
}
},
{
"join_execution": {
"select#": 1,
"steps": []
}
}
]
}
从优化器记录来看,我们两个索引耗时都是219,全表扫描耗时会是419,因此选择了第一个索引。我们可以explain看一下会是一样的结果选择了第一个索引。另外写了using index condition,是使用了索引下推,不了解的可以查阅相关博客看下索引下推,主要是把mysql服务层要做的筛选下推给了存储引擎筛选,减少两层之间传输的数据量,减少IO。
mysql> explain select * from CohabitPKSessionHistory where sessionCreateTime >= '2022-11-28 16:00:00';
+----+-------------+-------------------------+------------+-------+--------------------------------------------------------+-----------------------+---------+------+------+----------+-----------------------+
| id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |
+----+-------------+-------------------------+------------+-------+--------------------------------------------------------+-----------------------+---------+------+------+----------+-----------------------+
| 1 | SIMPLE | CohabitPKSessionHistory | NULL | range | idx_sessionCreateTime,idx_sessionCreateTime_totalScore | idx_sessionCreateTime | 5 | NULL | 183 | 100.00 | Using index condition |
+----+-------------+-------------------------+------------+-------+--------------------------------------------------------+-----------------------+---------+------+------+----------+-----------------------+
1 row in set, 1 warning (0.38 sec)
由于该测试表数据量目前只有两千行,因此我们创建一个复制表,留四个字段,先插入大量数据,再构建索引,然后做试验~
- 构建表
索引先不要去建,等数据插入完了再建索引,因为插入数据过程中要更改索引,又要IO
create table PkSessionHistoryCopy(
`id` int(20) unsigned NOT NULL AUTO_INCREMENT primary key COMMENT '自增ID',
`userId` int(11) NOT NULL COMMENT '用户ID,主动方',
`objId` int(11) NOT NULL COMMENT '用户ID,被动方',
`sessionCreateTime` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT 'PK开始时间 '
);
- 插入数据
我先从刚那个两千来行的表把数据全部复制进去,然后自己copy自己,数据就会不断翻倍
insert into PkSessionHistoryCopy (userId,objId,sessionCreateTime) select userId,objId,sessionCreateTime from CohabitPKSessionHistory
每执行一次下面的自己数据量就翻倍,我目前执行到翻倍成两百五十万行数据
insert into PkSessionHistoryCopy (userId,objId,sessionCreateTime) select userId,objId,sessionCreateTime from PkSessionHistoryCopy
- 建索引
alter table PkSessionHistoryCopy add index idx_time (sessionCreateTime)
- 看下数据量
select * from PkSessionHistoryCopy order by id desc limit 1
id | userId | objId | sessionCreateTime |
---|---|---|---|
2510823 | 52089016 | 52089177 | 2022-12-05 16:06:59 |
- 开始试验,我们先来下根据索引字段查的这个执行计划
explain select * from PkSessionHistoryCopy where sessionCreateTime >= '2020-01-01 00:00:00';
mysql> explain select * from PkSessionHistoryCopy where sessionCreateTime >= '2020-01-01 00:00:00';
+----+-------------+----------------------+------------+------+---------------+------+---------+------+---------+----------+-------------+
| id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |
+----+-------------+----------------------+------------+------+---------------+------+---------+------+---------+----------+-------------+
| 1 | SIMPLE | PkSessionHistoryCopy | NULL | ALL | idx_time | NULL | NULL | NULL | 2264381 | 50.00 | Using where |
+----+-------------+----------------------+------------+------+---------------+------+---------+------+---------+----------+-------------+
1 row in set, 1 warning (0.04 sec)
我们可以看到结果显示不会走索引,为啥呢,我们看下优化器跟踪
select * from information_schema.optimizer_trace;
{
"steps": [
{
"join_preparation": {
"select#": 1,
"steps": [
{
"expanded_query": "/* select#1 */ select `PkSessionHistoryCopy`.`id` AS `id`,`PkSessionHistoryCopy`.`userId` AS `userId`,`PkSessionHistoryCopy`.`objId` AS `objId`,`PkSessionHistoryCopy`.`sessionCreateTime` AS `sessionCreateTime` from `PkSessionHistoryCopy` where (`PkSessionHistoryCopy`.`sessionCreateTime` >= '2020-01-01 00:00:00')"
}
]
}
},
{
"join_optimization": {
"select#": 1,
"steps": [
{
"condition_processing": {
"condition": "WHERE",
"original_condition": "(`PkSessionHistoryCopy`.`sessionCreateTime` >= '2020-01-01 00:00:00')",
"steps": [
{
"transformation": "equality_propagation",
"resulting_condition": "(`PkSessionHistoryCopy`.`sessionCreateTime` >= '2020-01-01 00:00:00')"
},
{
"transformation": "constant_propagation",
"resulting_condition": "(`PkSessionHistoryCopy`.`sessionCreateTime` >= '2020-01-01 00:00:00')"
},
{
"transformation": "trivial_condition_removal",
"resulting_condition": "(`PkSessionHistoryCopy`.`sessionCreateTime` >= '2020-01-01 00:00:00')"
}
]
}
},
{
"substitute_generated_columns": {}
},
{
"table_dependencies": [
{
"table": "`PkSessionHistoryCopy`",
"row_may_be_null": false,
"map_bit": 0,
"depends_on_map_bits": []
}
]
},
{
"ref_optimizer_key_uses": []
},
{
"rows_estimation": [
{
"table": "`PkSessionHistoryCopy`",
"range_analysis": {
"table_scan": {
"rows": 2264381,
"cost": 458228
},
"potential_range_indexes": [
{
"index": "PRIMARY",
"usable": false,
"cause": "not_applicable"
},
{
"index": "idx_time",
"usable": true,
"key_parts": [
"sessionCreateTime",
"id"
]
}
],
"setup_range_conditions": [],
"group_index_range": {
"chosen": false,
"cause": "not_group_by_or_distinct"
},
"analyzing_range_alternatives": {
"range_scan_alternatives": [
{
"index": "idx_time",
"ranges": [
"0x99a5420000 <= sessionCreateTime"
],
"index_dives_for_eq_ranges": true,
"rowid_ordered": false,
"using_mrr": false,
"index_only": false,
"rows": 1132190,
"cost": 1.36e6,
"chosen": false,
"cause": "cost"
}
],
"analyzing_roworder_intersect": {
"usable": false,
"cause": "too_few_roworder_scans"
}
}
}
}
]
},
{
"considered_execution_plans": [
{
"plan_prefix": [],
"table": "`PkSessionHistoryCopy`",
"best_access_path": {
"considered_access_paths": [
{
"rows_to_scan": 2264381,
"access_type": "scan",
"resulting_rows": 1.13e6,
"cost": 458226,
"chosen": true
}
]
},
"condition_filtering_pct": 100,
"rows_for_plan": 1.13e6,
"cost_for_plan": 458226,
"chosen": true
}
]
},
{
"attaching_conditions_to_tables": {
"original_condition": "(`PkSessionHistoryCopy`.`sessionCreateTime` >= '2020-01-01 00:00:00')",
"attached_conditions_computation": [],
"attached_conditions_summary": [
{
"table": "`PkSessionHistoryCopy`",
"attached": "(`PkSessionHistoryCopy`.`sessionCreateTime` >= '2020-01-01 00:00:00')"
}
]
}
},
{
"refine_plan": [
{
"table": "`PkSessionHistoryCopy`"
}
]
}
]
}
},
{
"join_explain": {
"select#": 1,
"steps": []
}
}
]
}
我们可以看到全表扫描的cost花费是458228,扫描了2264381行
再看我们走索引的耗时情况,扫描行数1132190,耗时1.36e6,也就是1360000,是不是比458228多多了,因此我们可以看到写着chosen:false,意思是选择:否,也就是不选这个索引。cause:cost原因是时间消耗更多
那数据量多少的时候又能走索引更高效呢
explain select * from PkSessionHistoryCopy where sessionCreateTime >= '2022-11-29 00:00:00';
+----+-------------+----------------------+------------+-------+---------------+----------+---------+------+--------+----------+-----------------------+
| id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |
+----+-------------+----------------------+------------+-------+---------------+----------+---------+------+--------+----------+-----------------------+
| 1 | SIMPLE | PkSessionHistoryCopy | NULL | range | idx_time | idx_time | 5 | NULL | 283980 | 100.00 | Using index condition |
+----+-------------+----------------------+------------+-------+---------------+----------+---------+------+--------+----------+-----------------------+
1 row in set, 1 warning (0.04 sec)
mysql> select count(*) from PkSessionHistoryCopy where sessionCreateTime >= '2022-11-29 00:00:00';
+----------+
| count(*) |
+----------+
| 145408 |
+----------+
1 row in set (0.06 sec)
这个日期是我试出来的这天,然后我们看一下占比发现是百分之六,所以网络上说的百分之二十、五分之一以上的说法,其实也未必准,实际操作还是得先explain去算一下两种的耗时看一下会不会选择索引。
还有一种办法让他高效走索引,就是加limit,限制查询的数据条数,这样直接走索引查出那些页的数据即可,比扫描全表更高效,从这次试验我们也可以看出,不是你建了索引并且用索引字段筛选就一定走索引的,他还是会在优化时计算每种方式的耗时,选择最优的,另外如果你用覆盖索引、limit减少数据量也是可以优化速度的。