今天同事过来问了个sql相关的问题。 为啥select查询条件中2个列,表上有2个单列索引,它执行计划走的其中一个索引,MySQL它这么做是有什么依据吗?
这个问题,我们可以使用mysql的trace功能分析下看看。
trace分析优化器如何选择执行计划 ,这个方法的缺点是必须真实的运行一次这个SQL才能获取分析结果
案例:
CREATE TABLE `tb1` (
`id` bigint(20) unsigned NOT NULL AUTO_INCREMENT,
`o_no` varchar(32) NOT NULL,
`status` char(4) NOT NULL ,
`p_id` bigint(20) DEFAULT NULL ,
`p_name` varchar(64) DEFAULT NULL ,
`pay_time` varchar(64) DEFAULT NULL ,
`create_time` varchar(64) DEFAULT NULL ,
PRIMARY KEY (`id`),
KEY `key_o_no_status` (`o_no`,`status`),
KEY `idx_create_time` (`create_time`),
KEY `idx_p_name` (`p_name`),
KEY `idx_pay_time` (`pay_time`),
KEY `idx_p_id` (`p_id`)
) ENGINE=InnoDB ;
SQL:
SELECT * FROM db1.tb1 WHERE p_id = 11 AND p_name = '张飞' limit 0,1000 ;
开启trace:
SET OPTIMIZER_TRACE="enabled=on";
SET optimizer_trace_max_mem_size=1000000;
-- 真实执行下这个查询sql
SELECT * FROM db1.tb1 WHERE p_id = 11 AND p_name = '张飞' limit 0,1000 ;
-- 看下trace的结果
SELECT * FROM information_schema.optimizer_trace \G
结果如下,已经做了些简单的备注。
{
"steps": [
{
"join_preparation": {
"select#": 1,
"steps": [
{
"expanded_query": "/* select#1 */ select 这里打下码 from `tb1` where ((`tb1`.`p_id` = 11) and (`tb1`.`p_name` = '张飞')) limit 0,1000"
}
]
}
},
{
"join_optimization": {
"select#": 1,
"steps": [
{
"condition_processing": {
"condition": "WHERE",
"original_condition": "((`tb1`.`p_id` = 11) and (`tb1`.`p_name` = '张飞'))",
"steps": [
{
"transformation": "equality_propagation",
"resulting_condition": "((`tb1`.`p_name` = '张飞') and multiple equal(11, `tb1`.`p_id`))"
},
{
"transformation": "constant_propagation",
"resulting_condition": "((`tb1`.`p_name` = '张飞') and multiple equal(11, `tb1`.`p_id`))"
},
{
"transformation": "trivial_condition_removal",
"resulting_condition": "((`tb1`.`p_name` = '张飞') and multiple equal(11, `tb1`.`p_id`))"
}
]
}
},
{
"table_dependencies": [
{
"table": "`tb1`",
"row_may_be_null": false,
"map_bit": 0,
"depends_on_map_bits": [
]
}
]
},
{
"ref_optimizer_key_uses": [
{
"table": "`tb1`",
"field": "p_name",
"equals": "'张飞'",
"null_rejecting": false
},
{
"table": "`tb1`",
"field": "p_id",
"equals": "11",
"null_rejecting": false
}
]
},
{
"rows_estimation": [
{
"table": "`tb1`",
"range_analysis": {
"table_scan": {
"rows": 25450,
"cost": 30542
},
"potential_range_indices": [
{
"index": "PRIMARY",
"usable": false,
"cause": "not_applicable"
},
{
"index": "key_o_no_status",
"usable": false,
"cause": "not_applicable"
},
{
"index": "idx_create_time",
"usable": false,
"cause": "not_applicable"
},
{
"index": "idx_p_name",
"usable": true, ---> 这个索引备选
"key_parts": [
"p_name",
"id"
]
},
{
"index": "idx_pay_time",
"usable": false,
"cause": "not_applicable"
},
{
"index": "idx_p_id",
"usable": true, ---> 这个索引备选
"key_parts": [
"p_id",
"id"
]
}
],
"setup_range_conditions": [
],
"group_index_range": {
"chosen": false,
"cause": "not_group_by_or_distinct"
},
"analyzing_range_alternatives": {
"range_scan_alternatives": [
{
"index": "idx_p_name",
"ranges": [
"张飞 <= p_name <= 张飞"
],
"index_dives_for_eq_ranges": true,
"rowid_ordered": true,
"using_mrr": false,
"index_only": false,
"rows": 45,
"cost": 55.01, ---> 注意这里的cost
"chosen": true
},
{
"index": "idx_p_id",
"ranges": [
"11 <= p_id <= 11"
],
"index_dives_for_eq_ranges": true,
"rowid_ordered": true,
"using_mrr": false,
"index_only": false,
"rows": 1,
"cost": 2.21, ---> 注意这里的cost
"chosen": true
}
],
"analyzing_roworder_intersect": {
"intersecting_indices": [
{
"index": "idx_p_id",
"index_scan_cost": 1,
"cumulated_index_scan_cost": 1,
"disk_sweep_cost": 0,
"cumulated_total_cost": 1,
"usable": true,
"matching_rows_now": 1,
"isect_covering_with_this_index": false,
"chosen": true --->
},
{
"index": "idx_p_name",
"index_scan_cost": 2.0732,
"cumulated_index_scan_cost": 3.0732,
"disk_sweep_cost": 0,
"cumulated_total_cost": 3.0732,
"usable": true,
"matching_rows_now": 0.0018,
"isect_covering_with_this_index": false,
"chosen": false,
"cause": "does_not_reduce_cost"
}
],
"clustered_pk": {
"clustered_pk_added_to_intersect": false,
"cause": "no_clustered_pk_index"
},
"chosen": false,
"cause": "too_few_indexes_to_merge"
}
},
"chosen_range_access_summary": {
"range_access_plan": {
"type": "range_scan",
"index": "idx_p_id",
"rows": 1,
"ranges": [
"11 <= p_id <= 11"
]
},
"rows_for_plan": 1,
"cost_for_plan": 2.21,
"chosen": true --->
}
}
}
]
},
{
"considered_execution_plans": [ ---> 评估最优路径
{
"plan_prefix": [
],
"table": "`tb1`",
"best_access_path": {
"considered_access_paths": [
{
"access_type": "ref",
"index": "idx_p_name",
"rows": 45,
"cost": 54,
"chosen": true ---> 备选执行计划1
},
{
"access_type": "ref",
"index": "idx_p_id",
"rows": 1,
"cost": 1.2,
"chosen": true ---> 备选执行计划2
},
{
"access_type": "range",
"cause": "heuristic_index_cheaper",
"chosen": false
}
]
},
"cost_for_plan": 1.2, ---> 最终选择了 "备选执行计划2" , idx_p_id 这个索引,因为它的cost最小!
"rows_for_plan": 1,
"chosen": true
}
]
},
{
"attaching_conditions_to_tables": {
"original_condition": "((`tb1`.`p_id` = 11) and (`tb1`.`p_name` = '张飞'))",
"attached_conditions_computation": [
],
"attached_conditions_summary": [
{
"table": "`tb1`",
"attached": "(`tb1`.`p_name` = '张飞')"
}
]
}
},
{
"refine_plan": [
{
"table": "`tb1`"
}
]
}
]
}
},
{
"join_execution": {
"select#": 1,
"steps": [
]
}
}
]
}
可以看到,mysql分析了各个可能,选择了cost最低的access_path(至少是它认为的这是个最优解)。
需要说明的是,如果元数据信息不准确的话,也会影响执行计划的判断的。
有时候执行计划差的太多,我们可以人为的做一次analyze table,此外还可以调大innodb_stats_persistent_sample_pages值(从默认20调整到64),该参数表示analyze table更新Cardinality值时每次需要采样的页的数量。增加这个值,可以提高统计信息的精确度,同样也能提高执行计划的准确性,不过也相应增加了analyze table的时间,也会增加在InnoDB表上分析的I/O开销。