摘要:
TPCH的Q4使用了exists子查询, 但是结果与innodb有出入, 本文分析原因。
DDL:
select
o_orderpriority,
count(*) as order_count
from
orders
where
o_orderdate >= date '1993-07-01'
and o_orderdate < date '1993-07-01' + interval '3' month
and exists (
select
*
from
lineitem
where
l_orderkey = o_orderkey
and l_commitdate < l_receiptdate
)
group by
o_orderpriority
order by
o_orderpriority
limit 10;
查询结果对比:
问题分析:
mysql列存储引擎执行过程分析:
到TMD的语法树:
T:-1 = TABLE_ALIAS(T:0,"orders")
T:-2 = TMP_TABLE(T:4294967295)
VC:-2.0 = CREATE_VC(T:-2,PHYS_COL(T:-1,A:5))
A:-1 = T:-2.ADD_COLUMN(VC:-2.0,GROUP_BY,"o_orderpriority"
A:-2 = T:-2.ADD_COLUMN(<null>,COUNT,"order_count","ALL")
VC:-2.1 = CREATE_VC(T:-2,PHYS_COL(T:-2,A:-1))
T:-2.ADD_ORDER(VC:-2.1,ASC)
VC:-2.2 = CREATE_VC(T:-2,PHYS_COL(T:-1,A:4))
VC:-2.3 = CREATE_VC(T:-2,EXPR("date_literal"))
C:0 = CREATE_CONDS(T:-2,VC:-2.2,>=,VC:-2.3,<null>)
VC:-2.4 = CREATE_VC(T:-2,EXPR("date_add_interval"))
C:0.AND(VC:-2.2,<,VC:-2.4,<null>)
T:-3 = TABLE_ALIAS(T:1,"lineitem")
T:-4 = TMP_TABLE(T:4294967293)
VC:-4.0 = CREATE_VC(T:-4,EXPR("1"))
A:-1 = T:-4.ADD_COLUMN(VC:-4.0,LIST,"Not_used","ALL")
VC:-4.1 = CREATE_VC(T:-4,PHYS_COL(T:-3,A:0))
VC:-4.2 = CREATE_VC(T:-4,EXPR("TIANMU_FIELD(T:-1,A:0)"))
C:1 = CREATE_CONDS(T:-4,VC:-4.1,=,VC:-4.2,<null>)
VC:-4.3 = CREATE_VC(T:-4,PHYS_COL(T:-3,A:11))
VC:-4.4 = CREATE_VC(T:-4,PHYS_COL(T:-3,A:12))
C:1.AND(VC:-4.3,<,VC:-4.4,<null>)
T:-4.ADD_CONDS(C:1,WHERE)
T:-4.APPLY_CONDS()
T:-4.MODE(LIMIT,0,1)
VC:-2.5 = CREATE_VC(T:-2,SUBQUERY(T:-4))
C:0.AND(VC:-2.5,EXISTS,<null>,<null>)
T:-2.ADD_CONDS(C:0,WHERE)
T:-2.APPLY_CONDS()
T:-2.MODE(LIMIT,0,10)
RESULT(T:-2)
关键点:
T:-3 = TABLE_ALIAS(T:1,"lineitem")
T:-4 = TMP_TABLE(T:4294967293)
VC:-4.0 = CREATE_VC(T:-4,EXPR("1"))
A:-1 = T:-4.ADD_COLUMN(VC:-4.0,LIST,"Not_used","ALL")
VC:-4.1 = CREATE_VC(T:-4,PHYS_COL(T:-3,A:0))
VC:-4.2 = CREATE_VC(T:-4,EXPR("TIANMU_FIELD(T:-1,A:0)"))
C:1 = CREATE_CONDS(T:-4,VC:-4.1,=,VC:-4.2,<null>)
VC:-4.3 = CREATE_VC(T:-4,PHYS_COL(T:-3,A:11))
VC:-4.4 = CREATE_VC(T:-4,PHYS_COL(T:-3,A:12))
C:1.AND(VC:-4.3,<,VC:-4.4,<null>)
T:-4.ADD_CONDS(C:1,WHERE)
T:-4.APPLY_CONDS()
T:-4.MODE(LIMIT,0,1)
VC:-4.1 = CREATE_VC(T:-4,PHYS_COL(T:-3,A:0))
VC:-4.2 = CREATE_VC(T:-4,EXPR("TIANMU_FIELD(T:-1,A:0)"))
C:1 = CREATE_CONDS(T:-4,VC:-4.1,=,VC:-4.2,<null>)
分析:
- TMD引擎的子查询, 比较的o_orderkey, 不是外部表orders中传递过来的
- 而是在子查询时, 对lineitem和orders进行关联查询做出的
- 等价于以下mysql/innodb的处理
mysql> select
-> o_orderpriority,
-> count(*) as order_count
-> from
-> orders
-> where
-> o_orderdate >= date '1993-07-01'
-> and o_orderdate < date '1993-07-01' + interval '3' month
-> and exists (
-> select
-> *
-> from
-> lineitem,
-> orders
-> where
-> l_orderkey = o_orderkey
-> and l_commitdate < l_receiptdate
-> )
-> group by
-> o_orderpriority
-> order by
-> o_orderpriority
-> limit 10;
+-----------------+-------------+
| o_orderpriority | order_count |
+-----------------+-------------+
| 1-URGENT | 1099 |
| 2-HIGH | 1085 |
| 3-MEDIUM | 1102 |
| 4-NOT SPECIFIED | 1075 |
| 5-LOW | 1191 |
+-----------------+-------------+
5 rows in set (0.03 sec)
mysql列存储引擎对于查询计划, 等价于
一. 精简后的SQL
select
o_orderpriority
from
orders
where
o_orderdate >= date '1993-07-01'
and o_orderdate < date '1993-07-01' + interval '3' month
and exists (
select
*
from
lineitem
where
l_orderkey = o_orderkey
and l_commitdate < l_receiptdate
)
limit 10;
二. 查询计划可以理解为
select
o_orderpriority
from
orders
where
o_orderdate >= date '1993-07-01'
and o_orderdate < date '1993-07-01' + interval '3' month
and count (
select
1
from
orders,lineitem
where
l_orderkey = o_orderkey
and l_commitdate < l_receiptdate
limit 1
) == 1
limit 10;
trace显示:
mysql> select * from INFORMATION_SCHEMA.OPTIMIZER_TRACE\G
*************************** 1. row ***************************
QUERY: select
o_orderpriority,
count(*) as order_count
from
orders
where
o_orderdate >= date '1993-07-01'
and o_orderdate < date '1993-07-01' + interval '3' month
and exists (
select
*
from
lineitem
where
l_orderkey = o_orderkey
and l_commitdate < l_receiptdate
)
group by
o_orderpriority
order by
o_orderpriority
limit 10
TRACE: {
"steps": [
{
"join_preparation": {
"select#": 1,
"steps": [
{
"join_preparation": {
"select#": 2,
"steps": [
{
"expanded_query": "/* select#2 */ select 1 from `lineitem` where ((`lineitem`.`l_orderkey` = `orders`.`o_orderkey`) and (`lineitem`.`l_commitdate` < `lineitem`.`l_receiptdate`))"
}
]
}
},
{
"expanded_query": "/* select#1 */ select `orders`.`o_orderpriority` AS `o_orderpriority`,count(0) AS `order_count` from `orders` where ((`orders`.`o_orderdate` >= DATE'1993-07-01') and (`orders`.`o_orderdate` < (DATE'1993-07-01' + interval '3' month)) and exists(/* select#2 */ select 1 from `lineitem` where ((`lineitem`.`l_orderkey` = `orders`.`o_orderkey`) and (`lineitem`.`l_commitdate` < `lineitem`.`l_receiptdate`)))) group by `orders`.`o_orderpriority` order by `orders`.`o_orderpriority` limit 10"
}
]
}
},
{
"join_optimization": {
"select#": 1,
"steps": [
{
"condition_processing": {
"condition": "WHERE",
"original_condition": "((`orders`.`o_orderdate` >= DATE'1993-07-01') and (`orders`.`o_orderdate` < (DATE'1993-07-01' + interval '3' month)) and exists(/* select#2 */ select 1 from `lineitem` where ((`lineitem`.`l_orderkey` = `orders`.`o_orderkey`) and (`lineitem`.`l_commitdate` < `lineitem`.`l_receiptdate`))))",
"steps": [
{
"transformation": "equality_propagation",
"subselect_evaluation": [
],
"resulting_condition": "((`orders`.`o_orderdate` >= DATE'1993-07-01') and (`orders`.`o_orderdate` < (DATE'1993-07-01' + interval '3' month)) and exists(/* select#2 */ select 1 from `lineitem` where ((`lineitem`.`l_orderkey` = `orders`.`o_orderkey`) and (`lineitem`.`l_commitdate` < `lineitem`.`l_receiptdate`))))"
},
{
"transformation": "constant_propagation",
"subselect_evaluation": [
],
"resulting_condition": "((`orders`.`o_orderdate` >= DATE'1993-07-01') and (`orders`.`o_orderdate` < (DATE'1993-07-01' + interval '3' month)) and exists(/* select#2 */ select 1 from `lineitem` where ((`lineitem`.`l_orderkey` = `orders`.`o_orderkey`) and (`lineitem`.`l_commitdate` < `lineitem`.`l_receiptdate`))))"
},
{
"transformation": "trivial_condition_removal",
"subselect_evaluation": [
],
"resulting_condition": "((`orders`.`o_orderdate` >= DATE'1993-07-01') and (`orders`.`o_orderdate` < (DATE'1993-07-01' + interval '3' month)) and exists(/* select#2 */ select 1 from `lineitem` where ((`lineitem`.`l_orderkey` = `orders`.`o_orderkey`) and (`lineitem`.`l_commitdate` < `lineitem`.`l_receiptdate`))))"
}
]
}
}
]
}
}
]
}
MISSING_BYTES_BEYOND_MAX_MEM_SIZE: 0
INSUFFICIENT_PRIVILEGES: 0
1 row in set (0.00 sec)
*************************** 1. row ***************************
id: 1
select_type: PRIMARY
table: orders
partitions: NULL
type: ALL
possible_keys: NULL
key: NULL
key_len: NULL
ref: NULL
rows: 150000
filtered: 11.11
Extra: Using where with pushed condition ((`tpch`.`orders`.`o_orderdate` >= DATE'1993-07-01') and (`tpch`.`orders`.`o_orderdate` < <cache>((DATE'1993-07-01' + interval '3' month))) and exists(/* select#2 */ select 1 from `tpch`.`lineitem` where ((`tpch`.`lineitem`.`l_orderkey` = `tpch`.`orders`.`o_orderkey`) and (`tpch`.`lineitem`.`l_commitdate` < `tpch`.`lineitem`.`l_receiptdate`))))(t0) Pckrows: 3, susp. 3 (0 empty 0 full). Conditions: 2; Using temporary; Using filesort
*************************** 2. row ***************************
id: 2
select_type: DEPENDENT SUBQUERY
table: lineitem
partitions: NULL
type: ref
possible_keys: PRIMARY
key: PRIMARY
key_len: 4
ref: tpch.orders.o_orderkey
rows: 60058
filtered: 33.33
Extra: Using where with pushed condition (`tpch`.`lineitem`.`l_commitdate` < `tpch`.`lineitem`.`l_receiptdate`)(t0) Pckrows: 10, susp. 10 (0 empty 0 full). Conditions: 1
2 rows in set, 2 warnings (0.06 sec)
核对exists传值的问题点
一. 对比exists传值
select
*
from
orders
where
o_orderdate >= date '1993-07-01'
and o_orderdate < date '1993-07-01' + interval '3' month
and exists (
select 641 as o_orderkey
)
order by
o_totalprice
limit 10;
+------------+-----------+---------------+--------------+-------------+-----------------+-----------------+----------------+----------------------------------------------------------------------------+
| o_orderkey | o_custkey | o_orderstatus | o_totalprice | o_orderdate | o_orderpriority | o_clerk | o_shippriority | o_comment |
+------------+-----------+---------------+--------------+-------------+-----------------+-----------------+----------------+----------------------------------------------------------------------------+
| 591877 | 11008 | F | 921.74 | 1993-09-27 | 4-NOT SPECIFIED | Clerk#000000141 | 0 | haggle ruthlessly against the carefully idle deposits. fluffily pending t |
| 339815 | 13199 | F | 924.63 | 1993-09-05 | 5-LOW | Clerk#000000993 | 0 | s accounts along the bold, final deposits c |
| 106983 | 5350 | F | 1037.92 | 1993-09-14 | 2-HIGH | Clerk#000000474 | 0 | te blithely. blithely ironic deposits about the furiou |
| 522276 | 4183 | F | 1051.16 | 1993-09-13 | 1-URGENT | Clerk#000000803 | 0 | theodolites. blithe |
| 524641 | 8362 | F | 1069.35 | 1993-09-12 | 3-MEDIUM | Clerk#000000198 | 0 | . accounts haggle blithely above the furiou |
| 510020 | 8209 | F | 1228.11 | 1993-09-19 | 4-NOT SPECIFIED | Clerk#000000080 | 0 | ironic, even pinto beans run carefully. quickly ironic dependencie |
| 34338 | 6896 | F | 1265.76 | 1993-07-01 | 5-LOW | Clerk#000000242 | 0 | old deposits affix ironic |
| 363879 | 13861 | F | 1267.84 | 1993-08-09 | 2-HIGH | Clerk#000000685 | 0 | tes wake boldly blithe packages. furiously final platelets sleep s |
| 398497 | 10006 | F | 1284.22 | 1993-07-07 | 1-URGENT | Clerk#000000592 | 0 | x-ray. express platelets above the bold platelets are fluffily fi |
| 435299 | 3914 | F | 1305.68 | 1993-07-17 | 2-HIGH | Clerk#000000830 | 0 | ccounts cajole. carefully ironic requests sleep sp |
+------------+-----------+---------------+--------------+-------------+-----------------+-----------------+----------------+----------------------------------------------------------------------------+
10 rows in set (0.05 sec)
select
o_orderkey
from
orders
where
o_orderdate >= date '1993-07-01'
and o_orderdate < date '1993-07-01' + interval '3' month
and exists (
select
o_orderkey
from
lineitem,orders
where
l_orderkey = o_orderkey
and l_commitdate < l_receiptdate
and o_orderdate < date '1993-07-01' + interval '3' month
and o_orderdate >= date '1993-07-01'
and l_orderkey = 36673
limit 1
)
order by
o_totalprice
limit 10;
Empty set (0.18 sec)
select
o_orderkey
from
lineitem,orders
where
l_orderkey = o_orderkey
and l_commitdate < l_receiptdate
and o_orderdate < date '1993-07-01' + interval '3' month
and o_orderdate >= date '1993-07-01'
and l_orderkey = 36673
limit 1;
+------------+
| o_orderkey |
+------------+
| 36673 |
+------------+
1 row in set (0.02 sec)