摘要:
mysql8对于IN操作符, 存在使用EXISTS进行优化的策略。
本文分析mysql8是如何实现此优化的。
mysql8官方文档说明in2exists优化:
逻辑处理:
调用堆栈:
(gdb) bt
#0 Item_in_subselect::single_value_in_to_exists_transformer (this=0x7f9158011518, select=0x7f915801db90, func=0x4331188 <eq_creator>)
at /home/jenkins/workspace/stonedb5.7-zsl-centos7.9-75-131-20220805/sql/item_:2222
#1 0x0000000002237016 in Item_in_subselect::single_value_transformer (this=0x7f9158011518, select=0x7f915801db90, func=0x4331188 <eq_creator>)
at /home/jenkins/workspace/stonedb5.7-zsl-centos7.9-75-131-20220805/sql/item_:1948
#2 0x0000000002239a34 in Item_in_subselect::select_in_like_transformer (this=0x7f9158011518, select=0x7f915801db90, func=0x4331188 <eq_creator>)
at /home/jenkins/workspace/stonedb5.7-zsl-centos7.9-75-131-20220805/sql/item_:2643
#3 0x00000000022396f0 in Item_in_subselect::select_transformer (this=0x7f9158011518, select=0x7f915801db90)
at /home/jenkins/workspace/stonedb5.7-zsl-centos7.9-75-131-20220805/sql/item_:2547
#4 0x00000000023db2a0 in st_select_lex::resolve_subquery (this=0x7f915801db90, thd=0x7f9158002c10)
at /home/jenkins/workspace/stonedb5.7-zsl-centos7.9-75-131-20220805/sql/sql_:1080
#5 0x00000000023d936a in st_select_lex::prepare (this=0x7f915801db90, thd=0x7f9158002c10) at /home/jenkins/workspace/stonedb5.7-zsl-centos7.9-75-131-20220805/sql/sql_:317
#6 0x000000000223a6e3 in subselect_single_select_engine::prepare (this=0x7f915801e238) at /home/jenkins/workspace/stonedb5.7-zsl-centos7.9-75-131-20220805/sql/item_:2976
#7 0x0000000002232a28 in Item_subselect::fix_fields (this=0x7f9158011518, thd=0x7f9158002c10, ref=0x7f91580119d0)
at /home/jenkins/workspace/stonedb5.7-zsl-centos7.9-75-131-20220805/sql/item_:427
#8 0x0000000002239d24 in Item_in_subselect::fix_fields (this=0x7f9158011518, thd_arg=0x7f9158002c10, ref=0x7f91580119d0)
at /home/jenkins/workspace/stonedb5.7-zsl-centos7.9-75-131-20220805/sql/item_:2689
#9 0x0000000001d787de in Item_cond::fix_fields (this=0x7f91580118a8, thd=0x7f9158002c10, ref=0x7f9158007fe8)
at /home/jenkins/workspace/stonedb5.7-zsl-centos7.9-75-131-20220805/sql/item_:5862
#10 0x00000000023db80e in st_select_lex::setup_conds (this=0x7f9158007f10, thd=0x7f9158002c10) at /home/jenkins/workspace/stonedb5.7-zsl-centos7.9-75-131-20220805/sql/sql_:1191
#11 0x00000000023d8e6d in st_select_lex::prepare (this=0x7f9158007f10, thd=0x7f9158002c10) at /home/jenkins/workspace/stonedb5.7-zsl-centos7.9-75-131-20220805/sql/sql_:212
#12 0x0000000002c7091c in Tianmu::core::optimize_select (thd=0x7f9158002c10, select_options=2147748608, result=0x7f9158020010, select_lex=0x7f9158007f10,
optimize_after_tianmu=@0x7f938a864d0c: 0, free_join=@0x7f938a864d10: 1) at /home/jenkins/workspace/stonedb5.7-zsl-centos7.9-75-131-20220805/storage/tianmu/core/engine_execute.cpp:338
#13 0x0000000002c702d6 in Tianmu::core::Engine::HandleSelect (this=0x54ccfd0, thd=0x7f9158002c10, lex=0x7f9158004f38, result=@0x7f938a864d18: 0x7f9158020010, setup_tables_done_option=0,
res=@0x7f938a864d14: 0, optimize_after_tianmu=@0x7f938a864d0c: 0, tianmu_free_join=@0x7f938a864d10: 1, with_insert=0)
at /home/jenkins/workspace/stonedb5.7-zsl-centos7.9-75-131-20220805/storage/tianmu/core/engine_execute.cpp:225
#14 0x0000000002d58d6d in Tianmu::dbhandler::TIANMU_HandleSelect (thd=0x7f9158002c10, lex=0x7f9158004f38, result=@0x7f938a864d18: 0x7f9158020010, setup_tables_done_option=0,
res=@0x7f938a864d14: 0, optimize_after_tianmu=@0x7f938a864d0c: 0, tianmu_free_join=@0x7f938a864d10: 1, with_insert=0)
at /home/jenkins/workspace/stonedb5.7-zsl-centos7.9-75-131-20220805/storage/tianmu/handler/ha_rcengine.cpp:82
#15 0x000000000239b0ca in execute_sqlcom_select (thd=0x7f9158002c10, all_tables=0x7f915801c9b0) at /home/jenkins/workspace/stonedb5.7-zsl-centos7.9-75-131-20220805/sql/sql_:5182
#16 0x000000000239444e in mysql_execute_command (thd=0x7f9158002c10, first_level=true) at /home/jenkins/workspace/stonedb5.7-zsl-centos7.9-75-131-20220805/sql/sql_:2831
#17 0x000000000239c093 in mysql_parse (thd=0x7f9158002c10, parser_state=0x7f938a865eb0) at /home/jenkins/workspace/stonedb5.7-zsl-centos7.9-75-131-20220805/sql/sql_:5621
#18 0x000000000239132b in dispatch_command (thd=0x7f9158002c10, com_data=0x7f938a866650, command=COM_QUERY)
at /home/jenkins/workspace/stonedb5.7-zsl-centos7.9-75-131-20220805/sql/sql_:1495
#19 0x0000000002390257 in do_command (thd=0x7f9158002c10) at /home/jenkins/workspace/stonedb5.7-zsl-centos7.9-75-131-20220805/sql/sql_:1034
#20 0x00000000024c2e57 in handle_connection (arg=0x82e78b0) at /home/jenkins/workspace/stonedb5.7-zsl-centos7.9-75-131-20220805/sql/conn_handler/connection_handler_per_:313
#21 0x0000000002ba6a88 in pfs_spawn_thread (arg=0x82b74f0) at /home/jenkins/workspace/stonedb5.7-zsl-centos7.9-75-131-20220805/storage/perfschema/:2197
#22 0x00007f9394bf7ea5 in start_thread () from /lib64/libpthread.so.0
#23 0x00007f939211bb0d in clone () from /lib64/libc.so.6
mysql8.0.30中的in2exists的处理:
子查询优化主干:
JOIN::prepare //1 查询优化器准备阶段
//去掉子查询中冗余子句
//简单“IN/=ANY”格式子查询优先用半连接优化子查询;否则,调用select_transformer进行子查询优化
//进行其他类型的子查询优化
//单行子查询优化
//IN子查询优
//IN子查询优化
//对单列“IN/ALL/ANY”子查询优化
//IN转换为EXISTS子查询
//对多列子查询优化
//特殊情况IN向EXISTS转换
//对“ALL/ANY”子查询优化
r //对“EXISTS”子查询优化
JOIN::optimize//2 查询优化器优化阶段
//上拉IN_SUBS类型子查询(尽量把子查询转换为半连接;否则使用EXISTS策略优化);主要的子查询优化方式
replace_subcondition
//把IN_SUBS类型的子查询转换为半连接(先于decide_subquery_strategy函数执行,表明子查询的半连接优化策略优先使用)
//以上完成子查询的优化工作
//1 主要优化方式 ->IN_SUBS类型的子查询转换为半连接(否则采用ESISTS策略)
//2 其他优化手段 ->ALL、ANY等类型特殊优化(MIN、MAX最值优化)
//完成多表连接
//对半连接的嵌套形式进行半连接子句的物化优化
semijoin_types_allow_materialization
calculate_materialization_costs
//确定多表连接次序(确定半连接的优化策略)
//确定半连接的优化策略
//确定子查询的优化策略(确定“materialization”或“EXISTS strategy”)
//计算子查询优化策略的花费,选择最优
subquery_allows_materialization
calculate_materialization_costs
optimize_semijoin_nests_for_materialization
calculate_materialization_costs
finalize_exists_transform
finalize_materialization_transform
//以下完成“半连接”的五种“消重策略”的选择
//为每个表构造连接需要的信息(把逻辑运算符转化为物理运算符)
//半连接“消重”工作
subquery策略定义:
item_subselect.h:395
/**
Strategy which will be used to handle this subquery: flattening to a
semi-join, conversion to a derived table, rewrite of IN to EXISTS...
Sometimes the strategy is first only a candidate, then the real decision
happens in a second phase. Other times the first decision is final.
*/
enum class Subquery_strategy : int {
/// Nothing decided yet
UNSPECIFIED,
/// Candidate for rewriting IN(subquery) to EXISTS, or subquery
/// materialization
CANDIDATE_FOR_IN2EXISTS_OR_MAT,
/// Candidate for semi-join flattening
CANDIDATE_FOR_SEMIJOIN,
/// Candidate for rewriting to joined derived table
CANDIDATE_FOR_DERIVED_TABLE,
/// Semi-join flattening
SEMIJOIN,
/// Rewrite to joined derived table
DERIVED_TABLE,
/// Evaluate as EXISTS subquery (possibly after rewriting from another type)
SUBQ_EXISTS,
/// Subquery materialization (HASH_SJ_ENGINE)
SUBQ_MATERIALIZATION,
/// Subquery has been deleted, probably because it was always false
DELETED,
};
JOIN::decide_subquery_strategy
/**
Decides between EXISTS and materialization; performs last steps to set up
the chosen strategy.
@returns 'false' if no error
@note If UNION this is called on each contained JOIN.
*/
bool JOIN::decide_subquery_strategy()
{
assert(unit->item);
switch (unit->item->substype())
{
case Item_subselect::IN_SUBS:
case Item_subselect::ALL_SUBS:
case Item_subselect::ANY_SUBS:
// All of those are children of Item_in_subselect and may use EXISTS
break;
default:
return false;
}
Item_in_subselect * const in_pred=
static_cast<Item_in_subselect *>(unit->item);
Item_exists_subselect::enum_exec_method chosen_method= in_pred->exec_method;
// Materialization does not allow UNION so this can't happen:
assert(chosen_method != Item_exists_subselect::EXEC_MATERIALIZATION);
if ((chosen_method == Item_exists_subselect::EXEC_EXISTS_OR_MAT) &&
compare_costs_of_subquery_strategies(&chosen_method))
return true;
switch (chosen_method)
{
case Item_exists_subselect::EXEC_EXISTS:
return in_pred->finalize_exists_transform(select_lex);
case Item_exists_subselect::EXEC_MATERIALIZATION:
return in_pred->finalize_materialization_transform(this);
default:
assert(false);
return true;
}
}
Item_in_subselect::finalize_exists_transform
bool Item_in_subselect::finalize_exists_transform(SELECT_LEX *select_lex)
{
assert(exec_method == EXEC_EXISTS_OR_MAT ||
exec_method == EXEC_EXISTS);
/*
Change
SELECT expr1, expr2
to
SELECT 1,1
because EXISTS does not care about the selected expressions, only about
the existence of rows.
If UNION, we have to modify the SELECT list of each SELECT in the
UNION, fortunately this function is indeed called for each SELECT_LEX.
If this is a prepared statement, we must allow the next execution to use
materialization. So, we should back up the original SELECT list. If this
is a UNION, this means backing up the N original SELECT lists. To
avoid this constraint, we change the SELECT list only if this is not a
prepared statement.
*/
if (unit->thd->stmt_arena->is_conventional()) // not prepared stmt
{
uint cnt= select_lex->item_list.elements;
select_lex->item_list.empty();
for(; cnt > 0; cnt--)
select_lex->item_list.push_back(new Item_int(NAME_STRING("Not_used"),
(longlong) 1,
MY_INT64_NUM_DECIMAL_DIGITS));
Opt_trace_context * const trace= &unit->thd->opt_trace;
OPT_TRACE_TRANSFORM(trace, oto0, oto1,
select_lex->select_number,
"IN (SELECT)", "EXISTS (CORRELATED SELECT)");
oto1.add("put_1_in_SELECT_list", true);
}
/*
Note that if the subquery is "SELECT1 UNION SELECT2" then this is not
working optimally (Bug#14215895).
*/
unit->global_parameters()->select_limit= new Item_int(1);
unit->set_limit(unit->global_parameters());
select_lex->join->allow_outer_refs= true; // for JOIN::set_prefix_tables()
exec_method= EXEC_EXISTS;
return false;
}
Item_in_subselect::single_value_in_to_exists_transformer
item_:1914
将IN转换为EXISTS
/* 通过判断条件注入将IN语法转换为EXISTS语法
向子查询中注入额外的判断条件,并将子查询标记为关联子查询。*/
|--Item_in_subselect::single_value_in_to_exists_transformer()
|--如果子查询包含聚合函数、窗口函数、GROUP语法、HAVING语法,将判断条件加入到HAVING语法中。
|--如果我们想区分NULL和False的结果的话,将这个条件封装到触发器中。
SELECT ie FROM ... HAVING subq_having AND
trigcond(oe $cmp$ ref_or_null_helper<ie>)
|--创建指向子查询唯一列的Item_ref_null_helper对象,与之前注入的左表达式Item_ref共同创建比较表达式
|--如果子查询的第一个列为包含聚合列的表达式,那么WHERE和HAVING语法中可能通过不同的Item_ref引用到这个Item,存入到Item_sum::ref_by数组中
|--and_items() // 加入到HAVING条件中
|--如果不包含聚合函数、窗口函数、GROUP语法、HAVING语法,将判断条件加入WHERE语句中
|--如果不需要区分NULL与False的结果:
SELECT 1 FROM ... WHERE (oe $cmp$ ie) AND subq_where
|--如果需要区分上述结果的差别,使用触发器
SELECT 1 FROM ...
WHERE subq_where AND trigcond((oe $cmp$ ie) OR (ie IS NULL))
HAVING trigcond(@<is_not_null_test@>(ie))
|--其他,单个查询块,没有表及上述语法,直接用条件表达式在外查询中替代
/**
Transform an IN predicate into EXISTS via predicate injection.
@details The transformation injects additional predicates into the subquery
(and makes the subquery correlated) as follows.
- If the subquery has aggregates, GROUP BY, or HAVING, convert to
SELECT ie FROM ... HAVING subq_having AND
trigcond(oe $cmp$ ref_or_null_helper<ie>)
the addition is wrapped into trigger only when we want to distinguish
between NULL and FALSE results.
- Otherwise (no aggregates/GROUP BY/HAVING) convert it to one of the
following:
= If we don't need to distinguish between NULL and FALSE subquery:
SELECT 1 FROM ... WHERE (oe $cmp$ ie) AND subq_where
= If we need to distinguish between those:
SELECT 1 FROM ...
WHERE subq_where AND trigcond((oe $cmp$ ie) OR (ie IS NULL))
HAVING trigcond(@<is_not_null_test@>(ie))
At JOIN::optimize() we will compare costs of materialization and EXISTS; if
the former is cheaper we will switch to it.
@param thd Thread handle
@param select Query block of the subquery
@param func Subquery comparison creator
@retval RES_OK Either subquery was transformed, or appropriate
predicates where injected into it.
@retval RES_REDUCE The subquery was reduced to non-subquery
@retval RES_ERROR Error
*/
Item_subselect::trans_res
Item_in_subselect::single_value_in_to_exists_transformer(THD *thd,
Query_block *select,
Comp_creator *func) {
DBUG_TRACE;
Query_block *outer = select->outer_query_block();
OPT_TRACE_TRANSFORM(&thd->opt_trace, oto0, oto1, select->select_number,
"IN (SELECT)", "EXISTS (CORRELATED SELECT)");
oto1.add("chosen", true);
// Transformation will make the subquery a dependent one.
if (!left_expr->const_item()) select->uncacheable |= UNCACHEABLE_DEPENDENT;
in2exists_info->added_to_where = false;
if (select->having_cond() || select->with_sum_func ||
select->group_list.elements || select->m_windows.elements > 0) {
bool tmp;
Item_ref_null_helper *ref_null = new Item_ref_null_helper(
&select->context, this, &select->base_ref_items[0]);
Item_bool_func *item = func->create(m_injected_left_expr, ref_null);
item->set_created_by_in2exists();
/*
Assume that the expression in the SELECT list, is a function of a group
aggregate which is aggregated in an outer query, for example
SELECT ... FROM t1 WHERE t1.b IN (SELECT <expr of SUM(t1.a)> FROM t2). We
are changing it to
SELECT ... FROM t1 WHERE t1.b IN (SELECT <expr of SUM(t1.a)> FROM t2
HAVING t1.b=ref-to-<expr of SUM(t1.a)>).
SUM is an "inner sum func", its fix_fields() has added it to
inner_sum_func_list of the outer query; the outer query will do
split_sum_func on it which will add SUM as a hidden item and replace it
in 'expr' with a pointer to an Item_ref.
If 'expr' is a function which has SUM as one of its arguments, the
SELECT list and HAVING access 'expr' through two different pointers, but
there's only one 'expr' Item, which accesses SUM through one pointer, so
there's a single referenced_by pointer to remember, we use
referenced_by[0]. But if 'expr' is directly the SUM, with no Item in
between, then there are two places where 'expr' should be replaced: the
iterator in the SELECT list, and the 'ref-to-expr' in HAVING above. So we
have to document those 2 places in referenced_by[0] and referenced_by[1].
*/
Item *selected = select->base_ref_items[0];
if (selected->type() == SUM_FUNC_ITEM) {
Item_sum *selected_sum = static_cast<Item_sum *>(selected);
if (!selected_sum->referenced_by[0])
selected_sum->referenced_by[0] = ref_null->ref;
else {
// Slot 0 already occupied, use 1.
assert(!selected_sum->referenced_by[1]);
selected_sum->referenced_by[1] = ref_null->ref;
}
}
if (!abort_on_null && left_expr->is_nullable()) {
/*
We can encounter "NULL IN (SELECT ...)". Wrap the added condition
within a trig_cond.
*/
item =
new Item_func_trig_cond(item, get_cond_guard(0), nullptr, NO_PLAN_IDX,
Item_func_trig_cond::OUTER_FIELD_IS_NOT_NULL);
item->set_created_by_in2exists();
}
/*
AND and comparison functions can't be changed during fix_fields()
we can assign query_block->having_cond here, and pass NULL as last
argument (reference) to fix_fields()
*/
select->set_having_cond(and_items(select->having_cond(), item));
select->having_cond()->apply_is_true();
select->having_fix_field = true;
/*
we do not check having_cond()->fixed, because Item_and (from and_items)
or comparison function (from func->create) can't be fixed after creation
*/
Opt_trace_array having_trace(&thd->opt_trace,
"evaluating_constant_having_conditions");
tmp = select->having_cond()->fix_fields(thd, nullptr);
select->having_fix_field = false;
if (tmp) return RES_ERROR;
} else {
Item *orig_item = select->single_visible_field();
if (!select->source_table_is_one_row() || select->where_cond()) {
bool tmp;
Item_bool_func *item = func->create(m_injected_left_expr, orig_item);
/*
We may soon add a 'OR inner IS NULL' to 'item', but that may later be
removed if 'inner' is not nullable, so the in2exists mark must be on
'item' too. Not only on the OR node.
*/
item->set_created_by_in2exists();
if (!abort_on_null && orig_item->is_nullable()) {
Item_bool_func *having = new Item_is_not_null_test(this, orig_item);
having->set_created_by_in2exists();
if (left_expr->is_nullable()) {
if (!(having = new Item_func_trig_cond(
having, get_cond_guard(0), nullptr, NO_PLAN_IDX,
Item_func_trig_cond::OUTER_FIELD_IS_NOT_NULL)))
return RES_ERROR;
having->set_created_by_in2exists();
}
/*
Item_is_not_null_test can't be changed during fix_fields()
we can assign query_block->having_cond() here, and pass NULL as last
argument (reference) to fix_fields()
*/
select->set_having_cond(having);
select->having_fix_field = true;
/*
No need to check query_block->having_cond()->fixed, because Item_and
(from and_items) or comparison function (from func->create)
can't be fixed after creation.
*/
Opt_trace_array having_trace(&thd->opt_trace,
"evaluating_constant_having_conditions");
tmp = select->having_cond()->fix_fields(thd, nullptr);
select->having_fix_field = false;
if (tmp) return RES_ERROR;
item = new Item_cond_or(item, new Item_func_isnull(orig_item));
item->set_created_by_in2exists();
}
/*
If we may encounter NULL IN (SELECT ...) and care whether subquery
result is NULL or FALSE, wrap condition in a trig_cond.
*/
if (!abort_on_null && left_expr->is_nullable()) {
if (!(item = new Item_func_trig_cond(
item, get_cond_guard(0), nullptr, NO_PLAN_IDX,
Item_func_trig_cond::OUTER_FIELD_IS_NOT_NULL)))
return RES_ERROR;
item->set_created_by_in2exists();
}
/*
AND can't be changed during fix_fields()
we can assign query_block->having_cond() here, and pass NULL as last
argument (reference) to fix_fields()
Note that if query_block is the fake one of UNION, it does not make
much sense to give it a WHERE clause below... we already give one to
each member of the UNION.
*/
select->set_where_cond(and_items(select->where_cond(), item));
select->where_cond()->apply_is_true();
in2exists_info->added_to_where = true;
/*
No need to check query_block->where_cond()->fixed, because Item_and
can't be fixed after creation.
*/
Opt_trace_array where_trace(&thd->opt_trace,
"evaluating_constant_where_conditions");
if (select->where_cond()->fix_fields(thd, nullptr)) return RES_ERROR;
} else {
bool tmp;
if (unit->is_union()) {
/*
comparison functions can't be changed during fix_fields()
we can assign query_block->having_cond() here, and pass NULL as last
argument (reference) to fix_fields()
*/
Item_bool_func *new_having =
func->create(m_injected_left_expr,
new Item_ref_null_helper(&select->context, this,
&select->base_ref_items[0]));
new_having->set_created_by_in2exists();
if (!abort_on_null && left_expr->is_nullable()) {
if (!(new_having = new Item_func_trig_cond(
new_having, get_cond_guard(0), nullptr, NO_PLAN_IDX,
Item_func_trig_cond::OUTER_FIELD_IS_NOT_NULL)))
return RES_ERROR;
new_having->set_created_by_in2exists();
}
select->set_having_cond(new_having);
select->having_fix_field = true;
/*
No need to check query_block->having_cond()->fixed, because comparison
function (from func->create) can't be fixed after creation.
*/
Opt_trace_array having_trace(&thd->opt_trace,
"evaluating_constant_having_conditions");
tmp = select->having_cond()->fix_fields(thd, nullptr);
select->having_fix_field = false;
if (tmp) return RES_ERROR;
} else {
/*
Single query block, without tables, without WHERE, HAVING, LIMIT:
its content has one row and is equal to the item in the SELECT list,
so we can replace the IN(subquery) with an equality.
Keep applicability conditions in sync with
Item_exists_subselect::truth_transformer().
The expression is moved to the immediately outer query block, so it
may no longer contain outer references.
*/
outer->merge_contexts(select);
orig_item->fix_after_pullout(outer, select);
// Resolving of substitution item will be done in time of substituting
substitution = func->create(left_expr, orig_item);
have_to_be_excluded = true;
if (thd->lex->is_explain()) {
char warn_buff[MYSQL_ERRMSG_SIZE];
sprintf(warn_buff, ER_THD(thd, ER_SELECT_REDUCED),
select->select_number);
push_warning(thd, Sql_condition::SL_NOTE, ER_SELECT_REDUCED,
warn_buff);
}
return RES_REDUCE;
}
}
}
thd->lex->m_subquery_to_derived_is_impossible = true;
return RES_OK;
}
Item_in_subselect::select_in_like_transformer
/**
Prepare IN/ALL/ANY/SOME subquery transformation and call appropriate
transformation function.
To decide which transformation procedure (scalar or row) applicable here
we have to call fix_fields() for left expression to be able to call
cols() method on it. Also this method make arena management for
underlying transformation methods.
@param thd Thread handle
@param select Query block of subquery being transformed
@param func creator of condition function of subquery
@retval
RES_OK OK
@retval
RES_REDUCE OK, and current subquery was reduced during
transformation
@retval
RES_ERROR Error
*/
Item_subselect::trans_res Item_in_subselect::select_in_like_transformer(
THD *thd, Query_block *select, Comp_creator *func) {
const char *save_where = thd->where;
Item_subselect::trans_res res = RES_ERROR;
bool result;
DBUG_TRACE;
#ifndef NDEBUG
/*
IN/SOME/ALL/ANY subqueries don't support LIMIT clause. Without
it, ORDER BY becomes meaningless and should already have been
removed in resolve_subquery()
*/
for (Query_block *sl = unit->first_query_block(); sl;
sl = sl->next_query_block())
assert(!sl->order_list.first);
#endif
if (changed) return RES_OK;
thd->where = "IN/ALL/ANY subquery";
/*
In some optimisation cases we will not need this Item_in_optimizer
object, but we can't know it here, but here we need address correct
reference on left expression.
//psergey: he means confluent cases like "... IN (SELECT 1)"
*/
if (!optimizer) {
Prepared_stmt_arena_holder ps_arena_holder(thd);
optimizer = new Item_in_optimizer(left_expr, this);
if (!optimizer) goto err;
}
thd->lex->set_current_query_block(select->outer_query_block());
result =
(!left_expr->fixed && left_expr->fix_fields(thd, optimizer->arguments()));
/* fix_fields can change reference to left_expr, we need reassign it */
left_expr = optimizer->arguments()[0];
thd->lex->set_current_query_block(select);
if (result) goto err;
/*
If we didn't choose an execution method up to this point, we choose
the IN=>EXISTS transformation, at least temporarily.
*/
if (strategy == Subquery_strategy::UNSPECIFIED)
strategy = Subquery_strategy::CANDIDATE_FOR_IN2EXISTS_OR_MAT;
/*
Both transformers call fix_fields() only for Items created inside them,
and all those items do not make permanent changes in the current item arena
which allows us to call them with changed arena (if we do not know the
nature of Item, we have to call fix_fields() for it only with the original
arena to avoid memory leak).
*/
{
Prepared_stmt_arena_holder ps_arena_holder(thd);
if (left_expr->cols() == 1)
res = single_value_transformer(thd, select, func);
else {
/* we do not support row operation for ALL/ANY/SOME */
if (func != &eq_creator) {
my_error(ER_OPERAND_COLUMNS, MYF(0), 1);
return RES_ERROR;
}
res = row_value_transformer(thd, select);
}
}
err:
thd->where = save_where;
return res;
}
Query_block::flatten_subqueries
sql_:3709
/**
Convert semi-join subquery predicates into semi-join join nests.
Convert candidate subquery predicates into semi-join join nests. This
transformation is performed once in query lifetime and is irreversible.
Conversion of one subquery predicate
------------------------------------
We start with a query block that has a semi-join subquery predicate:
@code
SELECT ...
FROM ot, ...
WHERE oe IN (SELECT ie FROM it1 ... itN WHERE subq_where) AND outer_where
@endcode
and convert the predicate and subquery into a semi-join nest:
@code
SELECT ...
FROM ot SEMI JOIN (it1 ... itN), ...
WHERE outer_where AND subq_where AND oe=ie
@endcode
that is, in order to do the conversion, we need to
* Create the "SEMI JOIN (it1 .. itN)" part and add it into the parent
query block's FROM structure.
* Add "AND subq_where AND oe=ie" into parent query block's WHERE (or ON if
the subquery predicate was in an ON condition)
* Remove the subquery predicate from the parent query block's WHERE
Considerations when converting many predicates
----------------------------------------------
A join may have at most MAX_TABLES tables. This may prevent us from
flattening all subqueries when the total number of tables in parent and
child selects exceeds MAX_TABLES. In addition, one slot is reserved per
semi-join nest, in case the subquery needs to be materialized in a
temporary table.
We deal with this problem by flattening children's subqueries first and
then using a heuristic rule to determine each subquery predicate's
priority, which is calculated in this order:
1. Prefer dependent subqueries over non-dependent ones
2. Prefer subqueries with many tables over those with fewer tables
3. Prefer early subqueries over later ones (to make sort deterministic)
@returns false if success, true if error
*/
bool Query_block::flatten_subqueries(THD *thd) {
DBUG_TRACE;
assert(has_sj_candidates());
Item_exists_subselect **subq, **subq_begin = sj_candidates->begin(),
**subq_end = sj_candidates->end();
Opt_trace_context *const trace = &thd->opt_trace;
/*
Semijoin flattening is bottom-up. Indeed, we have this execution flow,
for SELECT#1 WHERE X IN (SELECT #2 WHERE Y IN (SELECT#3)) :
Query_block::prepare() (select#1)
-> fix_fields() on IN condition
-> Query_block::prepare() on subquery (select#2)
-> fix_fields() on IN condition
-> Query_block::prepare() on subquery (select#3)
<- Query_block::prepare()
<- fix_fields()
-> flatten_subqueries: merge #3 in #2
<- flatten_subqueries
<- Query_block::prepare()
<- fix_fields()
-> flatten_subqueries: merge #2 in #1
Note that flattening of #(N) is done by its parent JOIN#(N-1), because
there are cases where flattening is not possible and only the parent can
know.
*/
uint subq_no;
for (subq = subq_begin, subq_no = 0; subq < subq_end; subq++, subq_no++) {
auto subq_item = *subq;
/*
Some subqueries may have been deleted, remove them fully before sorting
sj_candidates and subsequent processing:
*/
if (subq_item->strategy == Subquery_strategy::DELETED) {
sj_candidates->erase_value(subq_item);
subq--; // So that the next iteration will handle the next subquery.
subq_end = sj_candidates->end(); // array's end moved.
continue;
}
// Transformation of IN and EXISTS subqueries is supported
assert(subq_item->substype() == Item_subselect::IN_SUBS ||
subq_item->substype() == Item_subselect::EXISTS_SUBS);
Query_block *child_query_block = subq_item->unit->first_query_block();
// Check that we proceeded bottom-up
assert(child_query_block->sj_candidates == nullptr);
bool dependent = subq_item->unit->uncacheable & UNCACHEABLE_DEPENDENT;
subq_item->sj_convert_priority =
(((dependent * MAX_TABLES_FOR_SIZE) + // dependent subqueries first
child_query_block->leaf_table_count) *
65536) + // then with many tables
(65536 - subq_no); // then based on position
/*
We may actually allocate more than 64k subqueries in a query block,
but this is so unlikely that we ignore the impact it may have on sorting.
*/
}
/*
Pick which subqueries to convert:
sort the subquery array
- prefer correlated subqueries over uncorrelated;
- prefer subqueries that have greater number of outer tables;
*/
std::sort(subq_begin, subq_begin + sj_candidates->size(),
[](Item_exists_subselect *el1, Item_exists_subselect *el2) {
return el1->sj_convert_priority > el2->sj_convert_priority;
});
// A permanent transformation is going to start, so:
Prepared_stmt_arena_holder ps_arena_holder(thd);
// Transform certain subquery predicates to derived tables
for (subq = subq_begin; subq < subq_end; subq++) {
auto subq_item = *subq;
if (subq_item->strategy != Subquery_strategy::CANDIDATE_FOR_DERIVED_TABLE)
continue;
OPT_TRACE_TRANSFORM(trace, oto0, oto1,
subq_item->unit->first_query_block()->select_number,
"IN (SELECT)", "joined derived table");
oto1.add("chosen", true);
if (transform_table_subquery_to_join_with_derived(thd, subq_item))
return true;
}
/*
Replace all subqueries to be flattened with a truth predicate.
Generally, this predicate is TRUE, but if the subquery has a WHERE condition
that is always false, replace with a FALSE predicate. In the latter case,
also avoid converting the subquery to a semi-join.
*/
uint table_count = leaf_table_count;
for (subq = subq_begin; subq < subq_end; subq++) {
auto subq_item = *subq;
if (subq_item->strategy != Subquery_strategy::CANDIDATE_FOR_SEMIJOIN)
continue;
// Add the tables in the subquery nest plus one in case of materialization:
const uint tables_added =
subq_item->unit->first_query_block()->leaf_table_count + 1;
// (1) Not too many tables in total.
// (2) This subquery contains no antijoin nest (anti/semijoin nest cannot
// include antijoin nest for implementation reasons, see
// advance_sj_state()).
if (table_count + tables_added <= MAX_TABLES && // (1)
!subq_item->unit->first_query_block()->has_aj_nests) // (2)
subq_item->strategy = Subquery_strategy::SEMIJOIN;
Item *subq_where = subq_item->unit->first_query_block()->where_cond();
/*
A predicate can be evaluated to ALWAYS TRUE or ALWAYS FALSE when it
has only const items. If found to be ALWAYS FALSE, do not include
the subquery in transformations.
*/
bool cond_value = true;
if (subq_where && subq_where->const_item() &&
!subq_where->walk(&Item::is_non_const_over_literals, enum_walk::POSTFIX,
nullptr) &&
simplify_const_condition(thd, &subq_where, false, &cond_value))
return true;
if (!cond_value) {
// Unlink and delete this subquery's query expression
Item::Cleanup_after_removal_context ctx(this);
subq_item->walk(&Item::clean_up_after_removal,
enum_walk::SUBQUERY_POSTFIX, pointer_cast<uchar *>(&ctx));
}
if (subq_item->strategy == Subquery_strategy::SEMIJOIN)
table_count += tables_added;
if (subq_item->strategy != Subquery_strategy::SEMIJOIN &&
subq_item->strategy != Subquery_strategy::DELETED) {
subq_item->strategy = Subquery_strategy::UNSPECIFIED;
continue;
}
/*
In WHERE/ON of parent query, replace IN (subq) with truth value:
- When subquery is converted to anti/semi-join: truth value true.
- When subquery WHERE cond is false: IN returns FALSE, so truth value
false if a semijoin (IN) and truth value true if an antijoin (NOT IN).
*/
Item *truth_item =
(cond_value || subq_item->can_do_aj)
? implicit_cast<Item *>(new (thd->mem_root) Item_func_true())
: implicit_cast<Item *>(new (thd->mem_root) Item_func_false());
if (truth_item == nullptr) return true;
Item **tree = (subq_item->embedding_join_nest == nullptr)
? &m_where_cond
: subq_item->embedding_join_nest->join_cond_ref();
if (replace_subcondition(thd, tree, subq_item, truth_item, false))
return true; /* purecov: inspected */
}
/* Transform the selected subqueries into semi-join */
for (subq = subq_begin; subq < subq_end; subq++) {
auto subq_item = *subq;
if (subq_item->strategy != Subquery_strategy::SEMIJOIN) continue;
OPT_TRACE_TRANSFORM(
trace, oto0, oto1, subq_item->unit->first_query_block()->select_number,
"IN (SELECT)", subq_item->can_do_aj ? "antijoin" : "semijoin");
oto1.add("chosen", true);
if (convert_subquery_to_semijoin(thd, *subq)) return true;
}
/*
Finalize the subqueries that we did not convert,
ie. perform IN->EXISTS rewrite.
*/
for (subq = subq_begin; subq < subq_end; subq++) {
auto subq_item = *subq;
if (subq_item->strategy != Subquery_strategy::UNSPECIFIED) continue;
Item_subselect::trans_res res;
subq_item->changed = false;
subq_item->fixed = false;
Query_block *save_query_block = thd->lex->current_query_block();
thd->lex->set_current_query_block(subq_item->unit->first_query_block());
// This is the only part of the function which uses a JOIN.
res = subq_item->select_transformer(thd,
subq_item->unit->first_query_block());
thd->lex->set_current_query_block(save_query_block);
if (res == Item_subselect::RES_ERROR) return true;
subq_item->changed = true;
subq_item->fixed = true;
/*
If the Item has been substituted with another Item (e.g an
Item_in_optimizer), resolve it and add it to proper WHERE or ON clause.
If no substitute exists (e.g for EXISTS predicate), no action is required.
*/
Item *substitute = subq_item->substitution;
if (substitute == nullptr) continue;
const bool do_fix_fields = !substitute->fixed;
const bool subquery_in_join_clause =
subq_item->embedding_join_nest != nullptr;
Item **tree = subquery_in_join_clause
? (subq_item->embedding_join_nest->join_cond_ref())
: &m_where_cond;
if (replace_subcondition(thd, tree, *subq, substitute, do_fix_fields))
return true;
subq_item->substitution = nullptr;
}
sj_candidates->clear();
return false;
}
Query_block::prepare
sql_:173
/**
Prepare query block for optimization.
Resolve table and column information.
Resolve all expressions (item trees), ie WHERE clause, join conditions,
GROUP BY clause, HAVING clause, ORDER BY clause, LIMIT clause.
Prepare all subqueries recursively as part of resolving the expressions.
Apply permanent transformations to the abstract syntax tree, such as
semi-join transformation, derived table transformation, elimination of
constant values and redundant clauses (e.g ORDER BY, GROUP BY).
@param thd thread handler
@param insert_field_list List of fields when used in INSERT, otherwise NULL
@returns false if success, true if error
@note on privilege checking for SELECT query that possibly contains view
or derived table references:
- When this function is called, it is assumed that the precheck() function
has been called. precheck() ensures that the user has some SELECT
privileges to the tables involved in the query. When resolving views
it has also been established that the user has some privileges for them.
To prepare a view for privilege checking, it is also needed to call
check_view_privileges() after views have been merged into the query.
This is not necessary for unnamed derived tables since it has already
been established that we have SELECT privileges for the underlying tables
by the precheck functions. (precheck() checks a query without resolved
views, ie. before tables are opened, so underlying tables of views
are not yet available).
- When a query block is resolved, always ensure that the user has SELECT
privileges to the columns referenced in the WHERE clause, the join
conditions, the GROUP BY clause, the HAVING clause and the ORDER BY clause.
- When resolving the outer-most query block, ensure that the user also has
SELECT privileges to the columns in the selected expressions.
- When setting up a derived table or view for materialization, ensure that
the user has SELECT privileges to the columns in the selected expressions
- Column privileges are normally checked by Item_field::fix_fields().
Exceptions are select list of derived tables/views which are checked
in TABLE_LIST::setup_materialized_derived(), and natural/using join
conditions that are checked in mark_common_columns().
- As far as INSERT, UPDATE and DELETE statements have the same expressions
as a SELECT statement, this note applies to those statements as well.
*/
bool Query_block::prepare(THD *thd, mem_root_deque<Item *> *insert_field_list) {
DBUG_TRACE;
assert(this == thd->lex->current_query_block());
assert(join == nullptr);
assert(!thd->is_error());
// If this query block is a table value constructor, a lot of the preparation
// done in Query_block::prepare becomes irrelevant. Thus we call our own
// Query_block::prepare_values in this case.
if (is_table_value_constructor) return prepare_values(thd);
Query_expression *const unit = master_query_expression();
if (!top_join_list.empty()) propagate_nullability(&top_join_list, false);
/*
Determine whether it is suggested to merge immediate derived tables, based
on the placement of the query block:
- DTs belonging to outermost query block: always
- DTs belonging to first level subqueries: Yes if inside SELECT statement,
no otherwise (including UPDATE and DELETE).
This is required to support a workaround for allowing subqueries
containing the same table as is target for delete or update,
by forcing a materialization of the subquery.
- All other cases inherit status of parent query block.
*/
allow_merge_derived = outer_query_block() == nullptr ||
master_query_expression()->item == nullptr ||
(outer_query_block()->outer_query_block() == nullptr
? parent_lex->sql_command == SQLCOM_SELECT ||
parent_lex->sql_command == SQLCOM_SET_OPTION
: outer_query_block()->allow_merge_derived);
Opt_trace_context *const trace = &thd->opt_trace;
Opt_trace_object trace_wrapper_prepare(trace);
Opt_trace_object trace_prepare(trace, "join_preparation");
trace_prepare.add_select_number(select_number);
Opt_trace_array trace_steps(trace, "steps");
/*
Setup the expressions in the SELECT list.
For derived tables/views, wait with privilege checking of columns and
marking in read/write sets until we know how they are used (may be used in
UPDATE and INSERT). Exceptions:
- Always assume columns referenced in subqueries are selected.
- Always assume outer references are selected (marking is then done in
Item_outer_ref::fix_fields).
Expressions must be resolved here, before tables are set up, otherwise table
function's arguments are not resolved properly.
*/
const bool check_privs = !thd->derived_tables_processing ||
master_query_expression()->item != nullptr;
thd->mark_used_columns = check_privs ? MARK_COLUMNS_READ : MARK_COLUMNS_NONE;
ulonglong want_privilege_saved = thd->want_privilege;
thd->want_privilege = check_privs ? SELECT_ACL : 0;
/*
Expressions in lateral join can't refer to item list, thus item list lookup
shouldn't be allowed during table/table function setup.
*/
is_item_list_lookup = false;
/* Check that all tables, fields, conds and order are ok */
if (setup_tables(thd, get_table_list(), false)) return true;
if ((derived_table_count || table_func_count) &&
resolve_placeholder_tables(thd, true))
return true;
// Wait with privilege checking until all derived tables are resolved.
if (derived_table_count && !thd->derived_tables_processing &&
check_view_privileges(thd, SELECT_ACL, SELECT_ACL))
return true;
is_item_list_lookup = true;
// Precompute and store the row types of NATURAL/USING joins.
if (leaf_table_count >= 2 &&
setup_natural_join_row_types(thd, join_list, &context))
return true;
Mem_root_array<Item_exists_subselect *> sj_candidates_local(thd->mem_root);
set_sj_candidates(&sj_candidates_local);
/*
Item and Item_field CTORs will both increment some counters
in current_query_block(), based on the current parsing context.
We are not parsing anymore: any new Items created now are due to
query rewriting, so stop incrementing counters.
*/
assert(parsing_place == CTX_NONE);
parsing_place = CTX_NONE;
resolve_place = RESOLVE_SELECT_LIST;
if (with_wild && setup_wild(thd)) return true;
if (setup_base_ref_items(thd)) return true; /* purecov: inspected */
if (setup_fields(thd, thd->want_privilege, /*allow_sum_func=*/true,
/*split_sum_funcs=*/true, /*column_update=*/false,
insert_field_list, &fields, base_ref_items))
return true;
resolve_place = RESOLVE_NONE;
const nesting_map save_allow_sum_func = thd->lex->allow_sum_func;
const nesting_map save_deny_window_func = thd->lex->m_deny_window_func;
// Do not allow local set functions for join conditions, WHERE and GROUP BY
thd->lex->allow_sum_func &= ~((nesting_map)1 << nest_level);
thd->mark_used_columns = MARK_COLUMNS_READ;
thd->want_privilege = SELECT_ACL;
// Set up join conditions and WHERE clause
if (setup_conds(thd)) return true;
// Set up the GROUP BY clause
int all_fields_count = fields.size();
if (group_list.elements && setup_group(thd)) return true;
hidden_group_field_count = fields.size() - all_fields_count;
// Allow local set functions in HAVING and ORDER BY
thd->lex->allow_sum_func |= (nesting_map)1 << nest_level;
// Windowing is not allowed with HAVING
thd->lex->m_deny_window_func |= (nesting_map)1 << nest_level;
if (olap == ROLLUP_TYPE) {
for (Item *item : fields) {
mark_item_as_maybe_null_if_rollup_item(item);
item->update_used_tables();
}
}
// Setup the HAVING clause
if (m_having_cond) {
assert(m_having_cond->is_bool_func());
thd->where = "having clause";
having_fix_field = true;
resolve_place = RESOLVE_HAVING;
if (!m_having_cond->fixed &&
(m_having_cond->fix_fields(thd, &m_having_cond) ||
m_having_cond->check_cols(1)))
return true;
assert(m_having_cond->data_type() != MYSQL_TYPE_INVALID);
/*
Rollup may alter nullability of HAVING condition, so wait with
simplification of this condition until after rollup is resolved.
*/
having_fix_field = false;
resolve_place = RESOLVE_NONE;
}
if (olap == ROLLUP_TYPE && resolve_rollup(thd))
return true; /* purecov: inspected */
thd->lex->m_deny_window_func = save_deny_window_func;
if (m_having_cond != nullptr) {
if (olap == ROLLUP_TYPE) {
m_having_cond = resolve_rollup_item(thd, m_having_cond);
if (m_having_cond == nullptr) {
return true;
}
}
/*
Simplify the having condition if it is a const item.
Leave a TRUE condition if HAVING is always true, so that query block
is still marked as having a HAVING condition.
*/
if (m_having_cond->const_item() && !thd->lex->is_view_context_analysis() &&
!m_having_cond->walk(&Item::is_non_const_over_literals,
enum_walk::POSTFIX, nullptr) &&
simplify_const_condition(thd, &m_having_cond, false))
return true;
}
// Set up the ORDER BY clause
all_fields_count = fields.size();
if (order_list.elements) {
if (setup_order(thd, base_ref_items, get_table_list(), &fields,
order_list.first))
return true;
}
if (fulltext_uses_rollup_column(this)) {
my_error(ER_FULLTEXT_WITH_ROLLUP, MYF(0));
return true;
}
hidden_order_field_count = fields.size() - all_fields_count;
// Resolve OFFSET and LIMIT clauses
if (resolve_limits(thd)) return true;
/*
Query block is completely resolved, except for windows (see below) which
handles its own, so restore set function allowance.
*/
thd->lex->allow_sum_func = save_allow_sum_func;
/*
Permanently remove redundant parts from the query if
1) This is a subquery
2) Not normalizing a view. Removal should take place when a
query involving a view is optimized, not when the view
is created
*/
if (unit->item && // 1)
!thd->lex->is_view_context_analysis()) // 2)
{
remove_redundant_subquery_clauses(thd, hidden_group_field_count);
}
/*
Set up windows after setup_order() (as the query's ORDER BY may contain
window functions), and before setup_order_final() (as such function needs
to know about implicit grouping which may be induced by an aggregate
function in the window's PARTITION or ORDER clause).
*/
const size_t fields_cnt = fields.size();
if (m_windows.elements != 0 &&
Window::setup_windows1(thd, this, base_ref_items, get_table_list(),
&fields, &m_windows))
return true;
bool added_new_sum_funcs = fields.size() > fields_cnt;
if (order_list.elements) {
if (setup_order_final(thd)) return true; /* purecov: inspected */
added_new_sum_funcs = true;
}
thd->want_privilege = want_privilege_saved;
if (is_distinct() && can_skip_distinct())
remove_base_options(SELECT_DISTINCT);
/*
Printing the expanded query should happen here and not elsewhere, because
when a view is merged (when the view is opened in open_tables()), the
parent query's query_block does not yet contain a correct WHERE clause (it
misses the view's merged WHERE clause). This is corrected only just above,
in TABLE_LIST::prep_where(), called by
setup_without_group()->setup_conds().
We also have to wait for fix_fields() on HAVING, above.
At this stage, we also have properly set up Item_ref-s.
*/
{
Opt_trace_object trace_wrapper(trace);
opt_trace_print_expanded_query(thd, this, &trace_wrapper);
}
/*
When normalizing a view (like when writing a view's body to the FRM),
subquery transformations don't apply (if they did, IN->EXISTS could not be
undone in favour of materialization, when optimizing a later statement
using the view)
*/
if (unit->item && // This is a subquery
this != unit->fake_query_block && // A real query block
// Not normalizing a view
!thd->lex->is_view_context_analysis()) {
// Query block represents a subquery within an IN/ANY/ALL/EXISTS predicate
if (resolve_subquery(thd)) return true;
}
// Transform eligible scalar subqueries to derived tables.
//
// Don't transform if analyzing a view: the resulting query may not be
// compilable from sqldump, (due to group by check/visibility in HAVING).
//
// Don't transform if the switch subquery_to_derived is false.
//
// Note that the transformation must precede m_having_cond->split_sum_func2
// below since substitutions may be made in the HAVING clause which would not
// otherwise get done.
if (!(thd->lex->context_analysis_only & CONTEXT_ANALYSIS_ONLY_VIEW) &&
(thd->optimizer_switch_flag(OPTIMIZER_SWITCH_SUBQUERY_TO_DERIVED) ||
(parent_lex->m_sql_cmd != nullptr &&
thd->secondary_engine_optimization() ==
Secondary_engine_optimization::SECONDARY)) &&
transform_scalar_subqueries_to_join_with_derived(thd))
return true; /* purecov: inspected */
/*
If GROUPING function is present in having condition -
1. Set that the evaluation of this condition depends on rollup
result.
2. Add a reference to the condition so that result is stored
after evaluation.
*/
if (m_having_cond && (m_having_cond->has_aggregation() ||
m_having_cond->has_grouping_func())) {
m_having_cond->split_sum_func2(thd, base_ref_items, &fields, &m_having_cond,
true);
added_new_sum_funcs = true;
}
if (inner_sum_func_list) {
Item_sum *end = inner_sum_func_list;
Item_sum *item_sum = end;
do {
item_sum = item_sum->next_sum;
item_sum->split_sum_func2(thd, base_ref_items, &fields, nullptr, false);
added_new_sum_funcs = true;
} while (item_sum != end);
}
if (added_new_sum_funcs && olap == ROLLUP_TYPE) {
uint send_group_parts = group_list_size();
for (auto it = fields.begin(); it != fields.end(); ++it) {
Item *item = *it;
if (item->type() == Item::SUM_FUNC_ITEM && !item->const_item()) {
Item_sum *item_sum = down_cast<Item_sum *>(item);
if (item_sum->aggr_query_block == this &&
!item_sum->is_rollup_sum_wrapper()) {
// split_sum_func2 created a new aggregate function item,
// so we need to update it for rollup.
Item *new_item =
create_rollup_switcher(thd, this, item_sum, send_group_parts);
if (new_item == nullptr) return true;
*it = new_item;
}
}
}
}
if (group_list.elements) {
/*
Because HEAP tables can't index BIT fields we need to use an
additional hidden field for grouping because later it will be
converted to a LONG field. Original field will remain of the
BIT type and will be returned to a client.
*/
for (ORDER *ord = group_list.first; ord; ord = ord->next) {
if ((*ord->item)->type() == Item::FIELD_ITEM &&
(*ord->item)->data_type() == MYSQL_TYPE_BIT) {
Item_field *field = new Item_field(thd, *(Item_field **)ord->item);
ord->item = add_hidden_item(field);
}
}
}
// Setup full-text functions after resolving HAVING
if (has_ft_funcs() && setup_ftfuncs(thd, this)) return true;
if (query_result() && query_result()->prepare(thd, fields, unit)) return true;
if (has_sj_candidates() && flatten_subqueries(thd)) return true;
set_sj_candidates(nullptr);
/*
When reaching the top-most query block, or the next-to-top query block for
the SQL command SET and for SP instructions (indicated with SQLCOM_END),
apply local transformations to this query block and all underlying query
blocks.
*/
if (!thd->lex->is_view_context_analysis() &&
(outer_query_block() == nullptr ||
((parent_lex->sql_command == SQLCOM_SET_OPTION ||
parent_lex->sql_command == SQLCOM_END) &&
outer_query_block()->outer_query_block() == nullptr)) &&
!skip_local_transforms) {
/*
This code is invoked in the following cases:
- if this is not a create view statement as transformations are
not required when creating a view.
- if this is an outer-most query block of a SELECT or multi-table
UPDATE/DELETE statement. Notice that for a UNION, this applies to
all query blocks. It also applies to a fake_query_block object.
- if this is one of highest-level subqueries, if the statement is
something else; like subq-i in:
UPDATE t1 SET col1=(subq-1), col2=(subq-2);
- If this is a subquery in a SET command,
or scalar subqueries used in SP expressions like sp_instr_freturn
(undicated by SQLCOM_END).
@todo: Refactor SET so that this is not needed.
- INSERT may in some cases alter the sequence of preparation calls, by
setting the skip_local_transforms flag before calling prepare().
Local transforms are applied after query block merging.
This means that we avoid unnecessary invocations, as local transforms
would otherwise have been performed first before query block merging and
then another time after query block merging.
Thus, apply_local_transforms() may run only after the top query
is finished with query block merging. That's why
apply_local_transforms() is initiated only by the top query, and then
recurses into subqueries.
*/
if (apply_local_transforms(thd, true)) return true;
}
// Eliminate unused window definitions, redundant sorts etc.
if (!m_windows.is_empty()) Window::eliminate_unused_objects(&m_windows);
// Replace group by field references inside window functions with references
// in the presence of ROLLUP.
if (olap == ROLLUP_TYPE && resolve_rollup_wfs(thd))
return true; /* purecov: inspected */
assert(!thd->is_error());
return false;
}
Item_in_subselect::single_value_transformer
/**
Rewrite a single-column IN/ALL/ANY subselect
DESCRIPTION
Rewrite a single-column subquery using rule-based approach. The subquery
oe $cmp$ (SELECT ie FROM ... WHERE subq_where ... HAVING subq_having)
First, try to convert the subquery to scalar-result subquery in one of
the forms:
- oe $cmp$ (SELECT MAX(...) ) // handled by Item_singlerow_subselect
- oe $cmp$ \<max\>(SELECT ...) // handled by Item_maxmin_subselect
If that fails, the subquery will be handled with class Item_in_optimizer.
There are two possibilities:
- If the subquery execution method is materialization, then the subquery is
not transformed any further.
- Otherwise the IN predicates is transformed into EXISTS by injecting
equi-join predicates and possibly other helper predicates. For details
see method single_value_in_like_transformer().
@param thd Thread handle
@param select Query block of the subquery
@param func Subquery comparison creator
@retval RES_OK Either subquery was transformed, or appropriate
predicates where injected into it.
@retval RES_REDUCE The subquery was reduced to non-subquery
@retval RES_ERROR Error
*/
Item_subselect::trans_res Item_in_subselect::single_value_transformer(
THD *thd, Query_block *select, Comp_creator *func) {
bool subquery_maybe_null = false;
DBUG_TRACE;
/*
Check that the right part of the subselect contains no more than one
column. E.g. in SELECT 1 IN (SELECT * ..) the right part is (SELECT * ...)
*/
// psergey: duplicated_subselect_card_check
if (select->num_visible_fields() > 1) {
my_error(ER_OPERAND_COLUMNS, MYF(0), 1);
return RES_ERROR;
}
/*
Check the nullability of the subquery. The subquery should return
only one column, so we check the nullability of the first item in
Query_block::fields_list. In case the subquery is a union, check the
nullability of the first item of each query block belonging to the
union.
*/
for (Query_block *sel = unit->first_query_block(); sel != nullptr;
sel = sel->next_query_block()) {
Item *only_item = sel->single_visible_field();
if (only_item == nullptr) {
// There was more than one after all.
my_error(ER_OPERAND_COLUMNS, MYF(0), 1);
return RES_ERROR;
}
if ((subquery_maybe_null = only_item->is_nullable())) break;
}
/*
If this is an ALL/ANY single-value subquery predicate, try to rewrite
it with a MIN/MAX subquery.
E.g. SELECT * FROM t1 WHERE b > ANY (SELECT a FROM t2) can be rewritten
with SELECT * FROM t1 WHERE b > (SELECT MIN(a) FROM t2).
A predicate may be transformed to use a MIN/MAX subquery if it:
1. has a greater than/less than comparison operator, and
2. is not correlated with the outer query, and
3. UNKNOWN results are treated as FALSE, by this item or the outer item,
or can never be generated.
*/
if (!func->eqne_op() && // 1
!unit->uncacheable && // 2
(abort_on_null || (upper_item && upper_item->ignore_unknown()) || // 3
(!left_expr->is_nullable() && !subquery_maybe_null))) {
if (substitution) {
// It is second (third, ...) SELECT of UNION => All is done
return RES_OK;
}
Item *subs;
if (!select->group_list.elements && !select->having_cond() &&
// MIN/MAX(agg_or_window_func) would not be valid
!select->with_sum_func && select->m_windows.elements == 0 &&
!(select->next_query_block()) && select->table_list.elements &&
// For ALL: MIN ignores NULL: 3<=ALL(4 and NULL) is UNKNOWN, while
// NOT(3>(SELECT MIN(4 and NULL)) is TRUE
!(substype() == ALL_SUBS && subquery_maybe_null)) {
OPT_TRACE_TRANSFORM(&thd->opt_trace, oto0, oto1, select->select_number,
"> ALL/ANY (SELECT)", "SELECT(MIN)");
oto1.add("chosen", true);
thd->lex->m_subquery_to_derived_is_impossible = true;
Item_sum_hybrid *item;
nesting_map save_allow_sum_func;
if (func->l_op()) {
/*
(ALL && (> || =>)) || (ANY && (< || =<))
for ALL condition is inverted
*/
item = new Item_sum_max(select->base_ref_items[0]);
} else {
/*
(ALL && (< || =<)) || (ANY && (> || =>))
for ALL condition is inverted
*/
item = new Item_sum_min(select->base_ref_items[0]);
}
if (upper_item) upper_item->set_sum_test(item);
select->base_ref_items[0] = item;
// Find the correct position in the field list, and overwrite it with the
// item.
for (auto it = select->visible_fields().begin();
it != select->visible_fields().end(); ++it) {
*it = item;
break;
}
DBUG_EXECUTE("where", print_where(thd, item, "rewrite with MIN/MAX",
QT_ORDINARY););
save_allow_sum_func = thd->lex->allow_sum_func;
thd->lex->allow_sum_func |= (nesting_map)1 << select->nest_level;
/*
Item_sum_(max|min) can't substitute other item => we can use 0 as
reference, also Item_sum_(max|min) can't be fixed after creation, so
we do not check item->fixed
*/
if (item->fix_fields(thd, nullptr)) return RES_ERROR;
thd->lex->allow_sum_func = save_allow_sum_func;
subs = new Item_singlerow_subselect(select);
} else {
OPT_TRACE_TRANSFORM(&thd->opt_trace, oto0, oto1, select->select_number,
"> ALL/ANY (SELECT)", "MIN (SELECT)");
oto1.add("chosen", true);
Item_maxmin_subselect *item;
subs = item = new Item_maxmin_subselect(this, select, func->l_op(),
substype() == ANY_SUBS);
if (upper_item) upper_item->set_sub_test(item);
}
if (upper_item) upper_item->set_subselect(this);
substitution = func->create(left_expr, subs);
return RES_OK;
}
if (!substitution) {
/* We're invoked for the 1st (or the only) SELECT in the subquery UNION */
substitution = optimizer;
thd->lex->set_current_query_block(select->outer_query_block());
// optimizer never use Item **ref => we can pass 0 as parameter
if (!optimizer || optimizer->fix_left(thd, nullptr)) {
thd->lex->set_current_query_block(select); /* purecov: inspected */
return RES_ERROR; /* purecov: inspected */
}
thd->lex->set_current_query_block(select);
/*
As far as Item_ref_in_optimizer do not substitute itself on fix_fields
we can use same item for all selects.
*/
Item_ref *const left = new Item_ref(
&select->context, (Item **)optimizer->get_cache(), in_left_expr_name);
if (left == nullptr) return RES_ERROR;
if (mark_as_outer(left_expr, 0))
left->depended_from = select->outer_query_block();
m_injected_left_expr = left;
assert(in2exists_info == nullptr);
in2exists_info = new (thd->mem_root) In2exists_info;
in2exists_info->dependent_before =
unit->uncacheable & UNCACHEABLE_DEPENDENT;
if (!left_expr->const_item()) unit->uncacheable |= UNCACHEABLE_DEPENDENT;
in2exists_info->dependent_after = unit->uncacheable & UNCACHEABLE_DEPENDENT;
}
if (!abort_on_null && left_expr->is_nullable() && !pushed_cond_guards) {
if (!(pushed_cond_guards = (bool *)thd->alloc(sizeof(bool))))
return RES_ERROR;
pushed_cond_guards[0] = true;
}
/* Perform the IN=>EXISTS transformation. */
const trans_res retval =
single_value_in_to_exists_transformer(thd, select, func);
return retval;
}