2022-09-18 mysql列存储引擎-subselect相关执行流程记录-天翼云

逻辑架构:

时序图:

2022-09-18 mysql列存储引擎-subselect相关执行流程记录

核心流程:

调用堆栈:

(gdb) bt
#0  Tianmu::core::Query::Preexecute (this=0x7f04a93bd620, qu=..., sender=0x7f02b0ba8ce0, display_now=true)
    at /home/jenkins/workspace/stonedb5.7-zsl-centos7.9-75-128/storage/tianmu/core/query.cpp:561
#1  0x00000000029d4f6e in Tianmu::core::Engine::Execute (this=0x543cfd0, thd=0x7f02b0011fc0, lex=0x7f02b0b03740, result_output=0x7f02b0b04ff8, unit_for_union=0x0)
    at /home/jenkins/workspace/stonedb5.7-zsl-centos7.9-75-128/storage/tianmu/core/engine_execute.cpp:421
#2  0x00000000029d415e in Tianmu::core::Engine::HandleSelect (this=0x543cfd0, thd=0x7f02b0011fc0, lex=0x7f02b0b03740, result=@0x7f04a93bdb38: 0x7f02b0b04ff8, setup_tables_done_option=0, 
    res=@0x7f04a93bdb34: 0, optimize_after_tianmu=@0x7f04a93bdb2c: 1, tianmu_free_join=@0x7f04a93bdb30: 1, with_insert=0)
    at /home/jenkins/workspace/stonedb5.7-zsl-centos7.9-75-128/storage/tianmu/core/engine_execute.cpp:232
#3  0x0000000002ab8e2e in Tianmu::dbhandler::TIANMU_HandleSelect (thd=0x7f02b0011fc0, lex=0x7f02b0b03740, result=@0x7f04a93bdb38: 0x7f02b0b04ff8, setup_tables_done_option=0, 
    res=@0x7f04a93bdb34: 0, optimize_after_tianmu=@0x7f04a93bdb2c: 1, tianmu_free_join=@0x7f04a93bdb30: 1, with_insert=0)
    at /home/jenkins/workspace/stonedb5.7-zsl-centos7.9-75-128/storage/tianmu/handler/ha_rcengine.cpp:82
#4  0x00000000022feff7 in execute_sqlcom_select (thd=0x7f02b0011fc0, all_tables=0x7f02b0b05580) at /home/jenkins/workspace/stonedb5.7-zsl-centos7.9-75-128/sql/sql_parse.cc:5182
#5  0x00000000022f8e1f in mysql_execute_command (thd=0x7f02b0011fc0, first_level=false) at /home/jenkins/workspace/stonedb5.7-zsl-centos7.9-75-128/sql/sql_parse.cc:2831
#6  0x00000000022643b8 in sp_instr_stmt::exec_core (this=0x7f02b0b05e98, thd=0x7f02b0011fc0, nextp=0x7f04a93bf228)
    at /home/jenkins/workspace/stonedb5.7-zsl-centos7.9-75-128/sql/sp_instr.cc:1027
#7  0x000000000226325b in sp_lex_instr::reset_lex_and_exec_core (this=0x7f02b0b05e98, thd=0x7f02b0011fc0, nextp=0x7f04a93bf228, open_tables=false)
    at /home/jenkins/workspace/stonedb5.7-zsl-centos7.9-75-128/sql/sp_instr.cc:452
#8  0x0000000002263c42 in sp_lex_instr::validate_lex_and_execute_core (this=0x7f02b0b05e98, thd=0x7f02b0011fc0, nextp=0x7f04a93bf228, open_tables=false)
    at /home/jenkins/workspace/stonedb5.7-zsl-centos7.9-75-128/sql/sp_instr.cc:753
#9  0x0000000002264110 in sp_instr_stmt::execute (this=0x7f02b0b05e98, thd=0x7f02b0011fc0, nextp=0x7f04a93bf228)
    at /home/jenkins/workspace/stonedb5.7-zsl-centos7.9-75-128/sql/sp_instr.cc:938
#10 0x000000000225cdd5 in sp_head::execute (this=0x7f02b0006c30, thd=0x7f02b0011fc0, merge_da_on_success=true) at /home/jenkins/workspace/stonedb5.7-zsl-centos7.9-75-128/sql/sp_head.cc:796
#11 0x000000000225dd97 in sp_head::execute_function (this=0x7f02b0006c30, thd=0x7f02b0011fc0, argp=0x7f02b0aa21c0, argcount=1, return_value_fld=0x7f02b0aa6930)
    at /home/jenkins/workspace/stonedb5.7-zsl-centos7.9-75-128/sql/sp_head.cc:1287
#12 0x0000000001d75ed9 in Item_func_sp::execute_impl (this=0x7f02b0aa2118, thd=0x7f02b0011fc0) at /home/jenkins/workspace/stonedb5.7-zsl-centos7.9-75-128/sql/item_func.cc:8706
#13 0x0000000001d75c2c in Item_func_sp::execute (this=0x7f02b0aa2118) at /home/jenkins/workspace/stonedb5.7-zsl-centos7.9-75-128/sql/item_func.cc:8637
#14 0x0000000001d5c926 in Item_func_sp::val_int (this=0x7f02b0aa2118) at /home/jenkins/workspace/stonedb5.7-zsl-centos7.9-75-128/sql/item_func.h:2741
#15 0x0000000001d074ba in Item::send (this=0x7f02b0aa2118, protocol=0x7f02b0013020, buffer=0x7f04a93c0620) at /home/jenkins/workspace/stonedb5.7-zsl-centos7.9-75-128/sql/item.cc:7570
#16 0x00000000022ab95f in THD::send_result_set_row (this=0x7f02b0011fc0, row_items=0x7f02b0aa0ab8) at /home/jenkins/workspace/stonedb5.7-zsl-centos7.9-75-128/sql/sql_class.cc:4757
#17 0x00000000022a69ca in Query_result_send::send_data (this=0x7f02b0a8fd78, items=...) at /home/jenkins/workspace/stonedb5.7-zsl-centos7.9-75-128/sql/sql_class.cc:2742
#18 0x00000000022bf060 in end_send (join=0x7f02b0ba5510, qep_tab=0x7f02b0bac1b8, end_of_records=false) at /home/jenkins/workspace/stonedb5.7-zsl-centos7.9-75-128/sql/sql_executor.cc:2936
#19 0x00000000022bc6d8 in evaluate_join_record (join=0x7f02b0ba5510, qep_tab=0x7f02b0bac040) at /home/jenkins/workspace/stonedb5.7-zsl-centos7.9-75-128/sql/sql_executor.cc:1652
#20 0x00000000022bcbd4 in evaluate_null_complemented_join_record (join=0x7f02b0ba5510, qep_tab=0x7f02b0bac040)
    at /home/jenkins/workspace/stonedb5.7-zsl-centos7.9-75-128/sql/sql_executor.cc:1792
#21 0x00000000022bbea6 in sub_select (join=0x7f02b0ba5510, qep_tab=0x7f02b0bac040, end_of_records=false) at /home/jenkins/workspace/stonedb5.7-zsl-centos7.9-75-128/sql/sql_executor.cc:1311
#22 0x00000000022bc6d8 in evaluate_join_record (join=0x7f02b0ba5510, qep_tab=0x7f02b0babec8) at /home/jenkins/workspace/stonedb5.7-zsl-centos7.9-75-128/sql/sql_executor.cc:1652
#23 0x00000000022bbe54 in sub_select (join=0x7f02b0ba5510, qep_tab=0x7f02b0babec8, end_of_records=false) at /home/jenkins/workspace/stonedb5.7-zsl-centos7.9-75-128/sql/sql_executor.cc:1304
#24 0x00000000022bb8ef in do_select (join=0x7f02b0ba5510) at /home/jenkins/workspace/stonedb5.7-zsl-centos7.9-75-128/sql/sql_executor.cc:957
#25 0x00000000022b9baf in JOIN::exec (this=0x7f02b0ba5510) at /home/jenkins/workspace/stonedb5.7-zsl-centos7.9-75-128/sql/sql_executor.cc:206
#26 0x000000000233e137 in handle_query (thd=0x7f02b0011fc0, lex=0x7f02b0014138, result=0x7f02b0a8fd78, added_options=0, removed_options=0, optimize_after_bh=2, free_join_from_bh=1)
    at /home/jenkins/workspace/stonedb5.7-zsl-centos7.9-75-128/sql/sql_select.cc:195
#27 0x00000000022ff031 in execute_sqlcom_select (thd=0x7f02b0011fc0, all_tables=0x7f02b0a8eb28) at /home/jenkins/workspace/stonedb5.7-zsl-centos7.9-75-128/sql/sql_parse.cc:5184
#28 0x00000000022f8e1f in mysql_execute_command (thd=0x7f02b0011fc0, first_level=true) at /home/jenkins/workspace/stonedb5.7-zsl-centos7.9-75-128/sql/sql_parse.cc:2831
#29 0x00000000022ffcfe in mysql_parse (thd=0x7f02b0011fc0, parser_state=0x7f04a93c1f00) at /home/jenkins/workspace/stonedb5.7-zsl-centos7.9-75-128/sql/sql_parse.cc:5621
#30 0x00000000022f6148 in dispatch_command (thd=0x7f02b0011fc0, com_data=0x7f04a93c2660, command=COM_QUERY) at /home/jenkins/workspace/stonedb5.7-zsl-centos7.9-75-128/sql/sql_parse.cc:1495
---Type <return> to continue, or q <return> to quit---
#31 0x00000000022f52eb in do_command (thd=0x7f02b0011fc0) at /home/jenkins/workspace/stonedb5.7-zsl-centos7.9-75-128/sql/sql_parse.cc:1034
#32 0x00000000023f922b in handle_connection (arg=0x7730860) at /home/jenkins/workspace/stonedb5.7-zsl-centos7.9-75-128/sql/conn_handler/connection_handler_per_thread.cc:313
#33 0x0000000002913af1 in pfs_spawn_thread (arg=0x7740b10) at /home/jenkins/workspace/stonedb5.7-zsl-centos7.9-75-128/storage/perfschema/pfs.cc:2197
#34 0x00007f04f4395ea5 in start_thread () from /lib64/libpthread.so.0
#35 0x00007f04f18b9b0d in clone () from /lib64/libc.so.6

(gdb) bt
#0  end_send (join=0x7f1fd40e89a0, qep_tab=0x7f1fd40b0d38, end_of_records=false) at /home/jenkins/workspace/stonedb5.7-zsl-centos7.9-75-128/sql/sql_executor.cc:2936
#1  0x00000000022bce08 in evaluate_join_record (join=0x7f1fd40e89a0, qep_tab=0x7f1fd40b0bc0) at /home/jenkins/workspace/stonedb5.7-zsl-centos7.9-75-128/sql/sql_executor.cc:1652
#2  0x00000000022bd304 in evaluate_null_complemented_join_record (join=0x7f1fd40e89a0, qep_tab=0x7f1fd40b0bc0)
    at /home/jenkins/workspace/stonedb5.7-zsl-centos7.9-75-128/sql/sql_executor.cc:1792
#3  0x00000000022bc5d6 in sub_select (join=0x7f1fd40e89a0, qep_tab=0x7f1fd40b0bc0, end_of_records=false) at /home/jenkins/workspace/stonedb5.7-zsl-centos7.9-75-128/sql/sql_executor.cc:1311
#4  0x00000000022bce08 in evaluate_join_record (join=0x7f1fd40e89a0, qep_tab=0x7f1fd40b0a48) at /home/jenkins/workspace/stonedb5.7-zsl-centos7.9-75-128/sql/sql_executor.cc:1652
#5  0x00000000022bc584 in sub_select (join=0x7f1fd40e89a0, qep_tab=0x7f1fd40b0a48, end_of_records=false) at /home/jenkins/workspace/stonedb5.7-zsl-centos7.9-75-128/sql/sql_executor.cc:1304
#6  0x00000000022bc01f in do_select (join=0x7f1fd40e89a0) at /home/jenkins/workspace/stonedb5.7-zsl-centos7.9-75-128/sql/sql_executor.cc:957
#7  0x00000000022ba2df in JOIN::exec (this=0x7f1fd40e89a0) at /home/jenkins/workspace/stonedb5.7-zsl-centos7.9-75-128/sql/sql_executor.cc:206
#8  0x000000000233e867 in handle_query (thd=0x7f1fd4011f40, lex=0x7f1fd40140b8, result=0x7f1fd40d6780, added_options=0, removed_options=0, optimize_after_bh=2, free_join_from_bh=1)
    at /home/jenkins/workspace/stonedb5.7-zsl-centos7.9-75-128/sql/sql_select.cc:195
#9  0x00000000022ff761 in execute_sqlcom_select (thd=0x7f1fd4011f40, all_tables=0x7f1fd40d5538) at /home/jenkins/workspace/stonedb5.7-zsl-centos7.9-75-128/sql/sql_parse.cc:5184
#10 0x00000000022f954f in mysql_execute_command (thd=0x7f1fd4011f40, first_level=true) at /home/jenkins/workspace/stonedb5.7-zsl-centos7.9-75-128/sql/sql_parse.cc:2831
#11 0x000000000230042e in mysql_parse (thd=0x7f1fd4011f40, parser_state=0x7f21d4f44f00) at /home/jenkins/workspace/stonedb5.7-zsl-centos7.9-75-128/sql/sql_parse.cc:5621
#12 0x00000000022f6878 in dispatch_command (thd=0x7f1fd4011f40, com_data=0x7f21d4f45660, command=COM_QUERY) at /home/jenkins/workspace/stonedb5.7-zsl-centos7.9-75-128/sql/sql_parse.cc:1495
#13 0x00000000022f5a1b in do_command (thd=0x7f1fd4011f40) at /home/jenkins/workspace/stonedb5.7-zsl-centos7.9-75-128/sql/sql_parse.cc:1034
#14 0x00000000023f995b in handle_connection (arg=0x72d00a0) at /home/jenkins/workspace/stonedb5.7-zsl-centos7.9-75-128/sql/conn_handler/connection_handler_per_thread.cc:313
#15 0x0000000002915591 in pfs_spawn_thread (arg=0x7b9acb0) at /home/jenkins/workspace/stonedb5.7-zsl-centos7.9-75-128/storage/perfschema/pfs.cc:2197
#16 0x00007f221ff18ea5 in start_thread () from /lib64/libpthread.so.0
#17 0x00007f221d43cb0d in clone () from /lib64/libc.so.6

核心函数:

sub_select

/**
  Retrieve records ends with a given beginning from the result of a join.

  SYNOPSIS
    sub_select()
    join      pointer to the structure providing all context info for the query
    qep_tab   the first next table of the execution plan to be retrieved
              (called join_tab in the description below)
    end_of_records  true when we need to perform final steps of retrieval

  DESCRIPTION
    For a given partial join record consisting of records from the tables 
    preceding the table join_tab in the execution plan, the function
    retrieves all matching full records from the result set and
    send them to the result set stream. 

  @note
    The function effectively implements the  final (n-k) nested loops
    of nested loops join algorithm, where k is the ordinal number of
    the join_tab table and n is the total number of tables in the join query.
    It performs nested loops joins with all conjunctive predicates from
    the where condition pushed as low to the tables as possible.
    E.g. for the query
    @code
      SELECT * FROM t1,t2,t3
      WHERE t1.a=t2.a AND t2.b=t3.b AND t1.a BETWEEN 5 AND 9
    @endcode
    the predicate (t1.a BETWEEN 5 AND 9) will be pushed to table t1,
    given the selected plan prescribes to nest retrievals of the
    joined tables in the following order: t1,t2,t3.
    A pushed down predicate is attached to the table which it is pushed to,
    at the field join_tab->cond.
    When executing a nested loop of level k the function runs through
    the rows of 'join_tab' and for each row checks the pushed condition
    attached to the table.
    If it is false the function moves to the next row of the
    table. If the condition is true the function recursively executes (n-k-1)
    remaining embedded nested loops.
    The situation becomes more complicated if outer joins are involved in
    the execution plan. In this case the pushed down predicates can be
    checked only at certain conditions.
    Suppose for the query
    @code
      SELECT * FROM t1 LEFT JOIN (t2,t3) ON t3.a=t1.a
      WHERE t1>2 AND (t2.b>5 OR t2.b IS NULL)
    @endcode
    the optimizer has chosen a plan with the table order t1,t2,t3.
    The predicate P1=t1>2 will be pushed down to the table t1, while the
    predicate P2=(t2.b>5 OR t2.b IS NULL) will be attached to the table
    t2. But the second predicate can not be unconditionally tested right
    after a row from t2 has been read. This can be done only after the
    first row with t3.a=t1.a has been encountered.
    Thus, the second predicate P2 is supplied with a guarded value that is
    stored in the field 'found' of the first inner table for the outer join
    (table t2). When the first row with t3.a=t1.a for the current row
    of table t1 appears, the value becomes true. From now on the predicate
    is evaluated immediately after the row of table t2 has been read.
    When the first row with t3.a=t1.a has been encountered all
    conditions attached to the inner tables t2,t3 must be evaluated.
    Only when all of them are true the row is sent to the output stream.
    If not, the function returns to the lowest nest level that has a false
    attached condition.
    The predicates from on expressions are also pushed down. If in
    the above example the on expression were (t3.a=t1.a AND t2.a=t1.a),
    then t1.a=t2.a would be pushed down to table t2, and without any
    guard.
    If after the run through all rows of table t2, the first inner table
    for the outer join operation, it turns out that no matches are
    found for the current row of t1, then current row from table t1
    is complemented by nulls  for t2 and t3. Then the pushed down predicates
    are checked for the composed row almost in the same way as it had
    been done for the first row with a match. The only difference is
    the predicates from on expressions are not checked. 

  @par
  @b IMPLEMENTATION
  @par
    The function forms output rows for a current partial join of k
    tables recursively.
    For each partial join record ending with a certain row from
    join_tab it calls sub_select that builds all possible matching
    tails from the result set.
    To be able to check predicates conditionally, items of the class
    Item_func_trig_cond are employed.
    An object of this class is constructed from an item of class COND
    and a pointer to a guarding boolean variable.
    When the value of the guard variable is true the value of the object
    is the same as the value of the predicate, otherwise it just returns
    true.
    To carry out a return to a nested loop level of join table t the pointer 
    to t is remembered in the field 'return_tab' of the join structure.
    Consider the following query:
    @code
        SELECT * FROM t1,
                      LEFT JOIN
                      (t2, t3 LEFT JOIN (t4,t5) ON t5.a=t3.a)
                      ON t4.a=t2.a
           WHERE (t2.b=5 OR t2.b IS NULL) AND (t4.b=2 OR t4.b IS NULL)
    @endcode
    Suppose the chosen execution plan dictates the order t1,t2,t3,t4,t5
    and suppose for a given joined rows from tables t1,t2,t3 there are
    no rows in the result set yet.
    When first row from t5 that satisfies the on condition
    t5.a=t3.a is found, the pushed down predicate t4.b=2 OR t4.b IS NULL
    becomes 'activated', as well the predicate t4.a=t2.a. But
    the predicate (t2.b=5 OR t2.b IS NULL) can not be checked until
    t4.a=t2.a becomes true. 
    In order not to re-evaluate the predicates that were already evaluated
    as attached pushed down predicates, a pointer to the first
    most inner unmatched table is maintained in join_tab->first_unmatched.
    Thus, when the first row from t5 with t5.a=t3.a is found
    this pointer for t5 is changed from t4 to t2.             

    @par
    @b STRUCTURE @b NOTES
    @par
    join_tab->first_unmatched points always backwards to the first inner
    table of the embedding nested join, if any.

  @param join      pointer to the structure providing all context info for
                   the query
  @param qep_tab   the first next table of the execution plan to be retrieved
                   (referred to as join_tab in the description above)
  @param end_of_records  true when we need to perform final steps of retrieval

  @return
    return one of enum_nested_loop_state, except NESTED_LOOP_NO_MORE_ROWS.
*/

enum_nested_loop_state
sub_select(JOIN *join, QEP_TAB *const qep_tab,bool end_of_records)
{
  DBUG_ENTER("sub_select");

  qep_tab->table()->reset_null_row();

  /*
    End-of-records call: no row is read from this table, the call is simply
    forwarded to the next step of the plan. The result is stored in a local
    first so that the call completes before DBUG_RETURN runs.
  */
  if (end_of_records)
  {
    enum_nested_loop_state forwarded=
      (*qep_tab->next_select)(join, qep_tab + 1, end_of_records);
    DBUG_RETURN(forwarded);
  }

  if (qep_tab->prepare_scan())
    DBUG_RETURN(NESTED_LOOP_ERROR);

  if (qep_tab->starts_weedout())
    do_sj_reset(qep_tab->flush_weedout_table);

  /* Per-scan state: return point is this table, no match seen so far. */
  const plan_idx this_idx= qep_tab->idx();
  join->return_tab= this_idx;
  qep_tab->not_null_compl= true;
  qep_tab->found_match= false;

  if (qep_tab->last_inner() != NO_PLAN_IDX)
  {
    /*
      qep_tab is the first inner table of an outer join operation:
      clear its guard variable and make the last inner table of the
      group point back at it through first_unmatched.
    */
    qep_tab->found= false;
    QEP_AT(qep_tab, last_inner()).first_unmatched= this_idx;
  }

  if (qep_tab->do_firstmatch() || qep_tab->do_loosescan())
  {
    /*
      First table of a LooseScan range, or a table with a "jump" address
      in a FirstMatch range: start this round with no match recorded.
    */
    QEP_AT(qep_tab, match_tab).found_match= false;
  }

  join->thd->get_stmt_da()->reset_current_row_for_condition();

  READ_RECORD *const reader= &qep_tab->read_record;
  enum_nested_loop_state state= NESTED_LOOP_OK;
  bool first_pass= true;

  const bool batch_mode= qep_tab->pfs_batch_update(join);
  if (batch_mode)
    qep_tab->table()->file->start_psi_batch_mode();

  /*
    Scan loop: runs while everything is fine and no deeper level asked to
    return to a table placed before this one in the plan.
  */
  while (state == NESTED_LOOP_OK && join->return_tab >= this_idx)
  {
    int read_status;
    if (first_pass)
    {
      first_pass= false;
      read_status= (*qep_tab->read_first_record)(qep_tab);
    }
    else
    {
      read_status= reader->read_record(reader);
    }

    DBUG_EXECUTE_IF("bug13822652_1", join->thd->killed= THD::KILL_QUERY;);

    if (read_status > 0 || join->thd->is_error())
    {
      // Fatal error: the loop condition ends the scan on the next check.
      state= NESTED_LOOP_ERROR;
      continue;
    }

    if (read_status < 0)
      break;                          // Scan ends, state stays NESTED_LOOP_OK

    if (join->thd->killed)
    {
      // Aborted by user
      join->thd->send_kill_message();
      state= NESTED_LOOP_KILLED;
      continue;
    }

    if (qep_tab->keep_current_rowid)
      qep_tab->table()->file->position(qep_tab->table()->record[0]);

    state= evaluate_join_record(join, qep_tab);
  }

  /*
    First inner table of an outer join whose guard was never raised during
    the scan: produce the NULL-complemented row.
  */
  const bool needs_null_row= state == NESTED_LOOP_OK &&
                             qep_tab->last_inner() != NO_PLAN_IDX &&
                             !qep_tab->found;
  if (needs_null_row)
    state= evaluate_null_complemented_join_record(join, qep_tab);

  if (batch_mode)
    qep_tab->table()->file->end_psi_batch_mode();

  DBUG_RETURN(state);
}

evaluate_null_complemented_join_record

/**
  Build the NULL-complemented row of an outer join and pass it on.

  @details
    Used when qep_tab is the first inner table of an OUTER join and no
    matching record was found for the current outer row. Every inner table
    from qep_tab up to its last_inner() is switched to a NULL row and its
    attached condition is checked. If none of them rejects the row, the
    composed record is handed to evaluate_join_record() on the last inner
    table. The NULL-row flag of all inner tables is reset afterwards.

  @param join     the join being executed
  @param qep_tab  first inner table of the outer join operation

  @return Nested loop state. NESTED_LOOP_OK is also returned when an
          attached condition rejects the NULL-complemented row without
          raising an error.
*/

static enum_nested_loop_state
evaluate_null_complemented_join_record(JOIN *join, QEP_TAB *qep_tab)
{
  QEP_TAB *const first_inner_tab= qep_tab;
  QEP_TAB *const last_inner_tab= &QEP_AT(qep_tab, last_inner());

  DBUG_ENTER("evaluate_null_complemented_join_record");

  for (QEP_TAB *inner= first_inner_tab; inner <= last_inner_tab; ++inner)
  {
    // Duplicates weedout needs the rowid buffer to be bound.
    if (inner->copy_current_rowid &&
        !inner->copy_current_rowid->buffer_is_bound())
    {
      inner->copy_current_rowid->bind_buffer(inner->table()->file->ref);
    }

    /* Flip the guard variables: "match found", row is NULL-complemented. */
    inner->found= true;
    inner->not_null_compl= false;

    /* Replace the row of this inner table by an empty, all-NULL one. */
    restore_record(inner->table(),s->default_values);
    inner->table()->set_null_row();

    if (inner->starts_weedout() && inner != first_inner_tab)
    {
      // sub_select() only reset the weedout table of the first inner table.
      do_sj_reset(inner->flush_weedout_table);
    }

    /* Check the condition attached to this inner table. */
    Item *const attached= inner->condition();
    if (attached && !attached->val_int())
    {
      if (join->thd->killed)
      {
        join->thd->send_kill_message();
        DBUG_RETURN(NESTED_LOOP_KILLED);
      }

      /* Rejected row: report an error if one was raised, otherwise OK. */
      if (join->thd->is_error())
        DBUG_RETURN(NESTED_LOOP_ERROR);
      DBUG_RETURN(NESTED_LOOP_OK);
    }
  }

  /*
    For the rest of the execution this record counts as a match: it has
    been built and satisfies the conditions. Recompute first_unmatched of
    the last inner table the same way evaluate_join_record() does.
  */
  plan_idx upper= QEP_AT(last_inner_tab, first_unmatched).first_upper();
  if (upper != NO_PLAN_IDX &&
      join->qep_tab[upper].last_inner() != last_inner_tab->idx())
  {
    upper= NO_PLAN_IDX;
  }
  last_inner_tab->first_unmatched= upper;

  /*
    The NULL-complemented row satisfies all conditions attached to the
    inner tables, so send it on to be joined with the remaining tables.
    evaluate_join_record() evaluates the condition of the last inner table
    once more; this is not deemed to have a significant performance impact.
  */
  const enum_nested_loop_state rc=
    evaluate_join_record(join, last_inner_tab);

  QEP_TAB *cleanup= first_inner_tab;
  while (cleanup <= last_inner_tab)
  {
    cleanup->table()->reset_null_row();
    cleanup++;
  }

  DBUG_RETURN(rc);
}

evaluate_join_record

/**
  @brief Process one row of the nested loop join.

  This function will evaluate parts of WHERE/ON clauses that are
  applicable to the partial row on hand and in case of success
  submit this row to the next level of the nested loop
  (qep_tab->next_select is called with qep_tab+1).
  join->return_tab may be modified to cause a return to a previous
  table of the plan.

  @param  join     - The join object
  @param  qep_tab  - The most inner table being processed

  @return Nested loop state:
    - NESTED_LOOP_KILLED if join->thd->killed is set after the condition
      attached to qep_tab has been evaluated;
    - NESTED_LOOP_ERROR if join->thd->is_error() is set after a condition
      evaluation or after next_select, or if do_sj_dups_weedout()
      returns -1;
    - any state other than NESTED_LOOP_OK returned by next_select;
    - NESTED_LOOP_OK otherwise, including when the row is rejected.
*/

static enum_nested_loop_state
evaluate_join_record(JOIN *join, QEP_TAB *const qep_tab)
{
  bool not_used_in_distinct= qep_tab->not_used_in_distinct;
  /* Snapshot, compared with join->found_records after next_select. */
  ha_rows found_records=join->found_records;
  Item *condition= qep_tab->condition();
  const plan_idx qep_tab_idx= qep_tab->idx();
  /* Stays true when no condition is attached to qep_tab. */
  bool found= TRUE;
  DBUG_ENTER("evaluate_join_record");
  DBUG_PRINT("enter",
             ("join: %p join_tab index: %d table: %s cond: %p",
              join, static_cast<int>(qep_tab_idx),
              qep_tab->table()->alias, condition));

  if (condition)
  {
    found= MY_TEST(condition->val_int());

    if (join->thd->killed)
    {
      join->thd->send_kill_message();
      DBUG_RETURN(NESTED_LOOP_KILLED);
    }

    /* check for errors evaluating the condition */
    if (join->thd->is_error())
      DBUG_RETURN(NESTED_LOOP_ERROR);
  }
  if (found)
  {
    /*
      There is no condition on this qep_tab or the attached pushed down
      condition is true => a match is found.
    */
    while (qep_tab->first_unmatched != NO_PLAN_IDX && found)
    {
      /*
        The while condition is always false if qep_tab is not
        the last inner join table of an outer join operation.
      */
      QEP_TAB *first_unmatched= &QEP_AT(qep_tab, first_unmatched);
      /*
        Mark that a match for the current row of the outer table is found.
        This activates WHERE clause predicates attached the inner tables of
        the outer join.
      */
      first_unmatched->found= true;
      for (QEP_TAB *tab= first_unmatched; tab <= qep_tab; tab++)
      {
        /*
          Check all predicates that have just been activated.

          Actually all predicates non-guarded by first_unmatched->found
          will be re-evaluated again. It could be fixed, but, probably,
          it's not worth doing now.

          not_exists_optimize has been created from a
          condition containing 'is_null'. This 'is_null'
          predicate is still present on any 'tab' with
          'not_exists_optimize'. Furthermore, the usual rules
          for condition guards also applies for
          'not_exists_optimize' -> When 'is_null==false' we
          know all cond. guards are open and we can apply
          the 'not_exists_optimize'.
        */
        assert(!(tab->table()->reginfo.not_exists_optimize &&
                 !tab->condition()));

        if (tab->condition() && !tab->condition()->val_int())
        {
          /* The condition attached to table tab is false */

          if (tab->table()->reginfo.not_exists_optimize)
          {
            /*
              When not_exists_optimize is set and a matching row is found, the
              outer row should be excluded from the result set: no need to
              explore this record, thus we don't call the next_select.
              And, no need to explore other following records of 'tab', so we
              set join->return_tab.
              As first_unmatched->found was set above, sub_select() for
              that table will not produce a NULL-complemented record
              (it only does so while 'found' is false).
              Note that the calculation below can set return_tab to -1
              i.e. PRE_FIRST_PLAN_IDX.
            */
            join->return_tab= qep_tab_idx - 1;
            DBUG_RETURN(NESTED_LOOP_OK);
          }

          if (tab == qep_tab)
            found= 0;
          else
          {
            /*
              Set a return point if rejected predicate is attached
              not to the last table of the current nest level.
            */
            join->return_tab= tab->idx();
            DBUG_RETURN(NESTED_LOOP_OK);
          }
        }
        /* check for errors evaluating the condition */
        if (join->thd->is_error())
          DBUG_RETURN(NESTED_LOOP_ERROR);
      }
      /*
        Check whether qep_tab is not the last inner table
        for another embedding outer join: first_unmatched only moves to
        the upper nest when that nest's last_inner() is qep_tab itself.
      */
      plan_idx f_u= first_unmatched->first_upper();
      if (f_u != NO_PLAN_IDX && join->qep_tab[f_u].last_inner() != qep_tab_idx)
        f_u= NO_PLAN_IDX;
      qep_tab->first_unmatched= f_u;
    }

    /*
      Working copy of the return point; it is merged back into
      join->return_tab only after next_select has run (see below).
    */
    plan_idx return_tab= join->return_tab;

    if (qep_tab->finishes_weedout() && found)
    {
      /*
        -1 is treated as an error; 1 rejects the row (presumably because it
        is a duplicate -- confirm against do_sj_dups_weedout()).
      */
      int res= do_sj_dups_weedout(join->thd, qep_tab->check_weed_out_table);
      if (res == -1)
        DBUG_RETURN(NESTED_LOOP_ERROR);
      else if (res == 1)
        found= FALSE;
    }
    else if (qep_tab->do_loosescan() &&
             QEP_AT(qep_tab, match_tab).found_match)
    { 
      /*
         Loosescan algorithm requires an access method that gives 'sorted'
         retrieval of keys, or an access method that provides only one
         row (which is inherently sorted).
         EQ_REF and LooseScan may happen if dependencies in subquery (e.g.,
         outer join) prevents table pull-out.
       */
      assert(qep_tab->use_order() || qep_tab->type() == JT_EQ_REF);

      /*
         Previous row combination for duplicate-generating range,
         generated a match.  Compare keys of this row and previous row
         to determine if this is a duplicate that should be skipped.
       */
      if (key_cmp(qep_tab->table()->key_info[qep_tab->index()].key_part,
                  qep_tab->loosescan_buf, qep_tab->loosescan_key_len))
        /*
           Keys do not match.
           Reset found_match for last table of duplicate-generating range,
           to avoid comparing keys until a new match has been found.
        */
        QEP_AT(qep_tab, match_tab).found_match= false;
      else
        found= false;
    }

    /*
      It was not just a return to lower loop level when one
      of the newly activated predicates is evaluated as false
      (See above join->return_tab= tab).
    */
    join->examined_rows++;
    DBUG_PRINT("counts", ("evaluate_join_record join->examined_rows++: %lu",
                          (ulong) join->examined_rows));

    if (found)
    {
      enum enum_nested_loop_state rc;
      // A match is found for the current partial join prefix.
      qep_tab->found_match= true;

      /* Hand the row to the next step of the plan (not an end-of-records call). */
      rc= (*qep_tab->next_select)(join, qep_tab+1, 0);
      join->thd->get_stmt_da()->inc_current_row_for_condition();
      if (rc != NESTED_LOOP_OK)
        DBUG_RETURN(rc);

      /* check for errors evaluating the condition */
      if (join->thd->is_error())
        DBUG_RETURN(NESTED_LOOP_ERROR);

      if (qep_tab->do_loosescan() &&
          QEP_AT(qep_tab,match_tab).found_match)
      {
        /*
           A match was found for a duplicate-generating range of a semijoin.
           Copy key to be able to determine whether subsequent rows
           will give duplicates that should be skipped.
        */
        KEY *key= qep_tab->table()->key_info + qep_tab->index();
        key_copy(qep_tab->loosescan_buf, qep_tab->table()->record[0],
                 key, qep_tab->loosescan_key_len);
      }
      else if (qep_tab->do_firstmatch() &&
               QEP_AT(qep_tab, match_tab).found_match)
      {
        /*
          We should return to qep_tab->firstmatch_return after we have
          enumerated all the suffixes for current prefix row combination
        */
        set_if_smaller(return_tab, qep_tab->firstmatch_return);
      }

      /*
        Test if this was a SELECT DISTINCT query on a table that
        was not in the field list;  In this case we can abort if
        we found a row, as no new rows can be added to the result.
      */
      if (not_used_in_distinct && found_records != join->found_records)
        set_if_smaller(return_tab, qep_tab_idx - 1);

      /* Publish the return point, never raising what deeper levels set. */
      set_if_smaller(join->return_tab, return_tab);
    }
    else
    {
      join->thd->get_stmt_da()->inc_current_row_for_condition();
      if (qep_tab->not_null_compl)
      {
        /* a NULL-complemented row is not in a table so cannot be locked */
        qep_tab->read_record.unlock_row(qep_tab);
      }
    }
  }
  else
  {
    /*
      The condition pushed down to the table qep_tab rejects all rows
      with the beginning coinciding with the current partial join.
    */
    join->examined_rows++;
    join->thd->get_stmt_da()->inc_current_row_for_condition();
    /* Only a real table row (not a NULL-complemented one) is unlocked. */
    if (qep_tab->not_null_compl)
      qep_tab->read_record.unlock_row(qep_tab);
  }
  DBUG_RETURN(NESTED_LOOP_OK);
}

核心数据结构:

TABLE

/**
  In-memory descriptor of one table instance as used by the server.

  It bundles a pointer to the shared definition (TABLE_SHARE), the storage
  engine handler, the record buffers, the key and column bitmaps, and a
  number of state flags, together with small inline helpers that operate
  on them.
*/
struct TABLE
{
  /// Zero-fills the whole object (see the note below on why this matters).
  TABLE() { memset(this, 0, sizeof(*this)); }
  /*
    Since TABLE instances are often cleared using memset(), do not
    add virtual members and do not inherit from TABLE.
    Otherwise memset() will start overwriting the vtable pointer.
  */

  TABLE_SHARE  *s;
  handler  *file;
  TABLE *next, *prev;

private:
  /**
     Links for the lists of used/unused TABLE objects for the particular
     table in the specific instance of Table_cache (in other words for
     specific Table_cache_element object).
     Declared as private to avoid direct manipulation with those objects.
     One should use methods of I_P_List template instead.
  */
  TABLE *cache_next, **cache_prev;

  /*
    Give Table_cache_element access to the above two members to allow
    using them for linking TABLE objects in a list.
  */
  friend class Table_cache_element;

public:

  THD  *in_use;                        /* Which thread uses this */
  Field **field;      /* Pointer to fields */
  /// Count of hidden fields, if internal temporary table; 0 otherwise.
  uint hidden_field_count;

  uchar *record[2];      /* Pointer to records */
  uchar *write_row_record;    /* Used as optimisation in
             THD::write_row */
  uchar *insert_values;                  /* used by INSERT ... UPDATE */
  /*
    Map of keys that can be used to retrieve all data from this table
    needed by the query without reading the row.
  */
  key_map covering_keys;
  key_map quick_keys, merge_keys;

  /*
    possible_quick_keys is a superset of quick_keys to use with EXPLAIN of
    JOIN-less commands (single-table UPDATE and DELETE).

    When explaining regular JOINs, we use JOIN_TAB::keys to output the
    "possible_keys" column value. However, it is not available for
    single-table UPDATE and DELETE commands, since they don't use JOIN
    optimizer at the top level. OTOH they directly use the range optimizer,
    that collects all keys usable for range access here.
  */
  key_map possible_quick_keys;

  /*
    A set of keys that can be used in the query that references this
    table.

    All indexes disabled on the table's TABLE_SHARE (see TABLE::s) will be
    subtracted from this set upon instantiation. Thus for any TABLE t it holds
    that t.keys_in_use_for_query is a subset of t.s.keys_in_use. Generally we
    must not introduce any new keys here (see setup_tables).

    The set is implemented as a bitmap.
  */
  key_map keys_in_use_for_query;
  /* Map of keys that can be used to calculate GROUP BY without sorting */
  key_map keys_in_use_for_group_by;
  /* Map of keys that can be used to calculate ORDER BY without sorting */
  key_map keys_in_use_for_order_by;
  KEY  *key_info;      /* data of keys defined for the table */

  Field *next_number_field;    /* Set if next_number is activated */
  Field *found_next_number_field;  /* Set on open */
  Field **vfield;                       /* Pointer to generated fields*/
  Field *hash_field;                    /* Field used by unique constraint */
  Field *fts_doc_id_field;              /* Set if FTS_DOC_ID field is present */

  /* Table's triggers, 0 if there are none */
  Table_trigger_dispatcher *triggers;
  TABLE_LIST *pos_in_table_list;/* Element referring to this table */
  /* Position in thd->locked_table_list under LOCK TABLES */
  TABLE_LIST *pos_in_locked_tables;
  ORDER    *group;
  const char  *alias;                /* alias or table name */
  uchar    *null_flags;
  my_bitmap_map  *bitmap_init_value;
  MY_BITMAP     def_read_set, def_write_set, tmp_set; /* containers */
  /*
    Bitmap of fields that one or more query condition refers to. Only
    used if optimizer_condition_fanout_filter is turned 'on'.
    Currently, only the WHERE clause and ON clause of inner joins is
    taken into account but not ON conditions of outer joins.
    Furthermore, HAVING conditions apply to groups and are therefore
    not useful as table condition filters.
  */
  MY_BITMAP     cond_set;

  /**
    Bitmap of table fields (columns), which are explicitly set in the
    INSERT INTO statement. It is declared here to avoid memory allocation
    (on MEM_ROOT).

    @sa fields_set_during_insert.
  */
  MY_BITMAP     def_fields_set_during_insert;

  MY_BITMAP     *read_set, *write_set;          /* Active column sets */

  /**
    A pointer to the bitmap of table fields (columns), which are explicitly set
    in the INSERT INTO statement.

    fields_set_during_insert points to def_fields_set_during_insert
    for base (non-temporary) tables. In other cases, it is NULL.
    Triggers can not be defined for temporary tables, so this bitmap does not
    matter for temporary tables.

    @sa def_fields_set_during_insert.
  */
  MY_BITMAP     *fields_set_during_insert;
  uint maybe_null;
  /*
   The ID of the query that opened and is using this table. Has different
   meanings depending on the table type.

   Temporary tables:

   table->query_id is set to thd->query_id for the duration of a statement
   and is reset to 0 once it is closed by the same statement. A non-zero
   table->query_id means that a statement is using the table even if it's
   not the current statement (table is in use by some outer statement).

   Non-temporary tables:

   Under pre-locked or LOCK TABLES mode: query_id is set to thd->query_id
   for the duration of a statement and is reset to 0 once it is closed by
   the same statement. A non-zero query_id is used to control which tables
   in the list of pre-opened and locked tables are actually being used.
  */
  query_id_t  query_id;

  /*
    For each key that has quick_keys.is_set(key) == TRUE: estimate of #records
    and max #key parts that range access would use.
  */
  ha_rows  quick_rows[MAX_KEY];

  /* Bitmaps of key parts that =const for the entire join. */
  key_part_map  const_key_parts[MAX_KEY];

  uint    quick_key_parts[MAX_KEY];
  uint    quick_n_ranges[MAX_KEY];

  /*
    Estimate of number of records that satisfy SARGable part of the table
    condition, or table->file->records if no SARGable condition could be
    constructed.
    This value is used by join optimizer as an estimate of number of records
    that will pass the table condition (condition that depends on fields of
    this table and constants)
  */
  ha_rows       quick_condition_rows;

  uint          lock_position;          /* Position in MYSQL_LOCK.table */
  uint          lock_data_start;        /* Start pos. in MYSQL_LOCK.locks */
  uint          lock_count;             /* Number of locks */
  uint          temp_pool_slot;    /* Used by intern temp tables */
  uint    db_stat;    /* mode of file as in handler.h */
  int    current_lock;           /* Type of lock on table */

private:
  /**
    If true, this table is inner w.r.t. some outer join operation, all columns
    are nullable (in the query), and null_row may be true.
  */
  my_bool nullable;

public:
  /*
    If true, the current table row is considered to have all columns set to
    NULL, including columns declared as "not null" (see nullable).
    @todo make it private, currently join buffering changes it through a pointer
  */
  my_bool null_row;

  uint8   status;                       /* What's in record[0] */
  my_bool copy_blobs;                   /* copy_blobs when storing */

  /*
    TODO: Each of the following flags take up 8 bits. They can just as easily
    be put into one single unsigned long and instead of taking up 18
    bytes, it would take up 4.
  */
  my_bool force_index;

  /**
    Flag set when the statement contains FORCE INDEX FOR ORDER BY
    See TABLE_LIST::process_index_hints().
  */
  my_bool force_index_order;

  /**
    Flag set when the statement contains FORCE INDEX FOR GROUP BY
    See TABLE_LIST::process_index_hints().
  */
  my_bool force_index_group;
  my_bool distinct;
  my_bool const_table;
  my_bool no_rows;

  /**
     If set, the optimizer has found that row retrieval should access index
     tree only.
   */
  my_bool key_read;
  /**
     Certain statements which need the full row, set this to ban index-only
     access.
  */
  my_bool no_keyread;
  my_bool locked_by_logger;
  /**
    If set, indicate that the table is not replicated by the server.
  */
  my_bool no_replicate;
  my_bool locked_by_name;
  my_bool fulltext_searched;
  my_bool no_cache;
  /* To signal that the table is associated with a HANDLER statement */
  my_bool open_by_handler;
  /*
    To indicate that a non-null value of the auto_increment field
    was provided by the user or retrieved from the current record.
    Used only in the MODE_NO_AUTO_VALUE_ON_ZERO mode.
  */
  my_bool auto_increment_field_not_null;
  my_bool insert_or_update;             /* Can be used by the handler */
  my_bool alias_name_used;    /* true if table_name is alias */
  my_bool get_fields_in_item_tree;      /* Signal to fix_field */
  /**
    This table must be reopened and is not to be reused.
    NOTE: The TABLE will not be reopened during LOCK TABLES in
    close_thread_tables!!!
  */
  my_bool m_needs_reopen;
private:
  bool created; /* For tmp tables. TRUE <=> tmp table has been instantiated.*/
public:
  uint max_keys; /* Size of allocated key_info array. */

  struct /* field connections */
  {
    class JOIN_TAB *join_tab;
    class QEP_TAB *qep_tab;
    enum thr_lock_type lock_type;    /* How table is used */
    bool not_exists_optimize;
    /*
      TRUE <=> range optimizer found that there are no rows satisfying
      table conditions.
    */
    bool impossible_range;
  } reginfo;

  /**
     @todo This member should not be declared in-line. That makes it
     impossible for any function that does memory allocation to take a const
     reference to a TABLE object.
   */
  MEM_ROOT mem_root;
  /**
     Initialized in Item_func_group_concat::setup for appropriate
     temporary table if GROUP_CONCAT is used with ORDER BY | DISTINCT
     and BLOB field count > 0.
   */
  Blob_mem_storage *blob_storage;
  GRANT_INFO grant;
  Filesort_info sort;
  partition_info *part_info;            /* Partition related information */
  /* If true, all partitions have been pruned away */
  bool all_partitions_pruned_away;
  MDL_ticket *mdl_ticket;

private:
  /// Cost model object for operations on this table
  Cost_model_table m_cost_model;
public:

  void init(THD *thd, TABLE_LIST *tl);
  bool fill_item_list(List<Item> *item_list) const;
  void reset_item_list(List<Item> *item_list) const;
  void clear_column_bitmaps(void);
  void prepare_for_position(void);

  void mark_column_used(THD *thd, Field *field, enum enum_mark_columns mark);
  void mark_columns_used_by_index_no_reset(uint index, MY_BITMAP *map,
                                           uint key_parts= 0);
  void mark_columns_used_by_index(uint index);
  void mark_auto_increment_column(void);
  void mark_columns_needed_for_update(bool mark_binlog_columns);
  void mark_columns_needed_for_delete(void);
  void mark_columns_needed_for_insert(void);
  void mark_columns_per_binlog_row_image(void);
  void mark_generated_columns(bool is_update);
  bool is_field_used_by_generated_columns(uint field_index);
  void mark_gcol_in_maps(Field *field);
  /**
    Make the given bitmaps the active read/write column sets and, when a
    handler is attached and the table is instantiated, notify the handler
    through column_bitmaps_signal().
  */
  inline void column_bitmaps_set(MY_BITMAP *read_set_arg,
                                 MY_BITMAP *write_set_arg)
  {
    read_set= read_set_arg;
    write_set= write_set_arg;
    if (file && created)
      file->column_bitmaps_signal();
  }
  /// Same as column_bitmaps_set(), but the handler is never notified.
  inline void column_bitmaps_set_no_signal(MY_BITMAP *read_set_arg,
                                           MY_BITMAP *write_set_arg)
  {
    read_set= read_set_arg;
    write_set= write_set_arg;
  }
  /// Use the share's all_set bitmap as both the read set and the write set.
  inline void use_all_columns()
  {
    column_bitmaps_set(&s->all_set, &s->all_set);
  }
  /// Point read_set/write_set back at def_read_set/def_write_set (no signal).
  inline void default_column_bitmaps()
  {
    read_set= &def_read_set;
    write_set= &def_write_set;
  }
  /** Should this instance of the table be reopened? */
  inline bool needs_reopen()
  { return !db_stat || m_needs_reopen; }
  /// @returns first non-hidden column
  Field **visible_field_ptr() const
  { return field + hidden_field_count; }
  /// @returns count of visible fields
  uint visible_field_count() const
  { return s->fields - hidden_field_count; }
  bool alloc_keys(uint key_count);
  bool add_tmp_key(Field_map *key_parts, char *key_name);
  void use_index(int key_to_save);

  /**
    Switch the key_read flag on or off.

    Nothing happens when the flag already has the requested value. When it
    changes and the table is instantiated (is_created()), the handler is
    informed via extra(HA_EXTRA_KEYREAD) or extra(HA_EXTRA_NO_KEYREAD).
    A handler must be attached (asserted).

    @param flag  true to enable key read, false to disable it
  */
  void set_keyread(bool flag)
  {
    assert(file);
    if (flag && !key_read)
    {
      key_read= 1;
      if (is_created())
        file->extra(HA_EXTRA_KEYREAD);
    }
    else if (!flag && key_read)
    {
      key_read= 0;
      if (is_created())
        file->extra(HA_EXTRA_NO_KEYREAD);
    }
  }

  /**
    Check whether the given index has a virtual generated column.

    @param index_no        the given index to check

    @returns true if the index is defined over at least one virtual generated
    column
  */
  inline bool index_contains_some_virtual_gcol(uint index_no)
  {
    assert(index_no < s->keys);
    return key_info[index_no].flags & HA_VIRTUAL_GEN_KEY;
  }
  bool update_const_key_parts(Item *conds);

  bool check_read_removal(uint index);

  /// @returns the pointer difference s->default_values - record[0]
  my_ptrdiff_t default_values_offset() const
  { return (my_ptrdiff_t) (s->default_values - record[0]); }

  /// Return true if table is instantiated, and false otherwise.
  bool is_created() const { return created; }

  /**
    Set the table as "created", and enable flags in storage engine
    that could not be enabled without an instantiated table.
  */
  void set_created()
  {
    if (created)
      return;
    if (key_read)
      file->extra(HA_EXTRA_KEYREAD);
    created= true;
  }
  /**
    Set the contents of table to be "deleted", ie "not created", after having
    deleted the contents.
  */
  void set_deleted()
  {
    created= false;
  }
  /// Set table as nullable, ie it is inner wrt some outer join
  void set_nullable() { nullable= TRUE; }

  /// Return whether table is nullable
  bool is_nullable() const { return nullable; }

  /// @return true if table contains one or more generated columns
  bool has_gcol() const { return vfield; }

  /// @return true if table contains one or more virtual generated columns
  bool has_virtual_gcol() const;

  /// Set current row as "null row", for use in null-complemented outer join
  void set_null_row()
  {
    null_row= TRUE;
    status|= STATUS_NULL_ROW;
    memset(null_flags, 255, s->null_bytes);
  }

  /// Clear "null row" status for the current row
  void reset_null_row()
  {
    null_row= FALSE;
    status&= ~STATUS_NULL_ROW;
  }

  /// @return true if current row is null-extended
  bool has_null_row() const { return null_row; }

  /**
    Initialize the optimizer cost model.

    This function should be called each time a new query is started.

    @param cost_model_server the main cost model object for the query
  */
  void init_cost_model(const Cost_model_server* cost_model_server)
  {
    m_cost_model.init(cost_model_server, this);
  }

  /**
    Return the cost model object for this table.
  */
  const Cost_model_table* cost_model() const { return &m_cost_model; }

  /**
    Fix table's generated columns' (GC) expressions

    @details When a table is opened from the dictionary, the GCs' expressions
    are fixed during opening (see fix_fields_gcol_func()). After query
    execution, Item::cleanup() is called on them (see cleanup_gc_items()). When
    the table is opened from the table cache, the GCs need to be fixed again
    and this function does that.

    @param[in] thd     the current thread
    @return true if error, else false
  */
  bool refix_gc_items(THD *thd);

  /**
    Clean any state in items associated with generated columns to be ready for
    the next statement.
  */
  void cleanup_gc_items();

 /**
   Check if table contains any records.

   @param      thd     The thread object
   @param[out] retval  Pointer to boolean value (true if table is not empty).

   @returns  false for success, true for error
 */
 bool contains_records(THD *thd, bool *retval);
private:

  /**
    This flag decides whether or not we should log the drop temporary table
    command.
  */
  bool should_binlog_drop_if_temp_flag;

public:
  /**
    Virtual fields of type BLOB have a flag m_keep_old_value. This flag is set
    to false for all such fields in this table.
  */
  void blobs_need_not_keep_old_value();

  /**
    Set the variable should_binlog_drop_if_temp_flag, so that
    the logging of temporary tables can be decided.

    @param should_binlog  the value to set flag should_binlog_drop_if_temp_flag
  */
  void set_binlog_drop_if_temp(bool should_binlog);

  /**
    @return whether should_binlog_drop_if_temp_flag flag is
            set or not
  */
  bool should_binlog_drop_if_temp(void) const;
};

Field

class Field: public Proto_field
{
  Field(const Item &);        /* Prevent use of these */
  void operator=(Field &);
public:

  /**
    @return true when unireg_check is TIMESTAMP_DN_FIELD or
            TIMESTAMP_DNUN_FIELD, false for every other utype value.
  */
  bool has_insert_default_function() const
  {
    switch (unireg_check)
    {
    case TIMESTAMP_DN_FIELD:
    case TIMESTAMP_DNUN_FIELD:
      return true;
    default:
      return false;
    }
  }

  /**
    @return true when unireg_check is TIMESTAMP_UN_FIELD or
            TIMESTAMP_DNUN_FIELD, false for every other utype value.
  */
  bool has_update_default_function() const
  {
    switch (unireg_check)
    {
    case TIMESTAMP_UN_FIELD:
    case TIMESTAMP_DNUN_FIELD:
      return true;
    default:
      return false;
    }
  }

  /* To do: inherit Sql_alloc and get these for free */
  static void *operator new(size_t size) throw ()
  { return sql_alloc(size); }
  static void *operator new(size_t size, MEM_ROOT *mem_root) throw () {
    return alloc_root(mem_root, size);
  }
  static void operator delete(void *ptr, MEM_ROOT *mem_root)
  { assert(false); /* never called */ }

  static void operator delete(void *ptr_arg, size_t size) throw()
  { TRASH(ptr_arg, size); }

  uchar    *ptr;      // Position to field in record

private:
  /**
     Byte where the @c NULL bit is stored inside a record. If this Field is a
     @c NOT @c NULL field, this member is @c NULL.
  */
  uchar *m_null_ptr;

  /**
    Flag: if the NOT-NULL field can be temporary NULL.
  */
  bool m_is_tmp_nullable;

  /**
    This is a flag with the following semantics:
      - it can be changed only when m_is_tmp_nullable is true;
      - it specifies if this field in the first current record
        (TABLE::record[0]) was set to NULL (temporary NULL).

    This flag is used for trigger handling.
  */
  bool m_is_tmp_null;

  /**
    The value of THD::count_cuted_fields at the moment of setting
    m_is_tmp_null attribute.
  */
  enum_check_fields m_count_cuted_fields_saved;

protected:
  const uchar *get_null_ptr() const
  { return m_null_ptr; }

  uchar *get_null_ptr() 
  { return m_null_ptr; }

public:
  /*
    Note that you can use table->in_use as replacement for current_thd member 
    only inside of val_*() and store() members (e.g. you can't use it in cons)
  */
  TABLE *table;                                 // Pointer for table
  TABLE *orig_table;                            // Pointer to original table
  const char  **table_name, *field_name;
  LEX_STRING  comment;
  /* Field is part of the following keys */
  key_map key_start;                /* Keys that starts with this field */
  /// Indexes which contain this field entirely (not only a prefix)
  key_map part_of_key;
  key_map part_of_sortkey;          /* ^ but only keys usable for sorting */
  /**
    All keys that include this field, but not extended by the storage engine to
    include primary key columns.
  */
  key_map part_of_key_not_extended;

  /* 
    We use three additional unireg types for TIMESTAMP to overcome limitation 
    of current binary format of .frm file. We'd like to be able to support 
    NOW() as default and on update value for such fields but unable to hold 
    this info anywhere except unireg_check field. This issue will be resolved
    in more clean way with transition to new text based .frm format.
    See also comment for Field_timestamp::Field_timestamp().
  */
  enum utype  { NONE,DATE,SHIELD,NOEMPTY,CASEUP,PNR,BGNR,PGNR,YES,NO,REL,
    CHECK,EMPTY,UNKNOWN_FIELD,CASEDN,NEXT_NUMBER,INTERVAL_FIELD,
                BIT_FIELD, TIMESTAMP_OLD_FIELD, CAPITALIZE, BLOB_FIELD,
                TIMESTAMP_DN_FIELD, TIMESTAMP_UN_FIELD, TIMESTAMP_DNUN_FIELD,
                GENERATED_FIELD= 128 };
  enum geometry_type
  {
    GEOM_GEOMETRY = 0, GEOM_POINT = 1, GEOM_LINESTRING = 2, GEOM_POLYGON = 3,
    GEOM_MULTIPOINT = 4, GEOM_MULTILINESTRING = 5, GEOM_MULTIPOLYGON = 6,
    GEOM_GEOMETRYCOLLECTION = 7
  };
  enum imagetype { itRAW, itMBR};

  utype    unireg_check;
  uint32  field_length;    // Length of field
  uint32  flags;
  uint16        field_index;            // field number in fields array
  uchar    null_bit;    // Bit used to test null bit
  /**
     If true, this field was created in create_tmp_field_from_item from a NULL
     value. This means that the type of the field is just a guess, and the type
     may be freely coerced to another type.

     @see create_tmp_field_from_item
     @see Item_type_holder::get_real_type

   */
  bool is_created_from_null_item;
  /**
     True if this field belongs to some index (unlike part_of_key, the index
     might have only a prefix).
  */
  bool m_indexed;
private:
  enum enum_pushed_warnings
  {
    BAD_NULL_ERROR_PUSHED= 1,
    NO_DEFAULT_FOR_FIELD_PUSHED= 2,
    NO_DEFAULT_FOR_VIEW_FIELD_PUSHED= 4
  };

  /*
    Bitmask specifying which warnings have been already pushed in order
    not to repeat the same warning for the column multiple times.
    Uses values of enum_pushed_warnings to control pushed warnings.
  */
  unsigned int m_warnings_pushed;

public:
  /* Generated column data */
  Generated_column *gcol_info;
  /*
    Indication that the field is physically stored in tables
    rather than just generated on SQL queries.
    As of now, FALSE can only be set for virtual generated columns.
  */
  bool stored_in_db;
  bool is_gcol() const { return gcol_info; }
  bool is_virtual_gcol() const { return gcol_info && !stored_in_db; }

  Field(uchar *ptr_arg,uint32 length_arg,uchar *null_ptr_arg,
        uchar null_bit_arg, utype unireg_check_arg,
        const char *field_name_arg);

  virtual ~Field()
  { }

  void reset_warnings()
  { m_warnings_pushed= 0; }

  /**
    Turn on temporary nullability for the field.
  */
  void set_tmp_nullable()
  {
    m_is_tmp_nullable= true;
  }

  /**
    Turn off temporary nullability for the field.
  */
  void reset_tmp_nullable()
  {
    m_is_tmp_nullable= false;
  }

  /**
    Reset temporary NULL value for field
  */
  void reset_tmp_null()
  {
    m_is_tmp_null= false;
  }

  void set_tmp_null();

  /**
    @return temporary NULL-ability flag.
    @retval true if NULL can be assigned temporary to the Field.
    @retval false if NULL can not be assigned even temporary to the Field.
  */
  bool is_tmp_nullable() const
  { return m_is_tmp_nullable; }

  /**
    @return whether Field has temporary value NULL.
    @retval true if the Field has temporary value NULL.
    @retval false if the Field's value is NOT NULL, or if the temporary
    NULL-ability flag is reset.
  */
  bool is_tmp_null() const
  { return is_tmp_nullable() && m_is_tmp_null; }

  /* Store functions returns 1 on overflow and -1 on fatal error */
  virtual type_conversion_status store(const char *to, size_t length,
                                       const CHARSET_INFO *cs)=0;
  virtual type_conversion_status store(double nr)=0;
  virtual type_conversion_status store(longlong nr, bool unsigned_val)=0;
  /**
    Store a temporal value given in packed longlong format into the field.

    The packed value is compatible with TIME_to_longlong_time_packed(),
    TIME_to_longlong_date_packed() or TIME_to_longlong_datetime_packed().
    Note, the value must already be rounded or truncated according to
    field->decimals().

    This default implementation forwards to store(longlong, bool) with
    unsigned_val set to false.

    @param  nr  temporal value in packed longlong format.
    @return the type_conversion_status returned by store().
  */
  virtual type_conversion_status store_packed(longlong nr)
  {
    const bool unsigned_val= false;
    return store(nr, unsigned_val);
  }
  virtual type_conversion_status store_decimal(const my_decimal *d)=0;
  /**
    Store MYSQL_TIME value with the given amount of decimal digits
    into a field.

    Note, the "dec" parameter represents number of digits of the Item
    that previously created the MYSQL_TIME value. It's needed when we
    store the value into a CHAR/VARCHAR/TEXT field to display
    the proper amount of fractional digits.
    For other field types the "dec" value does not matter and is ignored.

    @param ltime   Time, date or datetime value.
    @param dec     Number of decimals in ltime.
    @retval false  on success
    @retval true   on error
  */
  virtual type_conversion_status store_time(MYSQL_TIME *ltime, uint8 dec);
  /**
    Store a MYSQL_TIME value into the field when the number of fractional
    digits is not important or is not known.

    Forwards to store_time(MYSQL_TIME*, uint8) with dec == 0.

    @param ltime   Time, date or datetime value.
    @return the type_conversion_status returned by the two-argument overload.
  */
  type_conversion_status store_time(MYSQL_TIME *ltime)
  {
    const uint8 no_fractional_digits= 0;
    return store_time(ltime, no_fractional_digits);
  }
  type_conversion_status store(const char *to, size_t length,
                               const CHARSET_INFO *cs,
                               enum_check_fields check_level);
  virtual double val_real(void)=0;
  virtual longlong val_int(void)=0;
  /**
    Returns TIME value in packed longlong format.
    This method should not be called for non-temporal types.
    Temporal field types override the default method.
  */
  virtual longlong val_time_temporal()
  {
    assert(0);
    return 0;
  }
  /**
    Returns DATE/DATETIME value in packed longlong format.
    This method should not be called for non-temporal types.
    Temporal field types override the default method.
  */
  virtual longlong val_date_temporal()
  {
    assert(0);
    return 0;
  }
  /**
    Return the "native" packed longlong representation of the field:
    val_time_temporal() when type() is MYSQL_TYPE_TIME, otherwise
    val_date_temporal().
  */
  longlong val_temporal_by_field_type()
  {
    const bool is_time_field= (type() == MYSQL_TYPE_TIME);
    if (!is_time_field)
    {
      // Every non-TIME caller is expected to be a temporal type with a date.
      assert(is_temporal_with_date());
      return val_date_temporal();
    }
    return val_time_temporal();
  }
  virtual my_decimal *val_decimal(my_decimal *)= 0;
  inline String *val_str(String *str) { return val_str(str, str); }
  /*
     val_str(buf1, buf2) gets two buffers and should use them as follows:
     if it needs a temp buffer to convert result to string - use buf1
       example Field_tiny::val_str()
     if the value exists as a string already - use buf2
       example Field_string::val_str()
     consequently, buf2 may be created as 'String buf;' - no memory
     will be allocated for it. buf1 will be allocated to hold a
     value if it's too small. Using allocated buffer for buf2 may result in
     an unnecessary free (and later, may be an alloc).
     This trickery is used to decrease a number of malloc calls.
  */
  virtual String *val_str(String*,String *)=0;
  String *val_int_as_str(String *val_buffer, my_bool unsigned_flag);
  /*
   str_needs_quotes() returns TRUE if the value returned by val_str() needs
   to be quoted when used in constructing an SQL query.
  */
  virtual bool str_needs_quotes() { return FALSE; }
  virtual Item_result result_type () const=0;
  /**
    Returns Item_result type of a field when it appears
    in numeric context such as:
      SELECT time_column + 1;
      SELECT SUM(time_column);
    Examples:
    - a column of type TIME, DATETIME, TIMESTAMP act as INT.
    - a column of type TIME(1), DATETIME(1), TIMESTAMP(1)
      act as DECIMAL with 1 fractional digits.
  */
  virtual Item_result numeric_context_result_type() const
  {
    return result_type();
  }
  virtual Item_result cmp_type () const { return result_type(); }
  virtual Item_result cast_to_int_type () const { return result_type(); }
  static bool type_can_have_key_part(enum_field_types);
  static enum_field_types field_type_merge(enum_field_types, enum_field_types);
  static Item_result result_merge_type(enum_field_types);
  bool gcol_expr_is_equal(const Field *field) const;
  bool gcol_expr_is_equal(const Create_field *field) const;
  /**
    Compare this field with another one.

    @param field  the field to compare against
    @return true only if both fields share the same data pointer, the same
            NULL-byte pointer, the same null bit and the same type().
  */
  virtual bool eq(Field *field)
  {
    if (ptr != field->ptr)
      return false;
    if (m_null_ptr != field->m_null_ptr)
      return false;
    if (null_bit != field->null_bit)
      return false;
    return field->type() == type();
  }
  virtual bool eq_def(Field *field);
  
  /*
    pack_length() returns size (in bytes) used to store field data in memory
    (i.e. it returns the maximum size of the field in a row of the table,
    which is located in RAM).
  */
  virtual uint32 pack_length() const { return (uint32) field_length; }

  /*
    pack_length_in_rec() returns size (in bytes) used to store field data on
    storage (i.e. it returns the maximal size of the field in a row of the
    table, which is located on disk).
  */
  virtual uint32 pack_length_in_rec() const { return pack_length(); }
  virtual bool compatible_field_size(uint metadata, Relay_log_info *rli,
                                     uint16 mflags, int *order);
  virtual uint pack_length_from_metadata(uint field_metadata)
  {
    DBUG_ENTER("Field::pack_length_from_metadata");
    DBUG_RETURN(field_metadata);
  }
  virtual uint row_pack_length() const { return 0; }
  virtual int save_field_metadata(uchar *first_byte)
  { return do_save_field_metadata(first_byte); }

  /*
    data_length() return the "real size" of the data in memory.
    Useful only for variable length datatypes where it's overloaded.
    By default assume the length is constant.
  */
  virtual uint32 data_length(uint row_offset= 0) { return pack_length(); }
  virtual uint32 sort_length() const { return pack_length(); }

  /**
     Get the maximum size of the data in packed format.

     The default implementation returns pack_length().

     @return Maximum data length of the field when packed using the
     Field::pack() function.
   */
  virtual uint32 max_data_length() const
  {
    return pack_length();
  }

  virtual type_conversion_status reset(void)
  {
    memset(ptr, 0, pack_length());
    return TYPE_OK;
  }
  virtual void reset_fields() {}
  /**
    Returns timestamp value in "struct timeval" format.
    This method is used in "SELECT UNIX_TIMESTAMP(field)"
    to avoid conversion from timestamp to MYSQL_TIME and back.
  */
  virtual bool get_timestamp(struct timeval *tm, int *warnings);
  /**
    Stores a timestamp value in timeval format in a field.
   
   @note 
   - store_timestamp(), get_timestamp() and store_time() do not depend on
   timezone and always work "in UTC".

   - The default implementation of this interface expects that storing the
   value will not fail. For most Field descendent classes, this is not the
   case. However, this interface is only used when the function
   CURRENT_TIMESTAMP is used as a column default expression, and currently we
   only allow TIMESTAMP and DATETIME columns to be declared with this as the
   column default. Hence it is enough that the classes implementing columns
   with these types either override this interface, or that
   store_time(MYSQL_TIME*, uint8) does not fail.

   - The column types above interpret decimals() to mean the scale of the
   fractional seconds.
   
   - We also have the limitation that the scale of a column must be the same as
   the scale of the CURRENT_TIMESTAMP. I.e. we only allow 
   
   @code
   
   [ TIMESTAMP | DATETIME ] (n) [ DEFAULT | ON UPDATE ] CURRENT_TIMESTAMP (n)

   @endcode

   Since this interface relies on the caller to truncate the value according to this
   Field's scale, it will work with all constructs that we currently allow.
  */
  virtual void store_timestamp(const timeval *tm) { assert(false); }

  /**
     Interface for legacy code. Newer code uses the store_timestamp(const
     timeval*) interface.

     Builds a timeval with tv_sec == sec and tv_usec == 0 and passes it to
     store_timestamp(const timeval*).

     @param sec A TIMESTAMP value in the my_time_t format.
  */
  void store_timestamp(my_time_t sec)
  {
    struct timeval whole_seconds;
    whole_seconds.tv_usec= 0;
    whole_seconds.tv_sec= sec;
    store_timestamp(&whole_seconds);
  }

  /**
    Give the field its default value: evaluate the insert default function
    when has_insert_default_function() is true, otherwise call copy_data()
    with the table's default_values_offset().
  */
  virtual void set_default()
  {
    if (!has_insert_default_function())
    {
      copy_data(table->default_values_offset());
      return;
    }
    evaluate_insert_default_function();
  }


  /**
     Evaluates the @c INSERT default function and stores the result in the
     field. If no such function exists for the column, or the function is not
     valid for the column's data type, invoking this function has no effect.
  */
  void evaluate_insert_default_function();


  /**
     Evaluates the @c UPDATE default function, if one exists, and stores the
     result in the record buffer. If no such function exists for the column,
     or the function is not valid for the column's data type, invoking this
     function has no effect.
  */
  void evaluate_update_default_function();
  /** Base default: returns true. */
  virtual bool binary() const
  { return true; }

  /** Base default: returns true. */
  virtual bool zero_pack() const
  { return true; }

  /** Base default key type: HA_KEYTYPE_BINARY. */
  virtual enum ha_base_keytype key_type() const
  { return HA_KEYTYPE_BINARY; }

  /** Base default key length: whatever pack_length() returns. */
  virtual uint32 key_length() const
  { return pack_length(); }

  /** Type code of the field; has no base implementation. */
  virtual enum_field_types type() const =0;

  /** Base default: identical to type(). */
  virtual enum_field_types real_type() const
  { return type(); }
  virtual enum_field_types binlog_type() const
  {
    /*
      This base implementation simply returns type().

      Binlog stores field->type() as type code by default.
      This puts MYSQL_TYPE_STRING in case of CHAR, VARCHAR, SET and ENUM,
      with extra data type details put into metadata.

      We cannot store field->type() in case of temporal types with
      fractional seconds: TIME(n), DATETIME(n) and TIMESTAMP(n),
      because binlog records with MYSQL_TYPE_TIME, MYSQL_TYPE_DATETIME
      type codes do not have metadata.
      So for temporal data types with fractional seconds we'll store
      real_type() type codes instead, i.e.
      MYSQL_TYPE_TIME2, MYSQL_TYPE_DATETIME2, MYSQL_TYPE_TIMESTAMP2,
      and put precision into metadata.
      NOTE(review): that substitution is not performed in this body;
      presumably the temporal Field subclasses override this method - confirm.

      Note: perhaps binlog should eventually be modified to store
      real_type() instead of type() for all column types.
    */
    return type();
  }
  /** Compares the value at this field's ptr with the value at @p str. */
  inline int cmp(const uchar *str)
  {
    return cmp(ptr, str);
  }

  /** Base default: forwards to cmp(a, b); @p max_len is not used here. */
  virtual int cmp_max(const uchar *a, const uchar *b, uint max_len)
  {
    return cmp(a, b);
  }

  /** Two-pointer comparison; has no base implementation. */
  virtual int cmp(const uchar *,const uchar *)=0;

  /**
    Base default: memcmp() over pack_length() bytes; @p max_length is not
    used here.
  */
  virtual int cmp_binary(const uchar *a,const uchar *b, uint32 max_length=~0L)
  {
    return memcmp(a, b, pack_length());
  }

  /** cmp() between the value at ptr and the value row_offset bytes after it. */
  virtual int cmp_offset(uint row_offset)
  {
    return cmp(ptr, ptr + row_offset);
  }

  /** cmp_binary() between ptr and the location row_offset bytes after it. */
  virtual int cmp_binary_offset(uint row_offset)
  {
    return cmp_binary(ptr, ptr + row_offset);
  }

  /** Base default: forwards to cmp(a, b). */
  virtual int key_cmp(const uchar *a,const uchar *b)
  {
    return cmp(a, b);
  }

  /** Base default: cmp() of ptr against @p str; @p length is not used here. */
  virtual int key_cmp(const uchar *str, uint length)
  {
    return cmp(ptr, str);
  }
  /** Base default: 0. */
  virtual uint decimals() const
  {
    return 0;
  }

  /** Base default: false. */
  virtual bool is_text_key_type() const
  {
    return false;
  }

  /*
    Caller beware: sql_type can change str.Ptr, so check
    ptr() to see if it changed if you are using your own buffer
    in str and restore it with set() if needed
  */
  virtual void sql_type(String &str) const =0;

  bool is_temporal() const
  { return is_temporal_type(type()); }

  bool is_temporal_with_date() const
  { return is_temporal_type_with_date(type()); }

  bool is_temporal_with_time() const
  { return is_temporal_type_with_time(type()); }

  bool is_temporal_with_date_and_time() const
  { return is_temporal_type_with_date_and_time(type()); }

  /**
    Check whether the full table's row is NULL or the Field has value NULL.

    @param row_offset  Index applied to m_null_ptr when the field is
                       NULLable.

    @return    true if the full table's row is NULL or the Field has value NULL
               false if neither table's row nor the Field has value NULL
  */
  bool is_null(my_ptrdiff_t row_offset= 0) const
  {
    /*
      Three sources are consulted, strictly in this order:

      1. A NULLable field answers from the null bit at m_null_ptr[row_offset].
      2. Otherwise, a temporarily nullable field answers from m_is_tmp_null.
      3. Otherwise the answer is TABLE::has_null_row().

      The table may have been marked as containing only NULL values
      for all fields if it is a NULL-complemented row of an OUTER JOIN
      or if the query is an implicitly grouped query (has aggregate
      functions but no GROUP BY clause) with no qualifying rows. In that
      situation TABLE::has_null_row() is true and a non-nullable field is
      considered to be NULL.

      Do not change the order of testing. Fields may be associated
      with a TABLE object without being part of the current row.
      For NULL value check to work for these fields, they must
      have a valid m_null_ptr, and this pointer must be checked before
      TABLE::has_null_row().
    */
    if (!real_maybe_null())
    {
      if (is_tmp_nullable())
        return m_is_tmp_null;

      return table->has_null_row();
    }

    return MY_TEST(m_null_ptr[row_offset] & null_bit);
  }

  /**
    Check whether the Field has value NULL (temporary or actual).

    Unlike is_null(), TABLE::has_null_row() is not consulted here.

    @param row_offset  Index applied to m_null_ptr when the field is
                       NULLable.

    @return   true if the Field has value NULL (temporary or actual)
              false if the Field has value NOT NULL.
  */
  bool is_real_null(my_ptrdiff_t row_offset= 0) const
  {
    if (!real_maybe_null())
      return is_tmp_nullable() ? m_is_tmp_null : false;

    return MY_TEST(m_null_ptr[row_offset] & null_bit);
  }

  /**
    Check if the Field has value NULL or the record specified by argument
    has value NULL for this Field.

    For a NULLable field the null bit is read from @p record at
    null_offset(); otherwise only the temporary NULL flag is considered.

    @param record  Record buffer to inspect.

    @return    true if the Field has value NULL or the record has value NULL
               for this Field.
  */
  bool is_null_in_record(const uchar *record) const
  {
    if (!real_maybe_null())
    {
      if (is_tmp_nullable())
        return m_is_tmp_null;

      return false;
    }

    return MY_TEST(record[null_offset()] & null_bit);
  }

  void set_null(my_ptrdiff_t row_offset= 0);

  void set_notnull(my_ptrdiff_t row_offset= 0);

  type_conversion_status check_constraints(int mysql_errno);

  /**
    Saves a THD::count_cuted_fields value in this field.

    The value is remembered so that possible NOT-NULL constraint errors can
    be handled after BEFORE-trigger execution is finished: it has to be
    captured before trigger processing starts, because
    THD::count_cuted_fields could be changed while triggers execute.

    @param count_cuted_fields  Value to store in m_count_cuted_fields_saved.
  */
  void set_count_cuted_fields(enum_check_fields count_cuted_fields)
  {
    m_count_cuted_fields_saved= count_cuted_fields;
  }

  bool maybe_null(void) const
  { return real_maybe_null() || table->is_nullable(); }

  /// @return true if this field is NULL-able, false otherwise.
  bool real_maybe_null(void) const
  { return m_null_ptr != NULL; }

  uint null_offset(const uchar *record) const
  { return (uint) (m_null_ptr - record); }

  uint null_offset() const
  { return null_offset(table->record[0]); }

  /**
    Replaces the field's null pointer and null bit.

    @param p_null_ptr  New value for m_null_ptr.
    @param p_null_bit  New value for null_bit.
  */
  void set_null_ptr(uchar *p_null_ptr, uint p_null_bit)
  {
    null_bit= p_null_bit;
    m_null_ptr= p_null_ptr;
  }

  enum {
    LAST_NULL_BYTE_UNDEF= 0
  };

  /**
    Find the position of the last null byte for the field.

    The value comes from do_last_null_byte(); it is written to the debug
    trace and asserted not to exceed table->s->null_bytes before being
    returned.

    @return The position of the last null byte relative to the beginning of
            the record. If the field does not use any bits of the null
            bytes, the value 0 (LAST_NULL_BYTE_UNDEF) is returned.
   */
  size_t last_null_byte() const {
    const size_t position= do_last_null_byte();
    DBUG_PRINT("debug", ("last_null_byte() ==> %ld", (long) position));
    assert(position <= table->s->null_bytes);
    return position;
  }

  virtual void make_field(Send_field *);

  /**
    Writes a copy of the current value in the record buffer, suitable for
    sorting using byte-by-byte comparison. Integers are always in big-endian
    regardless of hardware architecture. At most length bytes are written
    into the buffer.

    @param buff The buffer, assumed to be at least length bytes.

    @param length Number of bytes to write.
  */
  virtual void make_sort_key(uchar *buff, size_t length) = 0;
  virtual bool optimize_range(uint idx, uint part);
  /**
    This should be true for fields which, when compared with constant
    items, can be cast to longlong. In this case the constant items are cast
    to longlong at the 'fix_fields' stage and field->val_int() is used for
    comparison at the execution stage. Used to optimize clauses like
    'a_column BETWEEN date_const, date_const'.

    @return false in this base implementation.
  */
  virtual bool can_be_compared_as_longlong() const
  {
    return false;
  }

  /** Base default: does nothing. */
  virtual void mem_free()
  {
  }
  virtual Field *new_field(MEM_ROOT *root, TABLE *new_table,
                           bool keep_type);
  virtual Field *new_key_field(MEM_ROOT *root, TABLE *new_table,
                               uchar *new_ptr, uchar *new_null_ptr,
                               uint new_null_bit);

  /**
    Overload that forwards to the five-argument new_key_field(), passing
    this field's own m_null_ptr and null_bit.
  */
  Field *new_key_field(MEM_ROOT *root, TABLE *new_table, uchar *new_ptr)
  {
    Field *key_field= new_key_field(root, new_table, new_ptr,
                                    m_null_ptr, null_bit);
    return key_field;
  }

  /**
     Makes a shallow copy of the Field object.
     
     @note This member function must be overridden in all concrete
     subclasses. Several of the Field subclasses are concrete even though they
     are not leaf classes, so the compiler will not always catch this.

     @retval NULL If memory allocation failed.
  */ 
  virtual Field *clone() const =0;

  /**
     Makes a shallow copy of the Field object.
     
     @note This member function must be overridden in all concrete
     subclasses. Several of the Field subclasses are concrete even though they
     are not leaf classes, so the compiler will not always catch this.
     
     @param mem_root MEM_ROOT to use for memory allocation.
     @retval NULL If memory allocation failed.
   */
  virtual Field *clone(MEM_ROOT *mem_root) const = 0;

  /** Replaces the data pointer, the null pointer and the null bit. */
  void move_field(uchar *ptr_arg, uchar *null_ptr_arg, uchar null_bit_arg)
  {
    null_bit= null_bit_arg;
    m_null_ptr= null_ptr_arg;
    ptr= ptr_arg;
  }

  /** Replaces only the data pointer. */
  void move_field(uchar *ptr_arg)
  {
    ptr= ptr_arg;
  }

  /**
    Shifts the data pointer by @p ptr_diff and, when the field is NULL-able,
    shifts the null pointer by the same amount.
  */
  virtual void move_field_offset(my_ptrdiff_t ptr_diff)
  {
    ptr= ADD_TO_PTR(ptr, ptr_diff, uchar*);

    if (!real_maybe_null())
      return;

    m_null_ptr= ADD_TO_PTR(m_null_ptr, ptr_diff, uchar*);
  }

  /** Base default: copies @p length bytes from ptr to @p buff; cs is unused. */
  virtual void get_image(uchar *buff, size_t length, const CHARSET_INFO *cs)
  {
    memcpy(buff, ptr, length);
  }

  /** Base default: copies @p length bytes from @p buff to ptr; cs is unused. */
  virtual void set_image(const uchar *buff, size_t length, const CHARSET_INFO *cs)
  {
    memcpy(ptr, buff, length);
  }


  /*
    Copy a field part into an output buffer.

    SYNOPSIS
      Field::get_key_image()
      buff   [out] output buffer
      length       output buffer size
      type         itMBR for geometry blobs, otherwise itRAW

    DESCRIPTION
      This function makes a copy of field part of size equal to or
      less than "length" parameter value.
      For fields of string types (CHAR, VARCHAR, TEXT) the rest of buffer
      is padded by zero byte.

    NOTES
      For variable length character fields (i.e. UTF-8) the "length"
      parameter means a number of output buffer bytes as if all field
      characters have maximal possible size (mbmaxlen). In the other words,
      "length" parameter is a number of characters multiplied by
      field_charset->mbmaxlen.

    RETURN
      Number of copied bytes (excluding padded zero bytes -- see above).
  */

  virtual size_t get_key_image(uchar *buff, size_t length, imagetype type)
  {
    get_image(buff, length, &my_charset_bin);
    return length;
  }
  virtual void set_key_image(const uchar *buff, size_t length)
    { set_image(buff,length, &my_charset_bin); }
  /**
    Evaluates val_int() with ptr temporarily advanced by @p row_offset
    bytes; ptr is moved back before returning.
  */
  inline longlong val_int_offset(uint row_offset)
  {
    ptr+= row_offset;
    const longlong value= val_int();
    ptr-= row_offset;
    return value;
  }
  /**
    Evaluates val_int() with ptr temporarily pointing at @p new_ptr; the
    previous ptr is restored before returning.
  */
  inline longlong val_int(const uchar *new_ptr)
  {
    uchar *const saved_ptr= ptr;
    ptr= const_cast<uchar*>(new_ptr);
    const longlong value= val_int();
    ptr= saved_ptr;
    return value;
  }
  /**
    Calls val_str(str) with ptr temporarily pointing at @p new_ptr; the
    previous ptr is restored afterwards.

    @return @p str itself; the value returned by val_str(str) is not used.
  */
  inline String *val_str(String *str, const uchar *new_ptr)
  {
    uchar *const saved_ptr= ptr;
    ptr= const_cast<uchar*>(new_ptr);
    val_str(str);
    ptr= saved_ptr;
    return str;
  }
  virtual bool send_binary(Protocol *protocol);
  virtual bool send_text(Protocol *protocol);

  virtual uchar *pack(uchar *to, const uchar *from,
                      uint max_length, bool low_byte_first);
  /**
     @overload Field::pack(uchar*, const uchar*, uint, bool)

     Calls the four-argument pack() with UINT_MAX as max_length and
     table->s->db_low_byte_first as low_byte_first.
  */
  uchar *pack(uchar *to, const uchar *from)
  {
    DBUG_ENTER("Field::pack");
    const bool low_byte_first= table->s->db_low_byte_first;
    uchar *ret= pack(to, from, UINT_MAX, low_byte_first);
    DBUG_RETURN(ret);
  }

  virtual const uchar *unpack(uchar* to, const uchar *from,
                              uint param_data, bool low_byte_first);
  /**
     @overload Field::unpack(uchar*, const uchar*, uint, bool)

     Calls the four-argument unpack() with 0 as param_data and
     table->s->db_low_byte_first as low_byte_first.
  */
  const uchar *unpack(uchar* to, const uchar *from)
  {
    DBUG_ENTER("Field::unpack");
    const bool low_byte_first= table->s->db_low_byte_first;
    const uchar *ret= unpack(to, from, 0U, low_byte_first);
    DBUG_RETURN(ret);
  }

  /** Base default: returns @p length unchanged; @p to is not used here. */
  virtual uint packed_col_length(const uchar *to, uint length)
  {
    return length;
  }

  /**
    This is a wrapper around pack_length() used by filesort() to determine
    how many bytes we need for packing "addon fields".
    @returns maximum size of a row when stored in the filesort buffer.
   */
  virtual uint max_packed_col_length()
  {
    return pack_length();
  }

  /** @return distance in bytes from @p record to this field's ptr. */
  uint offset(uchar *record)
  {
    return static_cast<uint>(ptr - record);
  }

  void copy_data(my_ptrdiff_t src_record_offset);

  uint fill_cache_field(struct st_cache_field *copy);
  virtual bool get_date(MYSQL_TIME *ltime, my_time_flags_t fuzzydate);
  virtual bool get_time(MYSQL_TIME *ltime);
  /** Base default: my_charset_bin. */
  virtual const CHARSET_INFO *charset(void) const
  {
    return &my_charset_bin;
  }

  /** Base default: my_charset_bin when binary() is true, else charset(). */
  virtual const CHARSET_INFO *charset_for_protocol(void) const
  {
    if (binary())
      return &my_charset_bin;
    return charset();
  }

  /** Base default: same as charset(). */
  virtual const CHARSET_INFO *sort_charset(void) const
  {
    return charset();
  }

  /** Base default: FALSE. */
  virtual bool has_charset(void) const
  {
    return FALSE;
  }
  /**
    match_collation_to_optimize_range() is to distinguish in
    range optimizer (see opt_range.cc) between real string types:
      CHAR, VARCHAR, TEXT
    and the other string-alike types with result_type() == STRING_RESULT:
      DATE, TIME, DATETIME, TIMESTAMP
    We need it to decide whether to test if collation of the operation
    matches collation of the field (needed only for real string types).
    QQ: shouldn't DATE/TIME types have their own XXX_RESULT types eventually?

    @return false in this base implementation.
  */
  virtual bool match_collation_to_optimize_range() const
  {
    return false;
  }

  /** Base default: DERIVATION_IMPLICIT. */
  virtual enum Derivation derivation(void) const
  {
    return DERIVATION_IMPLICIT;
  }

  /** Base default: MY_REPERTOIRE_UNICODE30. */
  virtual uint repertoire(void) const
  {
    return MY_REPERTOIRE_UNICODE30;
  }

  /** Base default: does nothing with @p derivation_arg. */
  virtual void set_derivation(enum Derivation derivation_arg)
  {
  }

  /**
    Produce warning or note about data saved into field.

    Convenience overload: forwards to the five-argument set_warning() with
    NULL for both view_db and view_name.

    @param level            - level of message (Note/Warning/Error)
    @param code             - error code of message to be produced
    @param cut_increment    - whether we should increase cut fields count

    @note
      This function won't produce warning and increase cut fields counter
      if count_cuted_fields == CHECK_FIELD_IGNORE for current thread.

      if count_cuted_fields == CHECK_FIELD_IGNORE then we ignore notes.
      This allows us to avoid notes in optimization, like
      convert_constant_item().

    @retval
      1 if count_cuted_fields == CHECK_FIELD_IGNORE and error level is not NOTE
    @retval
      0 otherwise
  */
  bool set_warning(Sql_condition::enum_severity_level level, unsigned int code,
                   int cut_increment)
  {
    const char *const no_view_db= NULL;
    const char *const no_view_name= NULL;
    return set_warning(level, code, cut_increment, no_view_db, no_view_name);
  }

  bool set_warning(Sql_condition::enum_severity_level level, uint code,
                   int cut_increment, const char *view_db,
                   const char *view_name);

  /** @return true when @p op_result equals E_DEC_OVERFLOW. */
  inline bool check_overflow(int op_result)
  {
    const bool overflowed= (E_DEC_OVERFLOW == op_result);
    return overflowed;
  }

  /** @return true when @p op_result equals E_DEC_TRUNCATED. */
  inline bool check_truncated(int op_result)
  {
    const bool truncated= (E_DEC_TRUNCATED == op_result);
    return truncated;
  }
  bool warn_if_overflow(int op_result);
  /**
    Attaches the field to @p table_arg: both table and orig_table are set to
    it, and table_name is pointed at the table's alias.
  */
  void init(TABLE *table_arg)
  {
    table= table_arg;
    orig_table= table_arg;
    table_name= &table_arg->alias;
  }

  /* maximum possible display length */
  virtual uint32 max_display_length()= 0;

  /**
    Whether a field being created is type-compatible with an existing one.

    Used by the ALTER TABLE code to evaluate whether the new definition
    of a table is compatible with the old definition so that it can
    determine if data needs to be copied over (table data change).
    Constraints and generation clause (default value, generation expression)
    are not checked by this function.
  */
  virtual uint is_equal(Create_field *new_field);
  /* convert decimal to longlong with overflow check */
  longlong convert_decimal2longlong(const my_decimal *val, bool unsigned_flag,
                                    bool *has_overflow);
  /**
    The max. number of characters: field_length divided by the mbmaxlen of
    charset().
  */
  virtual uint32 char_length()
  {
    const CHARSET_INFO *cs= charset();
    return field_length / cs->mbmaxlen;
  }

  /**
    Base default that is not supposed to be reached: it asserts, and
    returns GEOM_GEOMETRY when the assertion is compiled out.
  */
  virtual geometry_type get_geometry_type() const
  {
    /* shouldn't get here. */
    assert(0);

    return GEOM_GEOMETRY;
  }
#ifndef NDEBUG
  /**
    Print field value into debug trace, in NULL-aware way: the text NULL
    when is_real_null() is true, otherwise the val_str() result in single
    quotes.
  */
  void dbug_print()
  {
    if (is_real_null())
    {
      fprintf(DBUG_FILE, "NULL");
      return;
    }

    char buf[256];
    String str(buf, sizeof(buf), &my_charset_bin);
    str.length(0);
    String *pstr= val_str(&str);
    fprintf(DBUG_FILE, "'%s'", pstr->c_ptr_safe());
  }
#endif

  /**
    @return the two bits of flags located at FIELD_FLAGS_STORAGE_MEDIA,
            as a ha_storage_media value.
  */
  ha_storage_media field_storage_type() const
  {
    return static_cast<ha_storage_media>(
      (flags >> FIELD_FLAGS_STORAGE_MEDIA) & 3);
  }

  /**
    ORs @p storage_type_arg into flags at FIELD_FLAGS_STORAGE_MEDIA.
    Asserts that the stored value is still HA_SM_DEFAULT beforehand.
  */
  void set_storage_type(ha_storage_media storage_type_arg)
  {
    assert(field_storage_type() == HA_SM_DEFAULT);

    flags |= (storage_type_arg << FIELD_FLAGS_STORAGE_MEDIA);
  }

  /**
    @return the two bits of flags located at FIELD_FLAGS_COLUMN_FORMAT,
            as a column_format_type value.
  */
  column_format_type column_format() const
  {
    return static_cast<column_format_type>(
      (flags >> FIELD_FLAGS_COLUMN_FORMAT) & 3);
  }

  /**
    ORs @p column_format_arg into flags at FIELD_FLAGS_COLUMN_FORMAT.
    Asserts that the stored value is still COLUMN_FORMAT_TYPE_DEFAULT
    beforehand.
  */
  void set_column_format(column_format_type column_format_arg)
  {
    assert(column_format() == COLUMN_FORMAT_TYPE_DEFAULT);

    flags |= (column_format_arg << FIELD_FLAGS_COLUMN_FORMAT);
  }

  /**
    Validate the value stored in a field.

    @return TYPE_OK in this base implementation; @p thd is not used here.
  */
  virtual type_conversion_status validate_stored_val(THD *thd)
  {
    return TYPE_OK;
  }

  /* Hash value */
  virtual void hash(ulong *nr, ulong *nr2);

  /**
    Get the upper limit of the MySQL integral and floating-point type.

    This base implementation asserts, and yields 0 when the assertion is
    compiled out.

    @return maximum allowed value for the field
  */
  virtual ulonglong get_max_int_value() const
  {
    assert(false);

    return 0ULL;
  }

  /**
    Return pointer to the actual data in memory.

    @param[out] str  Receives the current value of ptr.
  */
  virtual void get_ptr(uchar **str)
  {
    *str= ptr;
  }

/**
  Checks whether a string field is part of write_set.

  This base implementation always answers FALSE.

  @return
    FALSE  - If field is not char/varchar/....
           - If field is char/varchar/.. and is not part of write set.
    TRUE   - If field is char/varchar/.. and is part of write set.
*/
  virtual bool is_updatable() const
  {
    return FALSE;
  }

  /**
    Check whether field is part of the index taking the index extensions flag
    into account. Index extensions are also not applicable to UNIQUE indexes
    for loose index scans.

    @param[in]     thd             THD object
    @param[in]     cur_index       Index of the key
    @param[in]     cur_index_info  key_info object

    @retval true  Field is part of the key
    @retval false otherwise

  */
  bool is_part_of_actual_key(THD *thd, uint cur_index, KEY *cur_index_info);

  friend int cre_myisam(char * name, TABLE *form, uint options,
      ulonglong auto_increment_value);
  friend class Copy_field;
  friend class Item_avg_field;
  friend class Item_std_field;
  friend class Item_sum_num;
  friend class Item_sum_sum;
  friend class Item_sum_str;
  friend class Item_sum_count;
  friend class Item_sum_avg;
  friend class Item_sum_std;
  friend class Item_sum_min;
  friend class Item_sum_max;
  friend class Item_func_group_concat;

private:
  /*
    Primitive for implementing last_null_byte().

    SYNOPSIS
      do_last_null_byte()

    DESCRIPTION
      Primitive for the implementation of the last_null_byte()
      function. This represents the inheritance interface and can be
      overridden by subclasses.
   */
  virtual size_t do_last_null_byte() const;

/**
   Retrieve the field metadata for fields.

   This default implementation returns 0 and does not write anything
   through metadata_ptr.

   @param   metadata_ptr   First byte of field metadata

   @returns 0 no bytes written.
*/
  virtual int do_save_field_metadata(uchar *metadata_ptr)
  {
    return 0;
  }

protected:
  /*
    Helper function to pack()/unpack() int16 values.

    On WORDS_BIGENDIAN builds a low-byte-first source is read with
    sint2korr() and a low-byte-first destination is written with
    int2store(); in every other case shortget()/shortstore() are used.
  */
  static void handle_int16(uchar *to, const uchar *from,
                           bool low_byte_first_from, bool low_byte_first_to)
  {
    int16 value;
#ifdef WORDS_BIGENDIAN
    if (low_byte_first_from)
      value= sint2korr(from);
    else
#endif
      shortget(&value, from);

#ifdef WORDS_BIGENDIAN
    if (low_byte_first_to)
      int2store(to, value);
    else
#endif
      shortstore(to, value);
  }

  /*
    Helper function to pack()/unpack() 3-byte integer values.

    Reading: on WORDS_BIGENDIAN builds a low-byte-first source is read with
    sint3korr(); in every other case the three bytes are combined with
    from[0] as the most significant byte.

    Writing: on WORDS_BIGENDIAN builds a low-byte-first destination is
    written with int3store(); in every other case the three bytes are
    emitted with the most significant byte in to[0].
  */
  static void handle_int24(uchar *to, const uchar *from,
                           bool low_byte_first_from, bool low_byte_first_to)
  {
    int32 val;
#ifdef WORDS_BIGENDIAN
    if (low_byte_first_from)
      val = sint3korr(from);
    else
#endif
      val= (from[0] << 16) + (from[1] << 8) + from[2];

#ifdef WORDS_BIGENDIAN
    if (low_byte_first_to)
      /* All three bytes must be stored; int2store() would drop the top one. */
      int3store(to, val);
    else
#endif
    {
      to[0]= 0xFF & (val >> 16);
      to[1]= 0xFF & (val >> 8);
      to[2]= 0xFF & val;
    }
  }

  /*
    Helper function to pack()/unpack() int32 values.

    On WORDS_BIGENDIAN builds a low-byte-first source is read with
    sint4korr() and a low-byte-first destination is written with
    int4store(); in every other case longget()/longstore() are used.
  */
  static void handle_int32(uchar *to, const uchar *from,
                           bool low_byte_first_from, bool low_byte_first_to)
  {
    int32 value;
#ifdef WORDS_BIGENDIAN
    if (low_byte_first_from)
      value= sint4korr(from);
    else
#endif
      longget(&value, from);

#ifdef WORDS_BIGENDIAN
    if (low_byte_first_to)
      int4store(to, value);
    else
#endif
      longstore(to, value);
  }

  /*
    Helper function to pack()/unpack() int64 values.

    On WORDS_BIGENDIAN builds a low-byte-first source is read with
    sint8korr() and a low-byte-first destination is written with
    int8store(); in every other case longlongget()/longlongstore() are used.
  */
  static void handle_int64(uchar* to, const uchar *from,
                           bool low_byte_first_from, bool low_byte_first_to)
  {
    int64 value;
#ifdef WORDS_BIGENDIAN
    if (low_byte_first_from)
      value= sint8korr(from);
    else
#endif
      longlongget(&value, from);

#ifdef WORDS_BIGENDIAN
    if (low_byte_first_to)
      int8store(to, value);
    else
#endif
      longlongstore(to, value);
  }

  /**
    Runs handle_int16() with table->s->db_low_byte_first as the source byte
    order and @p low_byte_first_to as the destination byte order.

    @return @p to advanced by sizeof(int16).
  */
  uchar *pack_int16(uchar *to, const uchar *from, bool low_byte_first_to)
  {
    const bool low_byte_first_from= table->s->db_low_byte_first;
    handle_int16(to, from, low_byte_first_from, low_byte_first_to);
    return to + sizeof(int16);
  }

  /**
    Runs handle_int16() with @p low_byte_first_from as the source byte order
    and table->s->db_low_byte_first as the destination byte order.

    @return @p from advanced by sizeof(int16).
  */
  const uchar *unpack_int16(uchar* to, const uchar *from,
                            bool low_byte_first_from)
  {
    const bool low_byte_first_to= table->s->db_low_byte_first;
    handle_int16(to, from, low_byte_first_from, low_byte_first_to);
    return from + sizeof(int16);
  }

  /**
    Runs handle_int24() with table->s->db_low_byte_first as the source byte
    order and @p low_byte_first_to as the destination byte order.

    @return @p to advanced by 3 bytes.
  */
  uchar *pack_int24(uchar *to, const uchar *from, bool low_byte_first_to)
  {
    const bool low_byte_first_from= table->s->db_low_byte_first;
    handle_int24(to, from, low_byte_first_from, low_byte_first_to);
    return to + 3;
  }

  /**
    Runs handle_int24() with @p low_byte_first_from as the source byte order
    and table->s->db_low_byte_first as the destination byte order.

    @return @p from advanced by 3 bytes.
  */
  const uchar *unpack_int24(uchar* to, const uchar *from,
                            bool low_byte_first_from)
  {
    const bool low_byte_first_to= table->s->db_low_byte_first;
    handle_int24(to, from, low_byte_first_from, low_byte_first_to);
    return from + 3;
  }

  /**
    Runs handle_int32() with table->s->db_low_byte_first as the source byte
    order and @p low_byte_first_to as the destination byte order.

    @return @p to advanced by sizeof(int32).
  */
  uchar *pack_int32(uchar *to, const uchar *from, bool low_byte_first_to)
  {
    const bool low_byte_first_from= table->s->db_low_byte_first;
    handle_int32(to, from, low_byte_first_from, low_byte_first_to);
    return to + sizeof(int32);
  }

  /**
    Runs handle_int32() with @p low_byte_first_from as the source byte order
    and table->s->db_low_byte_first as the destination byte order.

    @return @p from advanced by sizeof(int32).
  */
  const uchar *unpack_int32(uchar* to, const uchar *from,
                            bool low_byte_first_from)
  {
    const bool low_byte_first_to= table->s->db_low_byte_first;
    handle_int32(to, from, low_byte_first_from, low_byte_first_to);
    return from + sizeof(int32);
  }

  /**
    Runs handle_int64() with table->s->db_low_byte_first as the source byte
    order and @p low_byte_first_to as the destination byte order.

    @return @p to advanced by sizeof(int64).
  */
  uchar *pack_int64(uchar* to, const uchar *from, bool low_byte_first_to)
  {
    const bool low_byte_first_from= table->s->db_low_byte_first;
    handle_int64(to, from, low_byte_first_from, low_byte_first_to);
    return to + sizeof(int64);
  }

  /**
    Runs handle_int64() with @p low_byte_first_from as the source byte order
    and table->s->db_low_byte_first as the destination byte order.

    @return @p from advanced by sizeof(int64).
  */
  const uchar *unpack_int64(uchar* to, const uchar *from,
                            bool low_byte_first_from)
  {
    const bool low_byte_first_to= table->s->db_low_byte_first;
    handle_int64(to, from, low_byte_first_from, low_byte_first_to);
    return from + sizeof(int64);
  }

};

活动

应用商城

合作伙伴

开发者

支持与服务

了解天翼云

2022-09-18 mysql列存储引擎-subselect相关执行流程记录

2022-09-18 mysql列存储引擎-subselect相关执行流程记录

逻辑架构:

时序图:

相关文档:

Table 3-5. Query Optimization

核心流程:

调用堆栈:

核心函数:

sub_select

evaluate_null_complemented_join_record

evaluate_join_record

核心数据结构:

TABLE

Field

相关文章

MySQL常用函数汇总

Go 语言入门很简单：Go 语言中操作 MySQL 数据库

MySQL 导出数据

jsp课程笔记（四）--JDBC增删改数据

mysql备份与恢复

go-mysql-server 新版本发布

go 通过sql操作mysql

mysql_fdw 集成go-mysql-server 开发的mysql server

MongoDB中间件工具mgm入门介绍（二）

mysql函数

作者介绍

最新文章

初识数据库

mysql列存储引擎-宣讲-第二讲-一条SQL在Tianmu引擎中的运行

【揭秘】MySQL逻辑架构：一文带你全面了解！

redis 如何保证缓存和数据库一致性？

Redis 发布/订阅介绍

Redis 事务与数据持久化

热门文章

jsp电子商务购物车之五 数据库存储篇2

mysql列存储引擎-POC-需求分析

mysql存储引擎、数据导入导出、多表查询

MySQL技术内幕 InnoDB存储引擎：B+树索引

mysql表类型和存储引擎和视图

2022-09-09 mysql列存储引擎-POC-需求分析-第二版-有问题的SQL

热门标签

相关产品

弹性云主机

天翼云电脑（公众版）

对象存储

云硬盘

随机文章

学习MySQL的CSV存储引擎

redis---消息队列stream

Mysql数据库存储emoji表情 windows版

redis的列表list操作

mysql表类型和存储引擎和视图

spring boot基于NoSQL数据库Redis发送接收存储消息

jsp电子商务购物车之五数据库存储篇2