源码解析
在 Linux 中,词法和语法解析一般都是通过 Flex 与 Bison 完成的;而在 MySQL 中,词法分析使用的是自己实现的程序,语法分析使用的则是 Bison;Bison 会根据 MySQL 定义的语法规则进行语法解析。
完成语法解析后,会将解析结果生成的数据结构保存在 struct LEX 中,该结构体在 sql/sql_lex.h 文件中定义。
struct LEX: public Query_tables_list
{
friend bool lex_start(THD *thd);
SELECT_LEX_UNIT *unit; ///< Outer-most query expression
/// @todo: select_lex can be replaced with unit->first-select()
SELECT_LEX *select_lex; ///< First query block
SELECT_LEX *all_selects_list; ///< List of all query blocks
private:
/* current SELECT_LEX in parsing */
SELECT_LEX *m_current_select;
... ...
}
优化器会根据这里的数据,生成相应的执行计划,最后调用存储引擎执行。
执行过程
以下是语法解析模块调用过程。
mysql_parse()
 |-mysql_reset_thd_for_next_command()
 |-lex_start()
 |-query_cache_send_result_to_client() # 首先查看cache
 |-parse_sql() # MYSQLparse的封装函数
   |-MYSQLparse() # 实际的解析函数入口
如上,SQL 解析入口会调用 MYSQLparse ,而在 sql/sql_yacc.cc 中有如下的宏定义,也就是说,在预编译阶段,会将 yyparse 替换为 MYSQLparse ,所以实际调用的仍是 Bison 生成的 yyparse 函数。
#define yyparse MYSQLparse
实现细节
接下来详细介绍其实现细节。
词法解析
MySQL 的词法分析并没有使用 Lex,而是有自己的一套词法分析,代码详见 sql/sql_lex.cc 中的实现,其入口函数是 MYSQLlex() 。
int MYSQLlex(YYSTYPE *yylval, YYLTYPE *yylloc, THD *thd)
{
... ...
token= lex_one_token(yylval, thd);
yylloc->cpp.start= lip->get_cpp_tok_start();
yylloc->raw.start= lip->get_tok_start();
switch(token) {
case WITH:
/* Parsing 'WITH' 'ROLLUP' or 'WITH' 'CUBE' requires 2 look ups, which makes the grammar LALR(2). Replace by a single 'WITH_ROLLUP' or 'WITH_CUBE' token, to transform the grammar into a LALR(1) grammar, which sql_yacc.yy can process. */
token= lex_one_token(yylval, thd);
switch(token) {
case CUBE_SYM:
yylloc->cpp.end= lip->get_cpp_ptr();
yylloc->raw.end= lip->get_ptr();
lip->add_digest_token(WITH_CUBE_SYM, yylval);
return WITH_CUBE_SYM;
case ROLLUP_SYM:
yylloc->cpp.end= lip->get_cpp_ptr();
yylloc->raw.end= lip->get_ptr();
lip->add_digest_token(WITH_ROLLUP_SYM, yylval);
return WITH_ROLLUP_SYM;
default:
/* Save the token following 'WITH' */
lip->lookahead_yylval= lip->yylval;
lip->yylval= NULL;
lip->lookahead_token= token;
yylloc->cpp.end= lip->get_cpp_ptr();
yylloc->raw.end= lip->get_ptr();
lip->add_digest_token(WITH, yylval);
return WITH;
}
break;
}
... ...
}
语法分析
Bison 和词法分析的函数接口是 yylex(),在需要的时候调用 yylex() 获取词法解析的数据,并完成自己的语法解析。
正常来说,Bison 的实际入口函数应该是 yyparse() ,而在 MySQL 中通过宏定义,将 yyparse() 替换为 MYSQLparse();如上所述,实际调用的仍然是 yyparse() 。
另外,我们可以根据 Bison 中的 Action 操作来查看 MySQL 解析结果的存储结构。
调试
在这里通过考察存储的 WHERE 数据结构来查看语法解析的结果。
(gdb) attach PID
(gdb) set print pretty on # 设置显示样式
(gdb) b mysql_execute_command # 可以用来查看所有的SQL
(gdb) p thd->lex->select_lex
(gdb) p ((Item_cond*)thd->lex->select_lex->where)->list # 查看WHERE中的list;在 5.7 中该成员名为 m_where_cond
(gdb) detach
MYSQLlex
(gdb) bt
#0 MYSQLlex (yylval=yylval@entry=0x7f6333efedf0, yylloc=yylloc@entry=0x7f6333efedb0, thd=thd@entry=0x7f5f78013d90)
at /data2/jenkins/workspace/stonedb5.7-zsl-centos7.9-30-238/sql/sql_lex.cc:1309
#1 0x0000000000f9cadc in MYSQLparse (YYTHD=YYTHD@entry=0x7f5f78013d90) at /data2/jenkins/workspace/stonedb5.7-zsl-centos7.9-30-238/build/build_only/sql/sql_yacc.cc:18568
#2 0x0000000000ec17b1 in parse_sql (thd=thd@entry=0x7f5f78013d90, parser_state=parser_state@entry=0x7f6333f00fe0, creation_ctx=creation_ctx@entry=0x0)
at /data2/jenkins/workspace/stonedb5.7-zsl-centos7.9-30-238/sql/sql_parse.cc:7178
#3 0x0000000000ec1d24 in mysql_parse (thd=thd@entry=0x7f5f78013d90, parser_state=parser_state@entry=0x7f6333f00fe0)
at /data2/jenkins/workspace/stonedb5.7-zsl-centos7.9-30-238/sql/sql_parse.cc:5501
#4 0x0000000000ec2a04 in dispatch_command (thd=thd@entry=0x7f5f78013d90, com_data=com_data@entry=0x7f6333f01640, command=COM_QUERY)
at /data2/jenkins/workspace/stonedb5.7-zsl-centos7.9-30-238/sql/sql_parse.cc:1495
#5 0x0000000000ec4450 in do_command (thd=thd@entry=0x7f5f78013d90) at /data2/jenkins/workspace/stonedb5.7-zsl-centos7.9-30-238/sql/sql_parse.cc:1034
#6 0x0000000000f85270 in handle_connection (arg=arg@entry=0x6939ea0) at /data2/jenkins/workspace/stonedb5.7-zsl-centos7.9-30-238/sql/conn_handler/connection_handler_per_thread.cc:313
#7 0x000000000144f224 in pfs_spawn_thread (arg=0x68e6be0) at /data2/jenkins/workspace/stonedb5.7-zsl-centos7.9-30-238/storage/perfschema/pfs.cc:2197
#8 0x00007f637eeafea5 in start_thread (arg=0x7f6333f02700) at pthread_create.c:307
#9 0x00007f637c3f8b0d in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:111
(gdb) p *yylloc
$18 = {
cpp = {
start = 0x7f5f78c6c760 "SELECT\n AS mderatemoney,\ncoalesce(sum(salde.ideratenums),\n0) AS ideratenums,\ncoalesce(sum(salde.mhandcharge),\n0) AS mhandcharge,\nsaletype.isettlementid AS isettlementid,\nsale.bysalesvouchertype AS bys"...,
end = 0x7f5f78c6c766 "\n AS mderatemoney,\ncoalesce(sum(salde.ideratenums),\n0) AS ideratenums,\ncoalesce(sum(salde.mhandcharge),\n0) AS mhandcharge,\nsaletype.isettlementid AS isettlementid,\nsale.bysalesvouchertype AS bysalesvo"...
},
raw = {
start = 0x7f5f78c6bbc0 "SELECT\nsale.usid AS usid,\ncus.corpname AS corpname,\nsale.iscenicid AS iscenicid,\nsalde.itickettypeid AS itickettypeid,\nprice.icrowdkindid AS icrowdkindid,\nsalde.mactualsaleprice AS mactualsaleprice,\ns"...,
end = 0x7f5f78c6bbc6 "\nsale.usid AS usid,\ncus.corpname AS corpname,\nsale.iscenicid AS iscenicid,\nsalde.itickettypeid AS itickettypeid,\nprice.icrowdkindid AS icrowdkindid,\nsalde.mactualsaleprice AS mactualsaleprice,\nsum(sal"...
}
}
lex_start
(gdb) bt
#0 lex_start (thd=thd@entry=0x7f4be8002c10) at /data2/jenkins/workspace/stonedb5.7-zsl-centos7.9-30-238/sql/sql_lex.cc:507
#1 0x0000000000ec1b84 in mysql_parse (thd=thd@entry=0x7f4be8002c10, parser_state=parser_state@entry=0x7f4fabd52fe0)
at /data2/jenkins/workspace/stonedb5.7-zsl-centos7.9-30-238/sql/sql_parse.cc:5484
#2 0x0000000000ec2a04 in dispatch_command (thd=thd@entry=0x7f4be8002c10, com_data=com_data@entry=0x7f4fabd53640, command=COM_QUERY)
at /data2/jenkins/workspace/stonedb5.7-zsl-centos7.9-30-238/sql/sql_parse.cc:1495
#3 0x0000000000ec4450 in do_command (thd=thd@entry=0x7f4be8002c10) at /data2/jenkins/workspace/stonedb5.7-zsl-centos7.9-30-238/sql/sql_parse.cc:1034
#4 0x0000000000f85270 in handle_connection (arg=arg@entry=0x6834710) at /data2/jenkins/workspace/stonedb5.7-zsl-centos7.9-30-238/sql/conn_handler/connection_handler_per_thread.cc:313
#5 0x000000000144f224 in pfs_spawn_thread (arg=0x6829830) at /data2/jenkins/workspace/stonedb5.7-zsl-centos7.9-30-238/storage/perfschema/pfs.cc:2197
#6 0x00007f4ff6d01ea5 in start_thread (arg=0x7f4fabd54700) at pthread_create.c:307
#7 0x00007f4ff424ab0d in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:111
mysql_execute_command
(gdb) bt
#0 mysql_execute_command (thd=thd@entry=0x7f4be8002c10, first_level=first_level@entry=true) at /data2/jenkins/workspace/stonedb5.7-zsl-centos7.9-30-238/sql/sql_parse.cc:2461
#1 0x0000000000ec1f05 in mysql_parse (thd=thd@entry=0x7f4be8002c10, parser_state=parser_state@entry=0x7f4fabd52fe0)
at /data2/jenkins/workspace/stonedb5.7-zsl-centos7.9-30-238/sql/sql_parse.cc:5621
#2 0x0000000000ec2a04 in dispatch_command (thd=thd@entry=0x7f4be8002c10, com_data=com_data@entry=0x7f4fabd53640, command=COM_QUERY)
at /data2/jenkins/workspace/stonedb5.7-zsl-centos7.9-30-238/sql/sql_parse.cc:1495
#3 0x0000000000ec4450 in do_command (thd=thd@entry=0x7f4be8002c10) at /data2/jenkins/workspace/stonedb5.7-zsl-centos7.9-30-238/sql/sql_parse.cc:1034
#4 0x0000000000f85270 in handle_connection (arg=arg@entry=0x6834710) at /data2/jenkins/workspace/stonedb5.7-zsl-centos7.9-30-238/sql/conn_handler/connection_handler_per_thread.cc:313
#5 0x000000000144f224 in pfs_spawn_thread (arg=0x6829830) at /data2/jenkins/workspace/stonedb5.7-zsl-centos7.9-30-238/storage/perfschema/pfs.cc:2197
#6 0x00007f4ff6d01ea5 in start_thread (arg=0x7f4fabd54700) at pthread_create.c:307
#7 0x00007f4ff424ab0d in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:111
(gdb) p *thd->lex->select_lex
$2 = {<Sql_alloc> = {<No data fields>}, _vptr.st_select_lex = 0x22362e8 <vtable for st_select_lex+16>, next = 0x0, prev = 0x7f4be8007580, master = 0x7f4be8007568, slave = 0x0,
link_next = 0x0, link_prev = 0x7f4be8004e68, m_query_result = 0x0, m_base_options = 0, m_active_options = 0, sql_cache = st_select_lex::SQL_CACHE_UNSPECIFIED, uncacheable = 0 '\000',
linkage = UNSPECIFIED_TYPE, no_table_names_allowed = false, context = {<Sql_alloc> = {<No data fields>}, outer_context = 0x0, next_context = 0x0, table_list = 0x7f4be8008150,
first_name_resolution_table = 0x7f4be8008150, last_name_resolution_table = 0x0, select_lex = 0x7f4be8007280, view_error_handler = false, view_error_handler_arg = 0x0,
resolve_in_select_list = true, security_ctx = 0x0}, first_context = 0x7f4be80072e0, resolve_place = st_select_lex::RESOLVE_NONE, resolve_nest = 0x0, semijoin_disallowed = false,
db = 0x0, m_where_cond = 0x7f4be8008818, m_having_cond = 0x0, cond_value = Item::COND_UNDEF, having_value = Item::COND_UNDEF, parent_lex = 0x7f4be8004d88, olap = UNSPECIFIED_OLAP_TYPE,
table_list = {<Sql_alloc> = {<No data fields>}, elements = 1, first = 0x7f4be8008150, next = 0x7f4be8008150}, group_list = {<Sql_alloc> = {<No data fields>}, elements = 0, first = 0x0,
next = 0x7f4be80073a0}, group_list_ptrs = 0x0, item_list = {<base_list> = {<Sql_alloc> = {<No data fields>}, first = 0x7f4be8007cf8, last = 0x7f4be8007cf8,
elements = 1}, <No data fields>}, is_item_list_lookup = false, hidden_group_field_count = 0, fields_list = @0x7f4be80073b8,
all_fields = {<base_list> = {<Sql_alloc> = {<No data fields>}, first = 0x2333bd0 <end_of_list>, last = 0x7f4be80073e0, elements = 0}, <No data fields>}, ftfunc_list = 0x7f4be8007400,
ftfunc_list_alloc = {<base_list> = {<Sql_alloc> = {<No data fields>}, first = 0x2333bd0 <end_of_list>, last = 0x7f4be8007400, elements = 0}, <No data fields>}, join = 0x0,
top_join_list = {<base_list> = {<Sql_alloc> = {<No data fields>}, first = 0x7f4be80086d8, last = 0x7f4be80086d8, elements = 1}, <No data fields>}, join_list = 0x7f4be8007420,
embedding = 0x0, sj_nests = {<base_list> = {<Sql_alloc> = {<No data fields>}, first = 0x2333bd0 <end_of_list>, last = 0x7f4be8007448, elements = 0}, <No data fields>}, leaf_tables = 0x0,
leaf_table_count = 0, derived_table_count = 0, materialized_derived_table_count = 0, has_sj_nests = false, partitioned_table_count = 0, order_list = {<Sql_alloc> = {<No data fields>},
elements = 0, first = 0x0, next = 0x7f4be8007488}, order_list_ptrs = 0x0, select_limit = 0x0, offset_limit = 0x0, ref_pointer_array = {m_array = 0x0, m_size = 0}, ref_ptrs = {
m_array = 0x0, m_size = 0}, select_n_having_items = 1, cond_count = 0, between_count = 0, max_equal_elems = 0, select_n_where_fields = 2, parsing_place = CTX_NONE, in_sum_expr = 0,
with_sum_func = false, n_sum_items = 0, n_child_sum_items = 0, select_number = 1, nest_level = 0, inner_sum_func_list = 0x0, with_wild = 1, braces = false, having_fix_field = false,
group_fix_field = false, inner_refs_list = {<base_list> = {<Sql_alloc> = {<No data fields>}, first = 0x2333bd0 <end_of_list>, last = 0x7f4be8007510, elements = 0}, <No data fields>},
explicit_limit = false, subquery_in_having = false, first_execution = true, sj_pullout_done = false, exclude_from_table_unique_test = false, allow_merge_derived = true,
prev_join_using = 0x0, select_list_tables = 0, outer_join = 0, opt_hints_qb = 0x0, m_agg_func_used = false, m_json_agg_func_used = false, static type_str = {0x199fa1d "NONE",
0x1990b2a "PRIMARY", 0x1902c33 "SIMPLE", 0x1991b25 "DERIVED", 0x19241c8 "SUBQUERY", 0x1a651f3 "UNION", 0x1991b2d "UNION RESULT", 0x1991b3a "MATERIALIZED"}, sj_candidates = 0x0,
hidden_order_field_count = 0}
(gdb) p *(Item_cond*)thd->lex->select_lex->m_where_cond
参考
Flex/Bison
关于最原始的论文,可以参考 Lex - A Lexical Analyzer Generator ,以及 Yacc: Yet Another Compiler-Compiler 。
对于 Lex 和 Yacc 来说,比较经典的入门可以参考 Lex & Yacc Tutorial,其中包括了如何编写一个计算器,以及相关的调试等信息;也可以参考 本地文档,以及相关的 源码 。
关于总体介绍可以参考 Lex and YACC primer 。
关于调试方法可以参考 Understanding Your Parser 。