Summary:
Core startup processing of the MySQL InnoDB storage engine.
Core function:
srv_start:
dberr_t srv_start(bool create_new_db) {
lsn_t flushed_lsn;
/* just for assertions */
lsn_t previous_lsn;
/* output from call to create_log_files(...) */
lsn_t new_checkpoint_lsn = 0;
page_no_t sum_of_data_file_sizes;
page_no_t tablespace_size_in_header;
dberr_t err;
uint32_t srv_n_log_files_found = srv_n_log_files;
mtr_t mtr;
purge_pq_t *purge_queue;
char logfilename[10000];
char *logfile0 = nullptr;
size_t dirnamelen;
unsigned i = 0;
assert(srv_dict_metadata == nullptr);
/* Reset the start state. */
srv_start_state = SRV_START_STATE_NONE;
#ifdef UNIV_LINUX
#ifdef HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE
ib::info(ER_IB_MSG_1107);
#else
ib::info(ER_IB_MSG_1108);
#endif /* HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE */
#endif /* UNIV_LINUX */
if (sizeof(ulint) != sizeof(void *)) {
ib::error(ER_IB_MSG_1109, sizeof(ulint), sizeof(void *));
}
if (srv_is_upgrade_mode) {
if (srv_read_only_mode) {
ib::error(ER_IB_MSG_1110);
return (srv_init_abort(DB_ERROR));
}
if (srv_force_recovery != 0) {
ib::error(ER_IB_MSG_1111);
return (srv_init_abort(DB_ERROR));
}
}
#ifdef UNIV_DEBUG
ib::info(ER_IB_MSG_1112) << "!!!!!!!! UNIV_DEBUG switched on !!!!!!!!!";
#endif
#ifdef UNIV_IBUF_DEBUG
ib::info(ER_IB_MSG_1113) << "!!!!!!!! UNIV_IBUF_DEBUG switched on !!!!!!!!!";
#ifdef UNIV_IBUF_COUNT_DEBUG
ib::info(ER_IB_MSG_1114)
<< "!!!!!!!! UNIV_IBUF_COUNT_DEBUG switched on !!!!!!!!!";
ib::error(ER_IB_MSG_1115)
<< "Crash recovery will fail with UNIV_IBUF_COUNT_DEBUG";
#endif
#endif
#ifdef UNIV_LOG_LSN_DEBUG
ib::info(ER_IB_MSG_1116)
<< "!!!!!!!! UNIV_LOG_LSN_DEBUG switched on !!!!!!!!!";
#endif /* UNIV_LOG_LSN_DEBUG */
#if defined(COMPILER_HINTS_ENABLED)
ib::info(ER_IB_MSG_1117) << "Compiler hints enabled.";
#endif /* defined(COMPILER_HINTS_ENABLED) */
ib::info(ER_IB_MSG_1119) << MUTEX_TYPE;
ib::info(ER_IB_MSG_1120) << IB_MEMORY_BARRIER_STARTUP_MSG;
if (srv_force_recovery > 0) {
ib::info(ER_IB_MSG_1121) << "!!! innodb_force_recovery is set to "
<< srv_force_recovery << " !!!";
}
#ifndef HAVE_MEMORY_BARRIER
#if defined __i386__ || defined __x86_64__ || defined _M_IX86 || \
defined _M_X64 || defined _WIN32
#else
ib::warn(ER_IB_MSG_1122);
#endif /* IA32 or AMD64 */
#endif /* HAVE_MEMORY_BARRIER */
#ifdef UNIV_ZIP_DEBUG
ib::info(ER_IB_MSG_1123, ZLIB_VERSION) << " with validation";
#else
ib::info(ER_IB_MSG_1123, ZLIB_VERSION);
#endif /* UNIV_ZIP_DEBUG */
#ifdef UNIV_ZIP_COPY
ib::info(ER_IB_MSG_1124) << "and extra copying";
#endif /* UNIV_ZIP_COPY */
/* Since InnoDB does not currently clean up all its internal data
structures in MySQL Embedded Server Library server_end(), we
print an error message if someone tries to start up InnoDB a
second time during the process lifetime. */
if (srv_start_has_been_called) {
ib::error(ER_IB_MSG_1125);
}
srv_start_has_been_called = true;
srv_is_being_started = true;
#ifdef HAVE_PSI_STAGE_INTERFACE
/* Register performance schema stages before any real work has been
started which may need to be instrumented. */
mysql_stage_register("innodb", srv_stages, UT_ARR_SIZE(srv_stages));
#endif /* HAVE_PSI_STAGE_INTERFACE */
/* Switch latching order checks on in , if
--innodb-sync-debug=false (default) */
ut_d(sync_check_enable());
srv_boot();
ib::info(ER_IB_MSG_1126)
<< "Using "
<< (ut_crc32_cpu_enabled ? (ut_poly_mul_cpu_enabled
? "hardware accelerated crc32 and "
"polynomial multiplication."
: "hardware accelerated crc32 and "
"software polynomial multiplication.")
: "software crc32.");
os_create_block_cache();
fil_init(innobase_get_open_files_limit());
/* This is the default directory for IBD and IBU files. Put it first
in the list of known directories. */
fil_set_scan_dir(MySQL_datadir_path.path());
/* Add --innodb-data-home-dir as a known location for IBD and IBU files
if it is not already there. */
ut_ad(srv_data_home != nullptr && *srv_data_home != '\0');
fil_set_scan_dir(Fil_path::remove_quotes(srv_data_home));
/* Add --innodb-directories as known locations for IBD and IBU files. */
if (srv_innodb_directories != nullptr && *srv_innodb_directories != 0) {
fil_set_scan_dirs(Fil_path::remove_quotes(srv_innodb_directories));
}
/* Note whether the undo path is different (not the same or under)
from all other known directories. If so, this will allow us to keep
IBD files out of this unique undo location.*/
MySQL_undo_path_is_unique = !fil_path_is_known(MySQL_undo_path.path());
/* For the purpose of file discovery at startup, we need to scan
--innodb-undo-directory also if it is different from the locations above. */
if (MySQL_undo_path_is_unique) {
fil_set_scan_dir(Fil_path::remove_quotes(MySQL_undo_path));
}
ib::info(ER_IB_MSG_378) << "Directories to scan '" << fil_get_dirs() << "'";
/* Must replace clone files before scanning directories. When
clone replaces current database, cloned files are moved to data files
at this stage. */
err = clone_init();
if (err != DB_SUCCESS) {
return (srv_init_abort(err));
}
err = fil_scan_for_tablespaces();
if (err != DB_SUCCESS) {
return (srv_init_abort(err));
}
if (!srv_read_only_mode) {
mutex_create(LATCH_ID_SRV_MONITOR_FILE, &srv_monitor_file_mutex);
if (srv_innodb_status) {
srv_monitor_file_name = static_cast<char *>(ut::malloc_withkey(
UT_NEW_THIS_FILE_PSI_KEY,
MySQL_datadir_path.len() + 20 + sizeof "/innodb_status."));
sprintf(srv_monitor_file_name, "%s/innodb_status." ULINTPF,
static_cast<const char *>(MySQL_datadir_path),
os_proc_get_number());
srv_monitor_file = fopen(srv_monitor_file_name, "w+");
if (!srv_monitor_file) {
ib::error(ER_IB_MSG_1127, srv_monitor_file_name, strerror(errno));
return (srv_init_abort(DB_ERROR));
}
} else {
srv_monitor_file_name = nullptr;
srv_monitor_file = os_file_create_tmpfile();
if (!srv_monitor_file) {
return (srv_init_abort(DB_ERROR));
}
}
mutex_create(LATCH_ID_SRV_MISC_TMPFILE, &srv_misc_tmpfile_mutex);
srv_misc_tmpfile = os_file_create_tmpfile();
if (!srv_misc_tmpfile) {
return (srv_init_abort(DB_ERROR));
}
}
srv_n_file_io_threads = srv_n_read_io_threads;
srv_n_file_io_threads += srv_n_write_io_threads;
if (!srv_read_only_mode) {
/* Add the log and ibuf IO threads. */
srv_n_file_io_threads += 2;
} else {
ib::info(ER_IB_MSG_1128);
}
ut_a(srv_n_file_io_threads <= SRV_MAX_N_IO_THREADS);
if (!os_aio_init(srv_n_read_io_threads, srv_n_write_io_threads,
SRV_MAX_N_PENDING_SYNC_IOS)) {
ib::error(ER_IB_MSG_1129);
return (srv_init_abort(DB_ERROR));
}
double size;
char unit;
if (srv_buf_pool_size >= 1024 * 1024 * 1024) {
size = ((double)srv_buf_pool_size) / (1024 * 1024 * 1024);
unit = 'G';
} else {
size = ((double)srv_buf_pool_size) / (1024 * 1024);
unit = 'M';
}
double chunk_size;
char chunk_unit;
if (srv_buf_pool_chunk_unit >= 1024 * 1024 * 1024) {
chunk_size = srv_buf_pool_chunk_unit / 1024.0 / 1024 / 1024;
chunk_unit = 'G';
} else {
chunk_size = srv_buf_pool_chunk_unit / 1024.0 / 1024;
chunk_unit = 'M';
}
ib::info(ER_IB_MSG_1130, size, unit, srv_buf_pool_instances, chunk_size,
chunk_unit);
err = buf_pool_init(srv_buf_pool_size, srv_buf_pool_instances);
if (err != DB_SUCCESS) {
ib::error(ER_IB_MSG_1131);
return (srv_init_abort(DB_ERROR));
}
ib::info(ER_IB_MSG_1132);
#ifdef UNIV_DEBUG
/* We have observed deadlocks with a 5MB buffer pool but
the actual lower limit could very well be a little higher. */
if (srv_buf_pool_size <= 5 * 1024 * 1024) {
ib::info(ER_IB_MSG_1133, ulonglong{srv_buf_pool_size / 1024 / 1024});
}
#endif /* UNIV_DEBUG */
fsp_init();
pars_init();
recv_sys_create();
recv_sys_init(buf_pool_get_curr_size());
trx_sys_create();
lock_sys_create(srv_lock_table_size);
srv_start_state_set(SRV_START_STATE_LOCK_SYS);
/* Create i/o-handler threads: */
/* For read only mode, we don't need ibuf and log I/O thread.
Please see innobase_start_or_create_for_mysql() */
ulint start = (srv_read_only_mode) ? 0 : 2;
/* Sequence number displayed in the thread os name. */
PSI_thread_seqnum pfs_seqnum;
for (ulint t = 0; t < srv_n_file_io_threads; ++t) {
IB_thread thread;
if (t < start) {
if (t == 0) {
thread = os_thread_create(io_ibuf_thread_key, 0, io_handler_thread, t);
} else {
ut_ad(t == 1);
thread = os_thread_create(io_log_thread_key, 0, io_handler_thread, t);
}
} else if (t >= start && t < (start + srv_n_read_io_threads)) {
/* Numbering for ib_io_rd-NN starts with N=1. */
pfs_seqnum = t + 1 - start;
thread = os_thread_create(io_read_thread_key, pfs_seqnum,
io_handler_thread, t);
} else if (t >= (start + srv_n_read_io_threads) &&
t < (start + srv_n_read_io_threads + srv_n_write_io_threads)) {
/* Numbering for ib_io_wr-NN starts with N=1. */
pfs_seqnum = t + 1 - start - srv_n_read_io_threads;
thread = os_thread_create(io_write_thread_key, pfs_seqnum,
io_handler_thread, t);
} else {
/* Dead code ? */
thread = os_thread_create(io_handler_thread_key, t, io_handler_thread, t);
}
thread.start();
}
/* Even in read-only mode there could be flush job generated by
intrinsic table operations. */
buf_flush_page_cleaner_init(srv_n_page_cleaners);
srv_start_state_set(SRV_START_STATE_IO);
srv_startup_is_before_trx_rollback_phase = !create_new_db;
if (create_new_db) {
recv_sys_free();
}
/* Open or create the data files. */
page_no_t sum_of_new_sizes;
err = srv_sys_space.open_or_create(false, create_new_db, &sum_of_new_sizes,
&flushed_lsn);
/* FIXME: This can be done earlier, but we now have to wait for
checking of system tablespace. */
dict_persist_init();
switch (err) {
case DB_SUCCESS:
break;
case DB_CANNOT_OPEN_FILE:
ib::error(ER_IB_MSG_1134);
[[fallthrough]];
default:
/* Other errors might come from
Datafile::validate_first_page() */
return (srv_init_abort(err));
}
dirnamelen = strlen(srv_log_group_home_dir);
ut_a(dirnamelen < (sizeof logfilename) - 10 - sizeof "ib_logfile");
memcpy(logfilename, srv_log_group_home_dir, dirnamelen);
/* Add a path separator if needed. */
if (dirnamelen && logfilename[dirnamelen - 1] != OS_PATH_SEPARATOR) {
logfilename[dirnamelen++] = OS_PATH_SEPARATOR;
}
srv_log_file_size_requested = srv_log_file_size;
if (create_new_db) {
ut_a(buf_are_flush_lists_empty_validate());
flushed_lsn = LOG_START_LSN;
err = create_log_files(logfilename, dirnamelen, flushed_lsn, 0, logfile0,
new_checkpoint_lsn);
if (err != DB_SUCCESS) {
return (srv_init_abort(err));
}
flushed_lsn = new_checkpoint_lsn;
ut_a(new_checkpoint_lsn == LOG_START_LSN + LOG_BLOCK_HDR_SIZE);
} else {
for (i = 0; i < SRV_N_LOG_FILES_CLONE_MAX; i++) {
os_offset_t size;
os_file_stat_t stat_info;
sprintf(logfilename + dirnamelen, "ib_logfile%u", i);
err = os_file_get_status(logfilename, &stat_info, false,
srv_read_only_mode);
if (err == DB_NOT_FOUND) {
if (i == 0) {
if (flushed_lsn < static_cast<lsn_t>(1000)) {
ib::error(ER_IB_MSG_1135);
return (srv_init_abort(DB_ERROR));
}
err = create_log_files(logfilename, dirnamelen, flushed_lsn,
SRV_N_LOG_FILES_CLONE_MAX, logfile0,
new_checkpoint_lsn);
if (err != DB_SUCCESS) {
return (srv_init_abort(err));
}
create_log_files_rename(logfilename, dirnamelen, new_checkpoint_lsn,
logfile0);
/* Suppress the message about
crash recovery. */
flushed_lsn = new_checkpoint_lsn;
ut_a(log_sys != nullptr);
goto files_checked;
} else if (i < 2) {
/* must have at least 2 log files */
ib::error(ER_IB_MSG_1136);
return (srv_init_abort(err));
}
/* opened all files */
break;
}
if (!srv_file_check_mode(logfilename)) {
return (srv_init_abort(DB_ERROR));
}
err = open_log_file(&files[i], logfilename, &size);
if (err != DB_SUCCESS) {
return (srv_init_abort(err));
}
ut_a(size != (os_offset_t)-1);
if (size & ((1 << UNIV_PAGE_SIZE_SHIFT) - 1)) {
ib::error(ER_IB_MSG_1137, logfilename, ulonglong{size});
return (srv_init_abort(DB_ERROR));
}
if (i == 0) {
srv_log_file_size = size;
#ifndef UNIV_DEBUG_DEDICATED
} else if (size != srv_log_file_size) {
#else
} else if (!srv_dedicated_server && size != srv_log_file_size) {
#endif /* UNIV_DEBUG_DEDICATED */
ib::error(ER_IB_MSG_1138, logfilename, ulonglong{size},
srv_log_file_size);
return (srv_init_abort(DB_ERROR));
}
}
srv_n_log_files_found = i;
/* Create the in-memory file space objects. */
sprintf(logfilename + dirnamelen, "ib_logfile%u", 0);
/* Disable the doublewrite buffer for log files. */
fil_space_t *log_space = fil_space_create(
"innodb_redo_log", dict_sys_t::s_log_space_first_id,
fsp_flags_set_page_size(0, univ_page_size), FIL_TYPE_LOG);
ut_ad(fil_validate());
ut_a(log_space != nullptr);
/* srv_log_file_size is measured in bytes */
ut_a(srv_log_file_size / UNIV_PAGE_SIZE <= PAGE_NO_MAX);
for (unsigned j = 0; j < i; j++) {
sprintf(logfilename + dirnamelen, "ib_logfile%u", j);
const ulonglong file_pages = srv_log_file_size / UNIV_PAGE_SIZE;
if (fil_node_create(logfilename, static_cast<page_no_t>(file_pages),
log_space, false, false) == nullptr) {
return (srv_init_abort(DB_ERROR));
}
}
if (!log_sys_init(i, srv_log_file_size, dict_sys_t::s_log_space_first_id)) {
return (srv_init_abort(DB_ERROR));
}
/* Read the first log file header to get the encryption
information if it exist. */
if (srv_force_recovery < SRV_FORCE_NO_LOG_REDO && !log_read_encryption()) {
return (srv_init_abort(DB_ERROR));
}
}
ut_a(log_sys != nullptr);
/* Open all log files and data files in the system
tablespace: we keep them open until database shutdown.
When we use goto files_checked; we don't need the line below,
because in such case, it's been already called at the end of
create_log_files_rename(). */
fil_open_log_and_system_tablespace_files();
files_checked:
if (dblwr::enabled && ((err = dblwr::open(create_new_db)) != DB_SUCCESS)) {
return (srv_init_abort(err));
}
arch_init();
mtr_t::s_logging.init();
if (create_new_db) {
ut_a(!srv_read_only_mode);
ut_a(log_sys->last_checkpoint_lsn.load() ==
LOG_START_LSN + LOG_BLOCK_HDR_SIZE);
ut_a(flushed_lsn == LOG_START_LSN + LOG_BLOCK_HDR_SIZE);
log_start(*log_sys, 0, flushed_lsn, flushed_lsn);
log_start_background_threads(*log_sys);
err = srv_undo_tablespaces_init(true);
if (err != DB_SUCCESS) {
return (srv_init_abort(err));
}
mtr_start(&mtr);
bool ret = fsp_header_init(0, sum_of_new_sizes, &mtr, false);
mtr_commit(&mtr);
if (!ret) {
return (srv_init_abort(DB_ERROR));
}
/* To maintain backward compatibility we create only
the first rollback segment before the double write buffer.
All the remaining rollback segments will be created later,
after the double write buffers haves been created. */
trx_sys_create_sys_pages();
trx_purge_sys_mem_create();
purge_queue = trx_sys_init_at_db_start();
/* The purge system needs to create the purge view and
therefore requires that the trx_sys is inited. */
trx_purge_sys_initialize(srv_threads.m_purge_workers_n, purge_queue);
err = dict_create();
if (err != DB_SUCCESS) {
return (srv_init_abort(err));
}
srv_create_sdi_indexes();
previous_lsn = log_get_lsn(*log_sys);
buf_flush_sync_all_buf_pools();
log_stop_background_threads(*log_sys);
flushed_lsn = log_get_lsn(*log_sys);
ut_a(flushed_lsn == previous_lsn);
err = fil_write_flushed_lsn(flushed_lsn);
ut_a(err == DB_SUCCESS);
create_log_files_rename(logfilename, dirnamelen, new_checkpoint_lsn,
logfile0);
log_start_background_threads(*log_sys);
ut_a(buf_are_flush_lists_empty_validate());
/* We always create the legacy double write buffer to preserve the
expected page ordering of the system tablespace.
FIXME: Try and remove this requirement. */
err = dblwr::v1::create();
if (err != DB_SUCCESS) {
return srv_init_abort(err);
}
} else {
/* Load the reserved boundaries of the legacy dblwr buffer, this is
required to check for stray reads and writes trying to access this
reserved region in the sys tablespace.
FIXME: Try and remove this requirement. */
err = dblwr::v1::init();
if (err != DB_SUCCESS) {
return srv_init_abort(err);
}
/* Invalidate the buffer pool to ensure that we reread
the page that we read above, during recovery.
Note that this is not as heavy weight as it seems. At
this point there will be only ONE page in the buf_LRU
and there must be no page in the buf_flush list. */
buf_pool_invalidate();
/* We always try to do a recovery, even if the database had
been shut down normally: this is the normal startup path */
err = recv_recovery_from_checkpoint_start(*log_sys, flushed_lsn);
if (err == DB_SUCCESS) {
arch_page_sys->post_recovery_init();
/* Initialize the change buffer. */
err = dict_boot();
}
if (err != DB_SUCCESS) {
return (srv_init_abort(err));
}
ut_ad(clone_check_recovery_crashpoint(recv_sys->is_cloned_db));
/* We need to start log threads before asking to flush
all dirty pages. That's because some dirty pages could
be dirty because of ibuf merges. The ibuf merges could
have written log records to the log buffer. The redo
log has to be flushed up to the newest_modification of
a dirty page, before the page might be flushed to disk.
Hence we need the log_flusher thread which will flush
log records related to the ibuf merges, allowing to
flush the modified pages. That's why we need to start
the log threads before flushing dirty pages. */
if (!srv_read_only_mode) {
log_start_background_threads(*log_sys);
}
if (srv_force_recovery < SRV_FORCE_NO_LOG_REDO) {
/* Apply the hashed log records to the
respective file pages, for the last batch of
recv_group_scan_log_recs(). */
/* Don't allow IBUF operations for cloned database
recovery as it would add extra redo log and we may
not have enough margin. */
if (recv_sys->is_cloned_db) {
recv_apply_hashed_log_recs(*log_sys, false);
} else {
recv_apply_hashed_log_recs(*log_sys, true);
}
if (recv_sys->found_corrupt_log) {
err = DB_ERROR;
return (srv_init_abort(err));
}
DBUG_PRINT("ib_log", ("apply completed"));
/* Check and print if there were any tablespaces
which had redo log records but we couldn't apply
them because the filenames were missing. */
}
if (srv_force_recovery < SRV_FORCE_NO_LOG_REDO) {
/* Recovery complete, start verifying the
page LSN on read. */
recv_lsn_checks_on = true;
}
/* We have gone through the redo log, now check if all the
tablespaces were found and recovered. */
if (srv_force_recovery == 0 && fil_check_missing_tablespaces()) {
ib::error(ER_IB_MSG_1139);
/* Set the abort flag to true. */
auto p = recv_recovery_from_checkpoint_finish(*log_sys, true);
ut_a(p == nullptr);
return (srv_init_abort(DB_ERROR));
}
/* We have successfully recovered from the redo log. The
data dictionary should now be readable. */
if (recv_sys->found_corrupt_log) {
ib::warn(ER_IB_MSG_1140);
}
if (!srv_force_recovery && !srv_read_only_mode) {
buf_flush_sync_all_buf_pools();
}
srv_dict_metadata = recv_recovery_from_checkpoint_finish(*log_sys, false);
if (recv_sys->is_cloned_db && srv_dict_metadata != nullptr) {
ut::delete_(srv_dict_metadata);
srv_dict_metadata = nullptr;
}
/* We need to save the dynamic metadata collected from redo log to DD
buffer table here. This is to make sure that the dynamic metadata is not
lost by any future checkpoint. Since DD and data dictionary in memory
objects are not fully initialized at this point, the usual mechanism to
persist dynamic metadata at checkpoint wouldn't work. */
if (srv_dict_metadata != nullptr && !srv_dict_metadata->empty()) {
/* Open this table in case srv_dict_metadata should be applied to this
table before checkpoint. And because DD is not fully up yet, the table
can be opened by internal APIs. */
fil_space_t *space =
fil_space_acquire_silent(dict_sys_t::s_dict_space_id);
if (space == nullptr) {
dberr_t error =
fil_ibd_open(true, FIL_TYPE_TABLESPACE, dict_sys_t::s_dict_space_id,
predefined_flags, dict_sys_t::s_dd_space_name,
dict_sys_t::s_dd_space_file_name, true, false);
if (error != DB_SUCCESS) {
ib::error(ER_IB_MSG_1142);
return (srv_init_abort(DB_ERROR));
}
} else {
fil_space_release(space);
}
dict_persist->table_buffer =
ut::new_withkey<DDTableBuffer>(UT_NEW_THIS_FILE_PSI_KEY);
/* We write redo log here. We assume that there should be enough room in
log files, supposing log_free_check() works fine before crash. */
srv_dict_metadata->store();
/* Flush logs to persist the changes. */
log_buffer_flush_to_disk(*log_sys);
}
log_sys->m_allow_checkpoints.store(true, std::memory_order_release);
if (!srv_force_recovery && !recv_sys->found_corrupt_log &&
(srv_log_file_size_requested != srv_log_file_size ||
srv_n_log_files_found != srv_n_log_files)) {
/* Prepare to replace the redo log files. */
if (srv_read_only_mode) {
ib::error(ER_IB_MSG_1141);
return (srv_init_abort(DB_READ_ONLY));
}
/* Prepare to delete the old redo log files */
flushed_lsn = srv_prepare_to_delete_redo_log_files(i);
log_stop_background_threads(*log_sys);
/* Make sure redo log is flushed after checkpoint thread is stopped. On
windows, Fil_shard::close_file intermittently hits mismatching
modification_counter and flush_counter assert while closing redo files.
This is likely because we flush redo in log_fsync (during checkpoint)
conditionally based on flush mode. Thus flush counter could remain behind
if checkpoint occurs after flush in srv_prepare_to_delete_redo_log_files.
This call is idempotent and should be harmless here. */
fil_flush_file_redo();
/* Prohibit redo log writes from any other
threads until creating a log checkpoint at the
end of create_log_files(). */
ut_d(log_sys->disable_redo_writes = true);
ut_ad(!buf_pool_check_no_pending_io());
RECOVERY_CRASH(3);
/* Stamp the LSN to the data files. */
err = fil_write_flushed_lsn(flushed_lsn);
ut_a(err == DB_SUCCESS);
RECOVERY_CRASH(4);
/* Close and free the redo log files, so that
we can replace them. */
fil_close_log_files(true);
RECOVERY_CRASH(5);
log_sys_close();
/* Finish clone file recovery before creating new log files. We
roll forward to remove any intermediate files here. */
clone_files_recovery(true);
ib::info(ER_IB_MSG_1143);
srv_log_file_size = srv_log_file_size_requested;
err =
create_log_files(logfilename, dirnamelen, flushed_lsn,
srv_n_log_files_found, logfile0, new_checkpoint_lsn);
if (err != DB_SUCCESS) {
return (srv_init_abort(err));
}
create_log_files_rename(logfilename, dirnamelen, new_checkpoint_lsn,
logfile0);
ut_d(log_sys->disable_redo_writes = false);
flushed_lsn = new_checkpoint_lsn;
log_start(*log_sys, 0, flushed_lsn, flushed_lsn);
log_start_background_threads(*log_sys);
} else if (recv_sys->is_cloned_db) {
/* Reset creator for log */
log_stop_background_threads(*log_sys);
log_files_header_read(*log_sys, 0);
lsn_t start_lsn;
start_lsn =
mach_read_from_8(log_sys->checkpoint_buf + LOG_HEADER_START_LSN);
log_files_header_read(*log_sys, LOG_CHECKPOINT_1);
log_files_header_flush(*log_sys, 0, start_lsn);
log_start_background_threads(*log_sys);
}
if (sum_of_new_sizes > 0) {
/* New data file(s) were added */
mtr_start(&mtr);
fsp_header_inc_size(0, sum_of_new_sizes, &mtr);
mtr_commit(&mtr);
/* Immediately write the log record about
increased tablespace size to disk, so that it
is durable even if mysqld would crash
quickly */
log_buffer_flush_to_disk(*log_sys);
}
err = srv_undo_tablespaces_init(false);
if (err != DB_SUCCESS && srv_force_recovery < SRV_FORCE_NO_UNDO_LOG_SCAN) {
return (srv_init_abort(err));
}
trx_purge_sys_mem_create();
/* The purge system needs to create the purge view and
therefore requires that the trx_sys is inited. */
purge_queue = trx_sys_init_at_db_start();
if (srv_is_upgrade_mode) {
if (!purge_queue->empty()) {
ib::info(ER_IB_MSG_1144);
srv_upgrade_old_undo_found = true;
}
/* Either the old or new undo tablespaces will
be deleted later depending on the value of
'failed_upgrade' in dd_upgrade_finish(). */
} else {
/* New undo tablespaces have been created.
Delete the old undo tablespaces and the references
to them in the TRX_SYS page. */
srv_undo_tablespaces_upgrade();
}
DBUG_EXECUTE_IF("check_no_undo", ut_ad(purge_queue->empty()););
/* The purge system needs to create the purge view and
therefore requires that the trx_sys and trx lists were
initialized in trx_sys_init_at_db_start(). */
trx_purge_sys_initialize(srv_threads.m_purge_workers_n, purge_queue);
}
/* Open temp-tablespace and keep it open until shutdown. */
err = srv_open_tmp_tablespace(create_new_db, &srv_tmp_space);
if (err != DB_SUCCESS) {
return (srv_init_abort(err));
}
err = ibt::open_or_create(create_new_db);
if (err != DB_SUCCESS) {
return (srv_init_abort(err));
}
/* Here the double write buffer has already been created and so
any new rollback segments will be allocated after the double
write buffer. The default segment should already exist.
We create the new segments only if it's a new database or
the database was shutdown cleanly. */
/* Note: When creating the extra rollback segments during an upgrade
we violate the latching order, even if the change buffer is empty.
We make an exception in and check srv_is_being_started
for that violation. It cannot create a deadlock because we are still
running in single threaded mode essentially. Only the IO threads
should be running at this stage. */
ut_a(srv_rollback_segments > 0);
ut_a(srv_rollback_segments <= TRX_SYS_N_RSEGS);
/* Make sure there are enough rollback segments in each tablespace
and that each rollback segment has an associated memory object.
If any of these rollback segments contain undo logs, load them into
the purge queue */
if (!trx_rseg_adjust_rollback_segments(srv_rollback_segments)) {
return (srv_init_abort(DB_ERROR));
}
/* Any undo tablespaces under construction are now fully built
with all needed rsegs. Delete the trunc.log files and clear the
construction list. */
srv_undo_tablespaces_mark_construction_done();
/* Now that all rsegs are ready for use, make them active. */
undo::spaces->s_lock();
for (auto undo_space : undo::spaces->m_spaces) {
if (!undo_space->is_empty()) {
undo_space->set_active();
}
}
undo::spaces->s_unlock();
/* Undo Tablespaces and Rollback Segments are ready. */
srv_startup_is_before_trx_rollback_phase = false;
if (!srv_read_only_mode) {
if (create_new_db) {
srv_buffer_pool_load_at_startup = FALSE;
}
/* Create the thread which watches the timeouts
for lock waits */
srv_threads.m_lock_wait_timeout = os_thread_create(
srv_lock_timeout_thread_key, 0, lock_wait_timeout_thread);
srv_threads.m_lock_wait_timeout.start();
/* Create the thread which warns of long semaphore waits */
srv_threads.m_error_monitor = os_thread_create(srv_error_monitor_thread_key,
0, srv_error_monitor_thread);
srv_threads.m_error_monitor.start();
/* Create the thread which prints InnoDB monitor info */
srv_threads.m_monitor =
os_thread_create(srv_monitor_thread_key, 0, srv_monitor_thread);
srv_threads.m_monitor.start();
srv_start_state_set(SRV_START_STATE_MONITOR);
}
srv_sys_tablespaces_open = true;
/* Rotate the encryption key for recovery. It's because
server could crash in middle of key rotation. Some tablespace
didn't complete key rotation. Here, we will resume the
rotation. */
if (!srv_read_only_mode && !create_new_db &&
srv_force_recovery < SRV_FORCE_NO_LOG_REDO) {
size_t fail_count = fil_encryption_rotate();
if (fail_count > 0) {
ib::info(ER_IB_MSG_1146)
<< "During recovery, fil_encryption_rotate() failed for "
<< fail_count << " tablespace(s).";
}
}
srv_is_being_started = false;
ut_a(trx_purge_state() == PURGE_STATE_INIT);
/* wake main loop of page cleaner up */
os_event_set(buf_flush_event);
sum_of_data_file_sizes = srv_sys_space.get_sum_of_sizes();
ut_a(sum_of_new_sizes != FIL_NULL);
tablespace_size_in_header = fsp_header_get_tablespace_size();
if (!srv_read_only_mode && !srv_sys_space.can_auto_extend_last_file() &&
sum_of_data_file_sizes != tablespace_size_in_header) {
ib::error(ER_IB_MSG_1147, ulong{tablespace_size_in_header},
ulong{sum_of_data_file_sizes});
if (srv_force_recovery == 0 &&
sum_of_data_file_sizes < tablespace_size_in_header) {
/* This is a fatal error, the tail of a tablespace is
missing */
ib::error(ER_IB_MSG_1148);
return (srv_init_abort(DB_ERROR));
}
}
if (!srv_read_only_mode && srv_sys_space.can_auto_extend_last_file() &&
sum_of_data_file_sizes < tablespace_size_in_header) {
ib::error(ER_IB_MSG_1149, ulong{tablespace_size_in_header},
ulong{sum_of_data_file_sizes});
if (srv_force_recovery == 0) {
ib::error(ER_IB_MSG_1150);
return (srv_init_abort(DB_ERROR));
}
}
/* Finish clone files recovery. This call is idempotent and is no op
if it is already done before creating new log files. */
clone_files_recovery(true);
ib::info(ER_IB_MSG_1151, INNODB_VERSION_STR,
ulonglong{log_get_lsn(*log_sys)});
return (DB_SUCCESS);
}