diff --git a/extra/mariabackup/backup_copy.cc b/extra/mariabackup/backup_copy.cc index 22a40e5fb1042..885b493a174ba 100644 --- a/extra/mariabackup/backup_copy.cc +++ b/extra/mariabackup/backup_copy.cc @@ -1772,7 +1772,7 @@ copy_back() if it exists. */ ds_tmp = ds_create(dst_dir, DS_TYPE_LOCAL); - if (!(ret = copy_or_move_file(ds_tmp, LOG_FILE_NAME, LOG_FILE_NAME, + if (!(ret = copy_or_move_file(ds_tmp, "ib_logfile0", "ib_logfile0", dst_dir, 1))) { goto cleanup; } @@ -1869,7 +1869,7 @@ copy_back() } /* skip the redo log (it was already copied) */ - if (!strcmp(filename, LOG_FILE_NAME)) { + if (!strcmp(filename, "ib_logfile0")) { continue; } diff --git a/extra/mariabackup/xtrabackup.cc b/extra/mariabackup/xtrabackup.cc index 6083f382f947e..2fa1e4b29a2f7 100644 --- a/extra/mariabackup/xtrabackup.cc +++ b/extra/mariabackup/xtrabackup.cc @@ -2705,7 +2705,7 @@ static bool innodb_init() srv_log_group_home_dir= xtrabackup_target_dir; bool ret; - const std::string ib_logfile0{get_log_file_path()}; + const std::string ib_logfile0{log_sys.get_circular_path()}; os_file_delete_if_exists_func(ib_logfile0.c_str(), nullptr); os_file_t file= os_file_create_func(ib_logfile0.c_str(), OS_FILE_CREATE, @@ -5479,6 +5479,7 @@ static bool xtrabackup_backup_func() srv_n_purge_threads = 1; srv_read_only_mode = TRUE; + recv_sys.rpo = LSN_MAX; srv_operation = SRV_OPERATION_BACKUP; log_file_op = backup_file_op; @@ -5570,10 +5571,11 @@ static bool xtrabackup_backup_func() /* open the log file */ memset(&stat_info, 0, sizeof(MY_STAT)); - dst_log_file = ds_open(backup_datasinks.m_redo, LOG_FILE_NAME, &stat_info); + dst_log_file = + ds_open(backup_datasinks.m_redo, "ib_logfile0", &stat_info); if (dst_log_file == NULL) { - msg("Error: failed to open the target stream for '%s'.", - LOG_FILE_NAME); + msg("Error: failed to open the target stream" + " for 'ib_logfile0'."); goto fail; } diff --git a/mysql-test/include/innodb_encrypt_log.inc b/mysql-test/include/innodb_encrypt_log.inc index 
5beebeae81f07..fd3e0ceea5758 100644 --- a/mysql-test/include/innodb_encrypt_log.inc +++ b/mysql-test/include/innodb_encrypt_log.inc @@ -2,3 +2,7 @@ # (see include/innodb_encrypt_log.combinations) --source include/have_innodb.inc +if ($MTR_COMBINATION_CRYPT) +{ +--source ../suite/encryption/include/skip_innodb_log_archive.inc +} diff --git a/mysql-test/mariadb-test-run.pl b/mysql-test/mariadb-test-run.pl index ee3412c9cd154..741ab3e49036a 100755 --- a/mysql-test/mariadb-test-run.pl +++ b/mysql-test/mariadb-test-run.pl @@ -326,7 +326,7 @@ END my $opt_debug_sync_timeout= 300; # Default timeout for WAIT_FOR actions. my $warn_seconds = 60; -my $rebootstrap_re= '--innodb[-_](?:page[-_]size|checksum[-_]algorithm|undo[-_]tablespaces|log[-_]group[-_]home[-_]dir|data[-_]home[-_]dir)|data[-_]file[-_]path|force_rebootstrap'; +my $rebootstrap_re= '--innodb[-_](?:page[-_]size|checksum[-_]algorithm|undo[-_]tablespaces|log[-_](group[-_]home[-_]dir|archive)|data[-_]home[-_]dir)|data[-_]file[-_]path|force_rebootstrap'; sub testcase_timeout ($) { return $opt_testcase_timeout * 60; } sub check_timeout ($) { return testcase_timeout($_[0]); } @@ -3145,7 +3145,7 @@ sub mysql_install_db { # need to be given to the bootstrap process as well as the # server process. 
foreach my $extra_opt ( @opt_extra_mysqld_opt ) { - if ($extra_opt =~ /--innodb/) { + if ($extra_opt =~ /--((loose|skip)[-_])*innodb/) { mtr_add_arg($args, $extra_opt); } } diff --git a/mysql-test/suite/encryption/include/skip_innodb_log_archive.inc b/mysql-test/suite/encryption/include/skip_innodb_log_archive.inc new file mode 100644 index 0000000000000..3fa44408a68a3 --- /dev/null +++ b/mysql-test/suite/encryption/include/skip_innodb_log_archive.inc @@ -0,0 +1,12 @@ +--disable_query_log +SET STATEMENT sql_log_bin=0 FOR +call mtr.add_suppression("Plugin 'InnoDB' registration as a STORAGE ENGINE failed\\."); +SET STATEMENT sql_log_bin=0 FOR +call mtr.add_suppression("InnoDB: Plugin initialization aborted"); +SET STATEMENT sql_log_bin=0 FOR +call mtr.add_suppression("InnoDB: ib_0.*\\.log does not match innodb_encrypt_log"); +--enable_query_log +if (`SELECT COUNT(*)=0 FROM information_schema.global_variables where variable_name='innodb_log_archive' and variable_value='OFF'`) +{ + --skip Test requires innodb_log_archive=OFF +} diff --git a/mysql-test/suite/encryption/r/innodb_encrypt_log_corruption.result b/mysql-test/suite/encryption/r/innodb_encrypt_log_corruption.result index 3c3e4831d8a0f..b7bdee10daa90 100644 --- a/mysql-test/suite/encryption/r/innodb_encrypt_log_corruption.result +++ b/mysql-test/suite/encryption/r/innodb_encrypt_log_corruption.result @@ -20,6 +20,13 @@ AND support IN ('YES', 'DEFAULT', 'ENABLED'); ENGINE SUPPORT COMMENT TRANSACTIONS XA SAVEPOINTS FOUND 1 /InnoDB: Upgrade after a crash is not supported. 
This redo log was created before MariaDB 10\.2\.2, and we did not find a valid checkpoint/ in mysqld.1.err # empty redo log from before MariaDB 10.2.2 +# restart: --innodb-data-home-dir=MYSQLTEST_VARDIR/tmp/log_corruption --innodb-log-group-home-dir=MYSQLTEST_VARDIR/tmp/log_corruption --innodb-log-recovery-target=12345 +SELECT COUNT(*) FROM INFORMATION_SCHEMA.ENGINES +WHERE engine = 'innodb' +AND support IN ('YES', 'DEFAULT', 'ENABLED'); +COUNT(*) +0 +FOUND 1 /InnoDB: cannot fulfill innodb_log_recovery_target=12345!=/ in mysqld.1.err # restart: --innodb-data-home-dir=MYSQLTEST_VARDIR/tmp/log_corruption --innodb-log-group-home-dir=MYSQLTEST_VARDIR/tmp/log_corruption --innodb-force-recovery=5 --innodb-log-file-size=4m SELECT COUNT(*) FROM INFORMATION_SCHEMA.ENGINES WHERE engine = 'innodb' diff --git a/mysql-test/suite/encryption/t/bulk_insert.test b/mysql-test/suite/encryption/t/bulk_insert.test index ce7804cb4e067..94b40f0152232 100644 --- a/mysql-test/suite/encryption/t/bulk_insert.test +++ b/mysql-test/suite/encryption/t/bulk_insert.test @@ -1,4 +1,5 @@ --source include/have_innodb.inc +--source include/skip_innodb_log_archive.inc # innodb_encrypt_log --source include/have_sequence.inc --source include/have_file_key_management_plugin.inc diff --git a/mysql-test/suite/encryption/t/corrupted_during_recovery.test b/mysql-test/suite/encryption/t/corrupted_during_recovery.test index dabf06dd04789..e24e2c5425cbe 100644 --- a/mysql-test/suite/encryption/t/corrupted_during_recovery.test +++ b/mysql-test/suite/encryption/t/corrupted_during_recovery.test @@ -1,4 +1,5 @@ --source include/have_innodb.inc +--source ../../suite/innodb/include/no_checkpoint_prepare.inc --source include/have_file_key_management_plugin.inc --disable_query_log diff --git a/mysql-test/suite/encryption/t/debug_key_management.test b/mysql-test/suite/encryption/t/debug_key_management.test index 9638391e69058..3211687c0c294 100644 --- a/mysql-test/suite/encryption/t/debug_key_management.test +++ 
b/mysql-test/suite/encryption/t/debug_key_management.test @@ -1,4 +1,5 @@ -- source include/have_innodb.inc +-- source include/skip_innodb_log_archive.inc # innodb_encrypt_log -- source include/have_debug.inc -- source include/innodb_undo_tablespaces.inc -- source include/not_embedded.inc diff --git a/mysql-test/suite/encryption/t/doublewrite_debug.test b/mysql-test/suite/encryption/t/doublewrite_debug.test index 4f2215240441f..fc7580835027d 100644 --- a/mysql-test/suite/encryption/t/doublewrite_debug.test +++ b/mysql-test/suite/encryption/t/doublewrite_debug.test @@ -1,7 +1,9 @@ --source include/have_innodb.inc +--source ../../suite/innodb/include/skip_innodb_log_archive.inc #mariadb-backup --source include/have_debug.inc --source include/not_embedded.inc --source include/have_file_key_management_plugin.inc +--source ../../suite/innodb/include/no_checkpoint_prepare.inc call mtr.add_suppression("InnoDB: Encrypted page \\[page id: space=[1-9][0-9]*, page number=3\\] in file .*test.t[12]\\.ibd looks corrupted"); call mtr.add_suppression("InnoDB: Unable to apply log to corrupted page "); call mtr.add_suppression("InnoDB: Plugin initialization aborted"); diff --git a/mysql-test/suite/encryption/t/encrypt_and_grep.test b/mysql-test/suite/encryption/t/encrypt_and_grep.test index 648ad80780c93..988604db58ecd 100644 --- a/mysql-test/suite/encryption/t/encrypt_and_grep.test +++ b/mysql-test/suite/encryption/t/encrypt_and_grep.test @@ -1,4 +1,5 @@ -- source include/have_innodb.inc +-- source include/skip_innodb_log_archive.inc # innodb_encrypt_log -- source include/innodb_undo_tablespaces.inc -- source include/have_file_key_management_plugin.inc diff --git a/mysql-test/suite/encryption/t/encryption_force.test b/mysql-test/suite/encryption/t/encryption_force.test index 3c6f039184b96..28b49c866b489 100644 --- a/mysql-test/suite/encryption/t/encryption_force.test +++ b/mysql-test/suite/encryption/t/encryption_force.test @@ -1,4 +1,5 @@ -- source include/have_innodb.inc +-- 
source include/skip_innodb_log_archive.inc # innodb_encrypt_log -- source include/have_partition.inc -- source include/have_example_key_management_plugin.inc diff --git a/mysql-test/suite/encryption/t/file_creation.test b/mysql-test/suite/encryption/t/file_creation.test index 6b0126831a4ca..939da080133ce 100644 --- a/mysql-test/suite/encryption/t/file_creation.test +++ b/mysql-test/suite/encryption/t/file_creation.test @@ -1,4 +1,5 @@ --source include/have_innodb.inc +--source ../../suite/innodb/include/no_checkpoint_prepare.inc --source include/have_example_key_management_plugin.inc let $restart_noprint=2; # embedded does not support restart diff --git a/mysql-test/suite/encryption/t/innochecksum.test b/mysql-test/suite/encryption/t/innochecksum.test index e440e3d3344f8..bb2eb0e1a9359 100644 --- a/mysql-test/suite/encryption/t/innochecksum.test +++ b/mysql-test/suite/encryption/t/innochecksum.test @@ -7,6 +7,7 @@ # Require InnoDB -- source include/have_innodb.inc -- source include/have_file_key_management_plugin.inc +-- source include/skip_innodb_log_archive.inc # depends on innodb_log_archive=OFF checkpoint logic -- source include/innodb_page_size_small.inc -- source include/innodb_checksum_algorithm.inc -- source include/maybe_debug.inc diff --git a/mysql-test/suite/encryption/t/innodb-first-page-read.opt b/mysql-test/suite/encryption/t/innodb-first-page-read.opt deleted file mode 100644 index 38d69691ed6aa..0000000000000 --- a/mysql-test/suite/encryption/t/innodb-first-page-read.opt +++ /dev/null @@ -1,5 +0,0 @@ ---innodb-encrypt-tables=ON ---innodb-encrypt-log=ON ---innodb-encryption-rotate-key-age=15 ---innodb-encryption-threads=4 ---innodb-tablespaces-encryption diff --git a/mysql-test/suite/encryption/t/innodb-key-rotation-disable.test b/mysql-test/suite/encryption/t/innodb-key-rotation-disable.test index 1bd69365f6892..33936e1704768 100644 --- a/mysql-test/suite/encryption/t/innodb-key-rotation-disable.test +++ 
b/mysql-test/suite/encryption/t/innodb-key-rotation-disable.test @@ -1,4 +1,5 @@ -- source include/have_innodb.inc +-- source include/skip_innodb_log_archive.inc # innodb_encrypt_log -- source include/have_file_key_management_plugin.inc # not embedded because of restarts -- source include/not_embedded.inc diff --git a/mysql-test/suite/encryption/t/innodb-page_encryption_log_encryption.test b/mysql-test/suite/encryption/t/innodb-page_encryption_log_encryption.test index a736c7292ad35..3c64c039f3a45 100644 --- a/mysql-test/suite/encryption/t/innodb-page_encryption_log_encryption.test +++ b/mysql-test/suite/encryption/t/innodb-page_encryption_log_encryption.test @@ -1,4 +1,5 @@ -- source include/have_innodb.inc +-- source include/skip_innodb_log_archive.inc # innodb_encrypt_log -- source include/not_embedded.inc -- source include/have_file_key_management_plugin.inc diff --git a/mysql-test/suite/encryption/t/innodb-redo-badkey.test b/mysql-test/suite/encryption/t/innodb-redo-badkey.test index bacc71dd2c86e..85dc8e57559eb 100644 --- a/mysql-test/suite/encryption/t/innodb-redo-badkey.test +++ b/mysql-test/suite/encryption/t/innodb-redo-badkey.test @@ -1,4 +1,5 @@ -- source include/have_innodb.inc +--source ../../suite/innodb/include/no_checkpoint_prepare.inc -- source include/have_file_key_management_plugin.inc # embedded does not support restart -- source include/not_embedded.inc diff --git a/mysql-test/suite/encryption/t/innodb-redo-nokeys.test b/mysql-test/suite/encryption/t/innodb-redo-nokeys.test index 87a9e7a146e1b..b3ca7486e000c 100644 --- a/mysql-test/suite/encryption/t/innodb-redo-nokeys.test +++ b/mysql-test/suite/encryption/t/innodb-redo-nokeys.test @@ -1,4 +1,5 @@ -- source include/have_innodb.inc +--source ../../suite/innodb/include/no_checkpoint_prepare.inc -- source include/have_file_key_management_plugin.inc # embedded does not support restart -- source include/not_embedded.inc diff --git a/mysql-test/suite/encryption/t/innodb_encrypt_freed.test 
b/mysql-test/suite/encryption/t/innodb_encrypt_freed.test index 408e874a3b242..b37ff06dac1f7 100644 --- a/mysql-test/suite/encryption/t/innodb_encrypt_freed.test +++ b/mysql-test/suite/encryption/t/innodb_encrypt_freed.test @@ -1,4 +1,5 @@ --source include/have_innodb.inc +--source include/skip_innodb_log_archive.inc # innodb_encrypt_log --source include/have_example_key_management_plugin.inc --source include/have_debug.inc --source include/not_embedded.inc diff --git a/mysql-test/suite/encryption/t/innodb_encrypt_log.test b/mysql-test/suite/encryption/t/innodb_encrypt_log.test index 5448a606ba807..cdb807c157780 100644 --- a/mysql-test/suite/encryption/t/innodb_encrypt_log.test +++ b/mysql-test/suite/encryption/t/innodb_encrypt_log.test @@ -1,4 +1,5 @@ -- source include/have_innodb.inc +-- source include/skip_innodb_log_archive.inc # innodb_encrypt_log -- source include/not_embedded.inc -- source filekeys_plugin.inc diff --git a/mysql-test/suite/encryption/t/innodb_encrypt_log_corruption.test b/mysql-test/suite/encryption/t/innodb_encrypt_log_corruption.test index f1642e83e32e0..19858188cf1ff 100644 --- a/mysql-test/suite/encryption/t/innodb_encrypt_log_corruption.test +++ b/mysql-test/suite/encryption/t/innodb_encrypt_log_corruption.test @@ -1,3 +1,4 @@ +--source include/skip_innodb_log_archive.inc # innodb_encrypt_log --let $no_cleanup=1 --source ../../innodb/t/log_corruption.test diff --git a/mysql-test/suite/encryption/t/innodb_encryption-page-compression.test b/mysql-test/suite/encryption/t/innodb_encryption-page-compression.test index 57c8721282f4a..cf40cfc3ac816 100644 --- a/mysql-test/suite/encryption/t/innodb_encryption-page-compression.test +++ b/mysql-test/suite/encryption/t/innodb_encryption-page-compression.test @@ -1,4 +1,5 @@ -- source include/have_innodb.inc +-- source include/skip_innodb_log_archive.inc # innodb_encrypt_log -- source include/have_example_key_management_plugin.inc -- source include/not_embedded.inc # This test is too slow for 
valgrind and causes innnodb semaphores to time out diff --git a/mysql-test/suite/encryption/t/innodb_encryption.test b/mysql-test/suite/encryption/t/innodb_encryption.test index 2b0b2b8d7fb5c..113bbf152605d 100644 --- a/mysql-test/suite/encryption/t/innodb_encryption.test +++ b/mysql-test/suite/encryption/t/innodb_encryption.test @@ -2,6 +2,7 @@ # # -- source include/have_innodb.inc +-- source include/skip_innodb_log_archive.inc # innodb_encrypt_log -- source include/have_example_key_management_plugin.inc -- source include/innodb_undo_tablespaces.inc diff --git a/mysql-test/suite/encryption/t/innodb_encryption_discard_import.test b/mysql-test/suite/encryption/t/innodb_encryption_discard_import.test index e33aaec3e21c0..c1f51be89883f 100644 --- a/mysql-test/suite/encryption/t/innodb_encryption_discard_import.test +++ b/mysql-test/suite/encryption/t/innodb_encryption_discard_import.test @@ -1,4 +1,5 @@ -- source include/have_innodb.inc +-- source include/skip_innodb_log_archive.inc # innodb_encrypt_log -- source include/have_example_key_management_plugin.inc -- source include/not_valgrind.inc -- source include/not_embedded.inc diff --git a/mysql-test/suite/encryption/t/innodb_encryption_tables.test b/mysql-test/suite/encryption/t/innodb_encryption_tables.test index d03bc890ba4ed..3a8d244a1e3f4 100644 --- a/mysql-test/suite/encryption/t/innodb_encryption_tables.test +++ b/mysql-test/suite/encryption/t/innodb_encryption_tables.test @@ -1,4 +1,5 @@ -- source include/have_innodb.inc +-- source include/skip_innodb_log_archive.inc # innodb_encrypt_log -- source include/have_example_key_management_plugin.inc -- source include/not_embedded.inc # We can't run this test under valgrind as it 'takes forever' diff --git a/mysql-test/suite/encryption/t/innodb_first_page.test b/mysql-test/suite/encryption/t/innodb_first_page.test index db4d8eb3b16f5..838fc8396a9a8 100644 --- a/mysql-test/suite/encryption/t/innodb_first_page.test +++ 
b/mysql-test/suite/encryption/t/innodb_first_page.test @@ -3,6 +3,7 @@ # --source include/have_innodb.inc +--source include/skip_innodb_log_archive.inc # innodb_encrypt_log --source include/have_file_key_management_plugin.inc --source include/innodb_undo_tablespaces.inc diff --git a/mysql-test/suite/encryption/t/innodb_onlinealter_encryption.test b/mysql-test/suite/encryption/t/innodb_onlinealter_encryption.test index dc6d1e6f93c86..0b5c70339f704 100644 --- a/mysql-test/suite/encryption/t/innodb_onlinealter_encryption.test +++ b/mysql-test/suite/encryption/t/innodb_onlinealter_encryption.test @@ -1,4 +1,5 @@ -- source include/have_innodb.inc +-- source include/skip_innodb_log_archive.inc # innodb_encrypt_log -- source include/have_file_key_management_plugin.inc # test uses restart -- source include/not_embedded.inc diff --git a/mysql-test/suite/encryption/t/recovery_memory.test b/mysql-test/suite/encryption/t/recovery_memory.test index fc6f15f7ee27f..af539b3a36bec 100644 --- a/mysql-test/suite/encryption/t/recovery_memory.test +++ b/mysql-test/suite/encryption/t/recovery_memory.test @@ -1,5 +1,6 @@ --source include/have_debug.inc --source include/have_innodb.inc +--source include/skip_innodb_log_archive.inc # innodb_encrypt_log --source include/have_sequence.inc --source filekeys_plugin.inc diff --git a/mysql-test/suite/innodb/include/no_checkpoint_end.inc b/mysql-test/suite/innodb/include/no_checkpoint_end.inc index 61721650f329a..5ea419f7e173b 100644 --- a/mysql-test/suite/innodb/include/no_checkpoint_end.inc +++ b/mysql-test/suite/innodb/include/no_checkpoint_end.inc @@ -1,35 +1,33 @@ # Check that the latest checkpoint in the redo log files # is not newer than the checkpoint sampled by no_checkpoint_start.inc -if (!$no_checkpoint_kill) { --source include/kill_mysqld.inc -} +--error 2 +--exec $MYSQLD_CMD --innodb --innodb-read-only --innodb-invalid-option --innodb-page-size=$INNODB_PAGE_SIZE --innodb-buffer-pool-size=21m perl; -my $cp = $ENV{CHECKPOINT_LSN}; -$cp 
=~ s/^InnoDB\t\t//; -my $log = "$ENV{MYSQLD_DATADIR}ib_logfile0"; -open(LOG, "<$log") || die "Unable to open $log"; -seek(LOG, 4096, 0) || die "Unable to seek $log"; -die unless read(LOG, $_, 8) == 8; -my ($cp1hi,$cp1lo) = unpack("NN", $_); -seek(LOG, 8192, 0) || die "Unable to seek $log"; -die unless read(LOG, $_, 8) == 8; -my ($cp2hi,$cp2lo) = unpack("NN", $_); -close(LOG); - -my $cp1 = $cp1hi << 32 | $cp1lo; -my $cp2 = $cp2hi << 32 | $cp2lo; - +my $cp=0; +my $search_file= "$ENV{MYSQLTEST_VARDIR}/log/mysqld.1.err"; +open(FILE, '<', $search_file) || die("Can't open file $search_file: $!"); +while(<FILE>) +{ + if (/^CURRENT_TEST:/) + { + $cp=0; + } + elsif (!$cp && /\[Warning\] innodb_read_only prevents crash recovery between (\d+) and/o) + { + $cp=$1; + } +} +close(FILE); open(OUT, ">$ENV{MYSQLTEST_VARDIR}/log/check.txt") || die; - -if ($cp1 > $cp || $cp2 > $cp) { - print OUT "--source include/start_mysqld.inc\n" - unless $ENV{no_checkpoint_kill}; +if ($cp != $ENV{CHECKPOINT_LSN}) +{ + print OUT "--source include/start_mysqld.inc\n"; print OUT "$ENV{CLEANUP_IF_CHECKPOINT}\n"; - print OUT "--skip Extra checkpoint 1 after $cp ($cp1,$cp2)\n"; + print OUT "--skip Unexpected checkpoint $cp != $ENV{CHECKPOINT_LSN}\n"; } - close(OUT); EOF diff --git a/mysql-test/suite/innodb/include/no_checkpoint_prepare.inc b/mysql-test/suite/innodb/include/no_checkpoint_prepare.inc new file mode 100644 index 0000000000000..bf19bc6cc75c1 --- /dev/null +++ b/mysql-test/suite/innodb/include/no_checkpoint_prepare.inc @@ -0,0 +1,8 @@ +--disable_query_log +call mtr.add_suppression("InnoDB: innodb_read_only prevents crash recovery"); +call mtr.add_suppression("InnoDB: Plugin initialization aborted"); +call mtr.add_suppression("Plugin 'InnoDB' registration as a STORAGE ENGINE failed"); +call mtr.add_suppression("unknown option '--innodb-invalid-option'"); +call mtr.add_suppression("\\[ERROR\\] Aborting"); +let $INNODB_PAGE_SIZE=`SELECT @@GLOBAL.innodb_page_size`; +--enable_query_log diff --git 
a/mysql-test/suite/innodb/include/skip_innodb_log_archive.inc b/mysql-test/suite/innodb/include/skip_innodb_log_archive.inc new file mode 100644 index 0000000000000..984e5fefa369a --- /dev/null +++ b/mysql-test/suite/innodb/include/skip_innodb_log_archive.inc @@ -0,0 +1,4 @@ +if (`SELECT @@GLOBAL.innodb_log_archive`) +{ + --skip Test requires innodb_log_archive=OFF +} diff --git a/mysql-test/suite/innodb/r/corrupted_during_recovery.result b/mysql-test/suite/innodb/r/corrupted_during_recovery.result index 593943b4951ea..7aa097ee1bec2 100644 --- a/mysql-test/suite/innodb/r/corrupted_during_recovery.result +++ b/mysql-test/suite/innodb/r/corrupted_during_recovery.result @@ -1,14 +1,27 @@ -CREATE TABLE t1(a BIGINT PRIMARY KEY) ENGINE=InnoDB; +CREATE TABLE t1(a BIGINT PRIMARY KEY) ENGINE=InnoDB STATS_PERSISTENT=0; INSERT INTO t1 VALUES(1); +SET GLOBAL innodb_max_purge_lag_wait=0, innodb_log_checkpoint_now=ON; +connect stop_purge,localhost,root; +START TRANSACTION WITH CONSISTENT SNAPSHOT; +connection default; CREATE TABLE t2(a BIGINT PRIMARY KEY) ENGINE=InnoDB; INSERT INTO t1 VALUES(2); SET GLOBAL innodb_flush_log_at_trx_commit=1; INSERT INTO t2 VALUES(1); # Kill the server +disconnect stop_purge; +SELECT * FROM t2; +Got one of the listed errors +SELECT * FROM t2; +ERROR 42000: Unknown storage engine 'InnoDB' +FOUND 1 /InnoDB: cannot fulfill innodb_log_recovery_target.*/ in mysqld.1.err +DELETE FROM t1; +Got one of the listed errors +DELETE FROM t2; +Got one of the listed errors # Corrupt the pages SELECT * FROM t1; ERROR 42000: Unknown storage engine 'InnoDB' -FOUND 1 /InnoDB: Page \[page id: space=[1-9][0-9]*, page number=3\] log sequence number 1311768467463790320 is in the future!/ in mysqld.1.err SELECT * FROM t1; a 1 @@ -18,6 +31,7 @@ a CHECK TABLE t2; Table Op Msg_type Msg_text test.t2 check status OK +FOUND 1 /InnoDB: Page \[page id: space=[1-9][0-9]*, page number=3\] log sequence number 1311768467463790320 is in the future!.*/ in mysqld.1.err DROP TABLE t1, t2; 
CREATE TABLE t1(pk SERIAL) ENGINE=InnoDB; INSERT INTO t1 VALUES (1),(2),(3); @@ -31,6 +45,9 @@ DELETE FROM t1 WHERE pk=3; disconnect con1; # Corrupt the page SELECT * FROM t1; +ERROR 42000: Unknown storage engine 'InnoDB' +FOUND 1 /InnoDB: Did not find innodb_log_recovery_start=\d+ .*/ in mysqld.1.err +SELECT * FROM t1; pk 1 2 diff --git a/mysql-test/suite/innodb/r/innodb-wl5522,strict_crc32.rdiff b/mysql-test/suite/innodb/r/innodb-wl5522,strict_crc32.rdiff index 283bbe96aae97..6c31d9268be5f 100644 --- a/mysql-test/suite/innodb/r/innodb-wl5522,strict_crc32.rdiff +++ b/mysql-test/suite/innodb/r/innodb-wl5522,strict_crc32.rdiff @@ -1,6 +1,27 @@ --- innodb-wl5522.result +++ innodb-wl5522,strict_crc32.result~ -@@ -131,8 +131,7 @@ +@@ -1,9 +1,6 @@ + call mtr.add_suppression("InnoDB: Unable to import tablespace .* because it already exists. Please DISCARD the tablespace before IMPORT\\."); + call mtr.add_suppression("Index for table 't2' is corrupt; try to repair it"); + call mtr.add_suppression("InnoDB: Cannot save statistics for table `test`\\.`t1` because the \\.ibd file is missing"); +-call mtr.add_suppression("InnoDB: cannot fulfill innodb_log_recovery_target=102345<"); +-call mtr.add_suppression("InnoDB: Plugin initialization aborted"); +-call mtr.add_suppression("Plugin 'InnoDB' registration as a STORAGE ENGINE failed\\."); + FLUSH TABLES; + CREATE TABLE t1 + (a INT AUTO_INCREMENT PRIMARY KEY, +@@ -37,10 +34,6 @@ + t1.ibd + t2.frm + t2.ibd +-# restart: --innodb-log-recovery-target=102345 +-FOUND 1 /InnoDB: cannot fulfill innodb_log_recovery_target=1023454G"); +call mtr.add_suppression("InnoDB: innodb_log_archive=ON but .*/ib_logfile0 exists"); +call mtr.add_suppression("InnoDB: No matching file found for innodb_log_recovery_start=12290"); +call mtr.add_suppression("InnoDB: File .*/ib_logfile0 was not found"); +call mtr.add_suppression("InnoDB: innodb_log_archive_start=\\d+ is after innodb_log_recovery_start=\\d+"); +call mtr.add_suppression("InnoDB: Did not find 
innodb_log_recovery_start=\\d+ "); SET GLOBAL innodb_log_file_size=4194304; SHOW VARIABLES LIKE 'innodb_log_file_size'; Variable_name Value @@ -11,7 +18,7 @@ a INT PRIMARY KEY AUTO_INCREMENT, b CHAR(255) NOT NULL) ENGINE=INNODB; INSERT INTO t SELECT NULL, REPEAT('a', 255) FROM seq_1_to_20000; -# restart: --innodb-log-file-size=4194304 +# restart: --innodb-log-file-size=4194304 --skip-innodb-log-archive SELECT COUNT(*) FROM t; COUNT(*) 20000 @@ -28,8 +35,24 @@ Got one of the listed errors connect con1,localhost,root; SET GLOBAL innodb_log_file_size=7340032; connection default; +SET GLOBAL innodb_log_archive=ON; +SET GLOBAL innodb_log_archive=OFF; KILL QUERY @id; connection con1; +SET GLOBAL innodb_log_archive=ON, innodb_log_file_size=10485760; +SELECT @@GLOBAL.innodb_log_file_size!=10485760; +@@GLOBAL.innodb_log_file_size!=10485760 +1 +SET GLOBAL innodb_log_file_size=4294971392; +ERROR HY000: Failed to create specific handler file +SELECT @@GLOBAL.innodb_log_file_size<=10485760; +@@GLOBAL.innodb_log_file_size<=10485760 +1 +SET GLOBAL innodb_log_file_size=4294967296; +SELECT @@GLOBAL.innodb_log_file_size<=10485760; +@@GLOBAL.innodb_log_file_size<=10485760 +1 +SET GLOBAL innodb_log_archive=OFF; connection default; SET GLOBAL innodb_log_file_size=5242880; connection con1; @@ -45,6 +68,18 @@ global_value connection con1; disconnect con1; connection default; +# restart: --innodb-log-recovery-start=12345 --innodb-log-archive-start=1234567 +$check_no_innodb; +ENGINE SUPPORT COMMENT TRANSACTIONS XA SAVEPOINTS +FOUND 1 /InnoDB: innodb_log_archive_start=1234567 is after innodb_log_recovery_start=12345/ in mysqld.1.err +# restart: --innodb-log-archive --innodb-log-file-size=5g +$check_no_innodb; +ENGINE SUPPORT COMMENT TRANSACTIONS XA SAVEPOINTS +FOUND 1 /InnoDB: innodb_log_archive=ON disallows innodb_log_file_size>4G/ in mysqld.1.err +# restart: --innodb-log-archive --innodb-log-file-size=4g +$check_no_innodb; +ENGINE SUPPORT COMMENT TRANSACTIONS XA SAVEPOINTS +FOUND 1 
/InnoDB: innodb_log_archive=ON but .*/ib_logfile0 exists/ in mysqld.1.err # restart SELECT * FROM t WHERE a<10; a b @@ -61,6 +96,21 @@ SELECT COUNT(*),LENGTH(b) FROM t GROUP BY b; COUNT(*) LENGTH(b) 9 0 19991 255 +SET GLOBAL innodb_log_archive=ON; +# restart: --innodb-log-recovery-start=12290 +$check_no_innodb; +ENGINE SUPPORT COMMENT TRANSACTIONS XA SAVEPOINTS +FOUND 1 /InnoDB: No matching file found for innodb_log_recovery_start=12290/ in mysqld.1.err +# restart: with restart_parameters +$check_no_innodb; +ENGINE SUPPORT COMMENT TRANSACTIONS XA SAVEPOINTS +NOT FOUND /InnoDB: File .*/ib_logfile0 as not found/ in mysqld.1.err +FOUND 2 /InnoDB: innodb_log_archive_start=\d+ is after innodb_log_recovery_start=\d+/ in mysqld.1.err +# restart: with restart_parameters +$check_no_innodb; +ENGINE SUPPORT COMMENT TRANSACTIONS XA SAVEPOINTS +# restart: with restart_parameters +SET GLOBAL innodb_log_archive=OFF; SHOW VARIABLES LIKE 'innodb_log_file_size'; Variable_name Value innodb_log_file_size 5242880 diff --git a/mysql-test/suite/innodb/r/log_upgrade.result b/mysql-test/suite/innodb/r/log_upgrade.result index 4da83460f9364..d1f1867778c34 100644 --- a/mysql-test/suite/innodb/r/log_upgrade.result +++ b/mysql-test/suite/innodb/r/log_upgrade.result @@ -2,7 +2,7 @@ call mtr.add_suppression("InnoDB: The change buffer is corrupted"); # # MDEV-24412 InnoDB: Upgrade after a crash is not supported # -# restart: --innodb-data-home-dir=MYSQLTEST_VARDIR/tmp/log_upgrade --innodb-log-group-home-dir=MYSQLTEST_VARDIR/tmp/log_upgrade --innodb-force-recovery=5 --innodb-log-file-size=4m +# restart: --innodb-data-home-dir=MYSQLTEST_VARDIR/tmp/log_upgrade --innodb-log-group-home-dir=MYSQLTEST_VARDIR/tmp/log_upgrade --skip-innodb-log-archive --innodb-force-recovery=5 --innodb-log-file-size=4m SELECT COUNT(*) FROM INFORMATION_SCHEMA.ENGINES WHERE engine = 'innodb' AND support IN ('YES', 'DEFAULT', 'ENABLED'); diff --git a/mysql-test/suite/innodb/r/log_upgrade_101_flags.result 
b/mysql-test/suite/innodb/r/log_upgrade_101_flags.result index 9bcb786170a63..1f5772c1d30f9 100644 --- a/mysql-test/suite/innodb/r/log_upgrade_101_flags.result +++ b/mysql-test/suite/innodb/r/log_upgrade_101_flags.result @@ -1,7 +1,7 @@ call mtr.add_suppression("InnoDB: The change buffer is corrupted"); call mtr.add_suppression("InnoDB: Tablespace size stored in header is 768 pages, but the sum of data file sizes is 384 pages"); call mtr.add_suppression("InnoDB: adjusting FSP_SPACE_FLAGS of file"); -# restart: --innodb-data-home-dir=MYSQLTEST_VARDIR/tmp/log_upgrade --innodb-log-group-home-dir=MYSQLTEST_VARDIR/tmp/log_upgrade --innodb-undo-directory=MYSQLTEST_VARDIR/tmp/log_upgrade --innodb-force-recovery=5 --innodb-log-file-size=4m --innodb_page_size=32k --innodb_buffer_pool_size=11M +# restart: --innodb-data-home-dir=MYSQLTEST_VARDIR/tmp/log_upgrade --innodb-log-group-home-dir=MYSQLTEST_VARDIR/tmp/log_upgrade --innodb-undo-directory=MYSQLTEST_VARDIR/tmp/log_upgrade --skip-innodb-log-archive --innodb-force-recovery=5 --innodb-log-file-size=4m --innodb_page_size=32k --innodb_buffer_pool_size=11M SELECT COUNT(*) FROM INFORMATION_SCHEMA.ENGINES WHERE engine = 'innodb' AND support IN ('YES', 'DEFAULT', 'ENABLED'); diff --git a/mysql-test/suite/innodb/r/rename_table.result b/mysql-test/suite/innodb/r/rename_table.result index a3bf59101b39e..5842ca79ce860 100644 --- a/mysql-test/suite/innodb/r/rename_table.result +++ b/mysql-test/suite/innodb/r/rename_table.result @@ -1,4 +1,7 @@ call mtr.add_suppression("InnoDB: In RENAME TABLE table `test`.`t4` is referenced in foreign key constraints which are not compatible with the new table definition."); +call mtr.add_suppression("InnoDB: Plugin initialization aborted"); +call mtr.add_suppression("Plugin 'InnoDB' registration as a STORAGE ENGINE failed"); +call mtr.add_suppression("InnoDB: cannot fulfill innodb_log_recovery_target=20480<"); CREATE DATABASE test_jfg; CREATE DATABASE test_jfg2; CREATE TABLE test_jfg.test (a int 
unsigned PRIMARY KEY) ENGINE=InnoDB; @@ -8,6 +11,13 @@ FROM INFORMATION_SCHEMA.INNODB_SYS_TABLESPACES WHERE filename LIKE '%test%'; path ./test_jfg2/test.ibd DROP DATABASE test_jfg; +# restart: --innodb-log-recovery-target=20480 +SELECT COUNT(*) FROM INFORMATION_SCHEMA.ENGINES +WHERE engine = 'innodb' +AND support IN ('YES', 'DEFAULT', 'ENABLED'); +COUNT(*) +0 +FOUND 1 /InnoDB: cannot fulfill innodb_log_recovery_target=20480", $file) or die "Unable to open $file\n"; +binmode FILE; +print FILE pack("NN", 0, $file_size - 1); +seek FILE, $file_size - 1, 0 or die "Unable to seek $file\n"; +my $polynomial = 0x82f63b78; # CRC-32C +my $FILE_CHECKPOINT=pack("CxxNN", 0xfa, 0xffffffff, 0xffbfffff); +$FILE_CHECKPOINT .= pack("CN", 1, mycrc32($FILE_CHECKPOINT, 0, $polynomial)); +my $FILE_MODIFY=pack("CCxa*", 0xb9, 127, "a/b.ibd"); +$FILE_MODIFY .= pack("CN", 1, mycrc32($FILE_MODIFY, 0, $polynomial)); +print FILE substr($FILE_MODIFY, 0, 1); +close(FILE) or die "Unable to close $file\n"; +chmod 0444, $file or die "Unable to chmod 444 $file\n"; +$file= "$ENV{DATADIR}/ib_ffffffffffc00000.log"; +open(FILE, ">", $file) or die "Unable to open $file\n"; +binmode FILE; +seek FILE, 0x3000, 0 or die "Unable to seek $file\n"; +print FILE substr($FILE_MODIFY, 1), $FILE_MODIFY x 139819, $FILE_CHECKPOINT; +seek FILE, $file_size - 1, 0 or die "Unable to seek $file\n"; +print FILE chr(0); +close(FILE) or die "Unable to close $file\n"; +EOF + +--let $restart_parameters= --innodb-read-only +--source include/start_mysqld.inc +SELECT variable_name, variable_value FROM information_schema.global_status +WHERE variable_name LIKE 'INNODB_LSN%'; + +--let $restart_parameters= +--source include/shutdown_mysqld.inc + +# Microsoft Windows does not allow to delete a read-only file +perl; +my $file= "$ENV{DATADIR}/ib_ffffffffff803000.log"; +chmod 0644, $file or die "Unable to chmod 644 $file\n"; +EOF +--remove_file $DATADIR/ib_ffffffffff803000.log +--remove_file $DATADIR/ib_ffffffffffc00000.log 
+--disable_result_log +--error 0,1 # fails if innodb_log_archive=ON +--move_file $DATADIR/ib_logfile_hidden $DATADIR/ib_logfile0 +--enable_result_log +--source include/start_mysqld.inc diff --git a/mysql-test/suite/innodb/t/log_corruption.test b/mysql-test/suite/innodb/t/log_corruption.test index 7c39ce02228f0..8b911b2ef62d0 100644 --- a/mysql-test/suite/innodb/t/log_corruption.test +++ b/mysql-test/suite/innodb/t/log_corruption.test @@ -1,6 +1,7 @@ --source include/have_innodb.inc --source include/have_innodb_16k.inc --source include/no_valgrind_without_big.inc +--source include/skip_innodb_log_archive.inc --disable_query_log call mtr.add_suppression("InnoDB: Upgrade after a crash is not supported"); @@ -19,6 +20,7 @@ call mtr.add_suppression("InnoDB: Obtaining redo log encryption key version 1 fa call mtr.add_suppression("InnoDB: Decrypting checkpoint failed"); call mtr.add_suppression("InnoDB: Log file .*ib_logfile1 is of different size 2097152 bytes than other log files (1048576|4194304) bytes!"); call mtr.add_suppression("InnoDB: The change buffer is corrupted"); +call mtr.add_suppression("InnoDB: cannot fulfill innodb_log_recovery_target=12345!="); --enable_query_log let bugdir= $MYSQLTEST_VARDIR/tmp/log_corruption; @@ -171,11 +173,19 @@ die unless seek(OUT, 0x800, 0); print OUT pack("NnnNx[496]N", 0x80000944, 12, 12, 0, 0xb2a); close OUT or die; EOF ---let $restart_parameters= $dirs --innodb-force-recovery=5 --innodb-log-file-size=4m +--let $restart_parameters= $dirs --innodb-log-recovery-target=12345 --source include/start_mysqld.inc SELECT COUNT(*) FROM INFORMATION_SCHEMA.ENGINES WHERE engine = 'innodb' AND support IN ('YES', 'DEFAULT', 'ENABLED'); +--let SEARCH_PATTERN= InnoDB: cannot fulfill innodb_log_recovery_target=12345!= +--source include/search_pattern_in_file.inc + +--let $restart_parameters= $dirs --innodb-force-recovery=5 --innodb-log-file-size=4m +--source include/restart_mysqld.inc +SELECT COUNT(*) FROM INFORMATION_SCHEMA.ENGINES +WHERE engine 
= 'innodb' +AND support IN ('YES', 'DEFAULT', 'ENABLED'); --source include/shutdown_mysqld.inc --let SEARCH_PATTERN= InnoDB: Upgrading redo log: --source include/search_pattern_in_file.inc diff --git a/mysql-test/suite/innodb/t/log_corruption_recovery.test b/mysql-test/suite/innodb/t/log_corruption_recovery.test index 489010f9b774d..ac9368d604263 100644 --- a/mysql-test/suite/innodb/t/log_corruption_recovery.test +++ b/mysql-test/suite/innodb/t/log_corruption_recovery.test @@ -1,4 +1,5 @@ --source include/have_innodb.inc +--source include/skip_innodb_log_archive.inc --let DATADIR=`select @@datadir` let $check_no_innodb=SELECT * FROM INFORMATION_SCHEMA.ENGINES diff --git a/mysql-test/suite/innodb/t/log_data_file_size.test b/mysql-test/suite/innodb/t/log_data_file_size.test index fe75b9ab236a4..ce0cb3d7de61d 100644 --- a/mysql-test/suite/innodb/t/log_data_file_size.test +++ b/mysql-test/suite/innodb/t/log_data_file_size.test @@ -1,5 +1,6 @@ --source include/innodb_page_size.inc --source include/not_embedded.inc +--source include/no_checkpoint_prepare.inc let INNODB_PAGE_SIZE=`select @@innodb_page_size`; let MYSQLD_DATADIR=`select @@datadir`; diff --git a/mysql-test/suite/innodb/t/log_file.test b/mysql-test/suite/innodb/t/log_file.test index 7ff0de0fc013a..880226cd84f75 100644 --- a/mysql-test/suite/innodb/t/log_file.test +++ b/mysql-test/suite/innodb/t/log_file.test @@ -4,6 +4,7 @@ --source include/have_innodb.inc --source include/no_valgrind_without_big.inc +--source include/skip_innodb_log_archive.inc --disable_query_log call mtr.add_suppression("InnoDB: Could not create undo tablespace.*undo002"); diff --git a/mysql-test/suite/innodb/t/log_file_name.test b/mysql-test/suite/innodb/t/log_file_name.test index 81f199951a896..2a0175195c331 100644 --- a/mysql-test/suite/innodb/t/log_file_name.test +++ b/mysql-test/suite/innodb/t/log_file_name.test @@ -2,6 +2,7 @@ # Test the detection of duplicate tablespaces. 
--source include/have_innodb.inc +--source include/no_checkpoint_prepare.inc --source include/no_valgrind_without_big.inc # Embedded server does not support crashing diff --git a/mysql-test/suite/innodb/t/log_file_size.test b/mysql-test/suite/innodb/t/log_file_size.test index ea186965e5f92..6fd73865320f6 100644 --- a/mysql-test/suite/innodb/t/log_file_size.test +++ b/mysql-test/suite/innodb/t/log_file_size.test @@ -1,5 +1,6 @@ # Test resizing the InnoDB redo log. --source include/innodb_page_size_small.inc +--source include/skip_innodb_log_archive.inc # Embedded server tests do not support restarting --source include/not_embedded.inc # DBUG_EXECUTE_IF is needed diff --git a/mysql-test/suite/innodb/t/log_file_size_online.test b/mysql-test/suite/innodb/t/log_file_size_online.test index 8ea5e662d3b00..40f37df13fffe 100644 --- a/mysql-test/suite/innodb/t/log_file_size_online.test +++ b/mysql-test/suite/innodb/t/log_file_size_online.test @@ -1,7 +1,19 @@ --source include/have_innodb.inc +--source ../../suite/encryption/include/skip_innodb_log_archive.inc --source include/have_sequence.inc --source include/no_valgrind_without_big.inc +call mtr.add_suppression("InnoDB: innodb_log_archive_start=1234567 is after innodb_log_recovery_start=12345"); +call mtr.add_suppression("InnoDB: innodb_log_archive=ON disallows innodb_log_file_size>4G"); +call mtr.add_suppression("InnoDB: innodb_log_archive=ON but .*/ib_logfile0 exists"); +call mtr.add_suppression("InnoDB: No matching file found for innodb_log_recovery_start=12290"); +call mtr.add_suppression("InnoDB: File .*/ib_logfile0 was not found"); +call mtr.add_suppression("InnoDB: innodb_log_archive_start=\\d+ is after innodb_log_recovery_start=\\d+"); +call mtr.add_suppression("InnoDB: Did not find innodb_log_recovery_start=\\d+ "); +let $check_no_innodb=SELECT * FROM INFORMATION_SCHEMA.ENGINES +WHERE engine = 'innodb' +AND support IN ('YES', 'DEFAULT', 'ENABLED'); + let SEARCH_FILE = $MYSQLTEST_VARDIR/log/mysqld.1.err; SET GLOBAL 
innodb_log_file_size=4194304; @@ -16,7 +28,7 @@ ENGINE=INNODB; INSERT INTO t SELECT NULL, REPEAT('a', 255) FROM seq_1_to_20000; ---let $restart_parameters=--innodb-log-file-size=4194304 +--let $restart_parameters=--innodb-log-file-size=4194304 --skip-innodb-log-archive --source include/restart_mysqld.inc SELECT COUNT(*) FROM t; @@ -41,11 +53,28 @@ let $ID= `SELECT @id := CONNECTION_ID()`; send SET GLOBAL innodb_log_file_size=7340032; --connection default let $ignore= `SELECT @id := $ID`; +--error 0,ER_WRONG_USAGE +SET GLOBAL innodb_log_archive=ON; +--error 0,ER_WRONG_USAGE +SET GLOBAL innodb_log_archive=OFF; + KILL QUERY @id; --connection con1 --error 0,ER_QUERY_INTERRUPTED reap; +# When innodb_log_archive=ON, SET GLOBAL innodb_log_file_size is instantaneous +# but will not reflect the file size. +SET GLOBAL innodb_log_archive=ON, innodb_log_file_size=10485760; +SELECT @@GLOBAL.innodb_log_file_size!=10485760; +--error ER_CANT_CREATE_HANDLER_FILE +SET GLOBAL innodb_log_file_size=4294971392; +SELECT @@GLOBAL.innodb_log_file_size<=10485760; +SET GLOBAL innodb_log_file_size=4294967296; +SELECT @@GLOBAL.innodb_log_file_size<=10485760; + +SET GLOBAL innodb_log_archive=OFF; + --connection default send SET GLOBAL innodb_log_file_size=5242880; @@ -64,12 +93,70 @@ reap; --connection default --let $shutdown_timeout=0 +--let $restart_parameters=--innodb-log-recovery-start=12345 --innodb-log-archive-start=1234567 +--source include/restart_mysqld.inc +--let $shutdown_timeout= +evalp $check_no_innodb; + +let SEARCH_PATTERN = InnoDB: innodb_log_archive_start=1234567 is after innodb_log_recovery_start=12345; +--source include/search_pattern_in_file.inc + +--let $restart_parameters=--innodb-log-archive --innodb-log-file-size=5g +--source include/restart_mysqld.inc +evalp $check_no_innodb; + +let SEARCH_PATTERN = InnoDB: innodb_log_archive=ON disallows innodb_log_file_size>4G; +--source include/search_pattern_in_file.inc + +--let $restart_parameters=--innodb-log-archive 
--innodb-log-file-size=4g +--source include/restart_mysqld.inc +evalp $check_no_innodb; + +let SEARCH_PATTERN = InnoDB: innodb_log_archive=ON but .*/ib_logfile0 exists; +--source include/search_pattern_in_file.inc + --let $restart_parameters= --source include/restart_mysqld.inc SELECT * FROM t WHERE a<10; SELECT COUNT(*),LENGTH(b) FROM t GROUP BY b; +SET GLOBAL innodb_log_archive=ON; +let $archive_start=`SELECT variable_value FROM information_schema.global_status +WHERE variable_name='innodb_lsn_archived'`; +let $archive_start_1=`SELECT $archive_start-1`; + +--let $restart_parameters= --innodb-log-recovery-start=12290 +--source include/restart_mysqld.inc + +evalp $check_no_innodb; +let SEARCH_PATTERN = InnoDB: No matching file found for innodb_log_recovery_start=12290; +--source include/search_pattern_in_file.inc + +--let $restart_noprint=1 +--let $restart_parameters= --innodb-log-archive-start=$archive_start --innodb-log-recovery-start=$archive_start_1 +--source include/restart_mysqld.inc + +evalp $check_no_innodb; + +let SEARCH_PATTERN = InnoDB: File .*/ib_logfile0 as not found; +--source include/search_pattern_in_file.inc + +let SEARCH_PATTERN = InnoDB: innodb_log_archive_start=\\d+ is after innodb_log_recovery_start=\\d+; +--source include/search_pattern_in_file.inc + +--let $restart_parameters= --innodb-log-recovery-start=$archive_start_1 +--source include/restart_mysqld.inc + +evalp $check_no_innodb; + +--let $restart_parameters= --innodb-log-recovery-start=$archive_start +--source include/restart_mysqld.inc + +--let $restart_noprint= + +SET GLOBAL innodb_log_archive=OFF; + SHOW VARIABLES LIKE 'innodb_log_file_size'; SET GLOBAL innodb_log_file_size=6291456; SHOW VARIABLES LIKE 'innodb_log_file_size'; diff --git a/mysql-test/suite/innodb/t/log_upgrade.test b/mysql-test/suite/innodb/t/log_upgrade.test index a3d237875feac..d5a3fc7f3962b 100644 --- a/mysql-test/suite/innodb/t/log_upgrade.test +++ b/mysql-test/suite/innodb/t/log_upgrade.test @@ -16,6 +16,7 @@ let 
bugdir= $MYSQLTEST_VARDIR/tmp/log_upgrade; --let SEARCH_FILE = $MYSQLTEST_VARDIR/log/mysqld.1.err --let $dirs= --innodb-data-home-dir=$bugdir --innodb-log-group-home-dir=$bugdir +--let $dirs=$dirs --skip-innodb-log-archive --echo # --echo # MDEV-24412 InnoDB: Upgrade after a crash is not supported diff --git a/mysql-test/suite/innodb/t/log_upgrade_101_flags.test b/mysql-test/suite/innodb/t/log_upgrade_101_flags.test index 7b19986f73e8e..f37fe11e0b3cd 100644 --- a/mysql-test/suite/innodb/t/log_upgrade_101_flags.test +++ b/mysql-test/suite/innodb/t/log_upgrade_101_flags.test @@ -8,7 +8,7 @@ call mtr.add_suppression("InnoDB: adjusting FSP_SPACE_FLAGS of file"); let bugdir= $MYSQLTEST_VARDIR/tmp/log_upgrade; --mkdir $bugdir --let SEARCH_FILE = $MYSQLTEST_VARDIR/log/mysqld.1.err ---let $dirs= --innodb-data-home-dir=$bugdir --innodb-log-group-home-dir=$bugdir --innodb-undo-directory=$bugdir +--let $dirs= --innodb-data-home-dir=$bugdir --innodb-log-group-home-dir=$bugdir --innodb-undo-directory=$bugdir --skip-innodb-log-archive # Test case similar to log_upgrade.test perl; diff --git a/mysql-test/suite/innodb/t/rename_table.test b/mysql-test/suite/innodb/t/rename_table.test index a61813429b381..0551457372074 100644 --- a/mysql-test/suite/innodb/t/rename_table.test +++ b/mysql-test/suite/innodb/t/rename_table.test @@ -2,6 +2,9 @@ --source include/not_embedded.inc call mtr.add_suppression("InnoDB: In RENAME TABLE table `test`.`t4` is referenced in foreign key constraints which are not compatible with the new table definition."); +call mtr.add_suppression("InnoDB: Plugin initialization aborted"); +call mtr.add_suppression("Plugin 'InnoDB' registration as a STORAGE ENGINE failed"); +call mtr.add_suppression("InnoDB: cannot fulfill innodb_log_recovery_target=20480<"); CREATE DATABASE test_jfg; CREATE DATABASE test_jfg2; @@ -13,6 +16,18 @@ FROM INFORMATION_SCHEMA.INNODB_SYS_TABLESPACES WHERE filename LIKE '%test%'; DROP DATABASE test_jfg; +--let 
$restart_parameters=--innodb-log-recovery-target=20480 +--source include/restart_mysqld.inc + +SELECT COUNT(*) FROM INFORMATION_SCHEMA.ENGINES +WHERE engine = 'innodb' +AND support IN ('YES', 'DEFAULT', 'ENABLED'); + +--let SEARCH_FILE = $MYSQLTEST_VARDIR/log/mysqld.1.err +--let SEARCH_PATTERN= InnoDB: cannot fulfill innodb_log_recovery_target=20480< +--source include/search_pattern_in_file.inc + +--let $restart_parameters= --source include/restart_mysqld.inc DROP DATABASE test_jfg2; diff --git a/mysql-test/suite/innodb/t/row_format_redundant.test b/mysql-test/suite/innodb/t/row_format_redundant.test index 9f85c45455b1a..2a705094f02f5 100644 --- a/mysql-test/suite/innodb/t/row_format_redundant.test +++ b/mysql-test/suite/innodb/t/row_format_redundant.test @@ -153,6 +153,10 @@ DROP TABLE t2,t3; --let $restart_parameters= --source include/restart_mysqld.inc +if (!`select @@innodb_log_archive=0`) +{ +--replace_result ib_0000000000003000.log ib_logfile0 +} --list_files $bugdir --remove_files_wildcard $bugdir --rmdir $bugdir diff --git a/mysql-test/suite/innodb/t/sys_defragment.test b/mysql-test/suite/innodb/t/sys_defragment.test index a4e5a84450d0d..b880a9d09a826 100644 --- a/mysql-test/suite/innodb/t/sys_defragment.test +++ b/mysql-test/suite/innodb/t/sys_defragment.test @@ -1,4 +1,5 @@ --source include/have_innodb.inc +--source include/skip_innodb_log_archive.inc --source include/have_sequence.inc --source include/have_debug.inc diff --git a/mysql-test/suite/innodb/t/sys_defragment_fail.test b/mysql-test/suite/innodb/t/sys_defragment_fail.test index aca741b902163..a0c309350d58a 100644 --- a/mysql-test/suite/innodb/t/sys_defragment_fail.test +++ b/mysql-test/suite/innodb/t/sys_defragment_fail.test @@ -1,4 +1,5 @@ --source include/have_innodb.inc +--source include/skip_innodb_log_archive.inc --source include/have_debug.inc --source include/have_sequence.inc diff --git a/mysql-test/suite/innodb/t/table_flags.test b/mysql-test/suite/innodb/t/table_flags.test index 
74a7782a90f4b..fcc0f2adb6ac2 100644 --- a/mysql-test/suite/innodb/t/table_flags.test +++ b/mysql-test/suite/innodb/t/table_flags.test @@ -232,6 +232,10 @@ DROP TABLE tr,tc,td,tz,tp; --error 0,1 --remove_file $bugdir/ib_buffer_pool +if (!`select @@innodb_log_archive=0`) +{ +--replace_result ib_0000000000003000.log ib_logfile0 +} --list_files $bugdir --remove_files_wildcard $bugdir --rmdir $bugdir diff --git a/mysql-test/suite/innodb/t/undo_space_dblwr.test b/mysql-test/suite/innodb/t/undo_space_dblwr.test index 33e8ed9d65153..e0ca45cde7a17 100644 --- a/mysql-test/suite/innodb/t/undo_space_dblwr.test +++ b/mysql-test/suite/innodb/t/undo_space_dblwr.test @@ -1,4 +1,5 @@ --source include/have_innodb.inc +--source include/no_checkpoint_prepare.inc --source include/have_debug.inc --source include/not_embedded.inc call mtr.add_suppression("Checksum mismatch in the first page of file"); diff --git a/mysql-test/suite/sys_vars/r/sysvars_innodb.result b/mysql-test/suite/sys_vars/r/sysvars_innodb.result index 23978fc881825..1b2c41ea83649 100644 --- a/mysql-test/suite/sys_vars/r/sysvars_innodb.result +++ b/mysql-test/suite/sys_vars/r/sysvars_innodb.result @@ -932,6 +932,30 @@ NUMERIC_BLOCK_SIZE 0 ENUM_VALUE_LIST NULL READ_ONLY NO COMMAND_LINE_ARGUMENT REQUIRED +VARIABLE_NAME INNODB_LOG_ARCHIVE +SESSION_VALUE NULL +DEFAULT_VALUE OFF +VARIABLE_SCOPE GLOBAL +VARIABLE_TYPE BOOLEAN +VARIABLE_COMMENT Whether log archiving is desired +NUMERIC_MIN_VALUE NULL +NUMERIC_MAX_VALUE NULL +NUMERIC_BLOCK_SIZE NULL +ENUM_VALUE_LIST OFF,ON +READ_ONLY NO +COMMAND_LINE_ARGUMENT OPTIONAL +VARIABLE_NAME INNODB_LOG_ARCHIVE_START +SESSION_VALUE NULL +DEFAULT_VALUE 0 +VARIABLE_SCOPE GLOBAL +VARIABLE_TYPE BIGINT UNSIGNED +VARIABLE_COMMENT initial value of innodb_lsn_archived; 0=auto-detect +NUMERIC_MIN_VALUE 0 +NUMERIC_MAX_VALUE 18446744073709551615 +NUMERIC_BLOCK_SIZE 0 +ENUM_VALUE_LIST NULL +READ_ONLY YES +COMMAND_LINE_ARGUMENT REQUIRED VARIABLE_NAME INNODB_LOG_BUFFER_SIZE SESSION_VALUE NULL 
DEFAULT_VALUE 16777216 @@ -961,7 +985,7 @@ SESSION_VALUE NULL DEFAULT_VALUE ON VARIABLE_SCOPE GLOBAL VARIABLE_TYPE BOOLEAN -VARIABLE_COMMENT Whether ib_logfile0 resides in persistent memory (when supported) or should initially be memory-mapped +VARIABLE_COMMENT Whether the log resides in persistent memory (when supported) or should initially be memory-mapped NUMERIC_MIN_VALUE NULL NUMERIC_MAX_VALUE NULL NUMERIC_BLOCK_SIZE NULL @@ -973,7 +997,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 100663296 VARIABLE_SCOPE GLOBAL VARIABLE_TYPE BIGINT UNSIGNED -VARIABLE_COMMENT Redo log size in bytes. +VARIABLE_COMMENT Desired log file size in bytes NUMERIC_MIN_VALUE 4194304 NUMERIC_MAX_VALUE 18446744073709551615 NUMERIC_BLOCK_SIZE 4096 @@ -985,7 +1009,7 @@ SESSION_VALUE NULL DEFAULT_VALUE OFF VARIABLE_SCOPE GLOBAL VARIABLE_TYPE BOOLEAN -VARIABLE_COMMENT Whether each write to ib_logfile0 is write through +VARIABLE_COMMENT Whether each write to the log is write through NUMERIC_MIN_VALUE NULL NUMERIC_MAX_VALUE NULL NUMERIC_BLOCK_SIZE NULL @@ -997,13 +1021,37 @@ SESSION_VALUE NULL DEFAULT_VALUE VARIABLE_SCOPE GLOBAL VARIABLE_TYPE VARCHAR -VARIABLE_COMMENT Path to ib_logfile0 +VARIABLE_COMMENT Path to ib_logfile0 or ib_*.log NUMERIC_MIN_VALUE NULL NUMERIC_MAX_VALUE NULL NUMERIC_BLOCK_SIZE NULL ENUM_VALUE_LIST NULL READ_ONLY YES COMMAND_LINE_ARGUMENT REQUIRED +VARIABLE_NAME INNODB_LOG_RECOVERY_START +SESSION_VALUE NULL +DEFAULT_VALUE 0 +VARIABLE_SCOPE GLOBAL +VARIABLE_TYPE BIGINT UNSIGNED +VARIABLE_COMMENT LSN to start recovery from (0=automatic) +NUMERIC_MIN_VALUE 0 +NUMERIC_MAX_VALUE 18446744073709551615 +NUMERIC_BLOCK_SIZE 0 +ENUM_VALUE_LIST NULL +READ_ONLY YES +COMMAND_LINE_ARGUMENT REQUIRED +VARIABLE_NAME INNODB_LOG_RECOVERY_TARGET +SESSION_VALUE NULL +DEFAULT_VALUE 0 +VARIABLE_SCOPE GLOBAL +VARIABLE_TYPE BIGINT UNSIGNED +VARIABLE_COMMENT recovery point objective (end LSN; 0=unlimited) +NUMERIC_MIN_VALUE 0 +NUMERIC_MAX_VALUE 18446744073709551615 +NUMERIC_BLOCK_SIZE 0 +ENUM_VALUE_LIST 
NULL +READ_ONLY YES +COMMAND_LINE_ARGUMENT REQUIRED VARIABLE_NAME INNODB_LOG_SPIN_WAIT_DELAY SESSION_VALUE NULL DEFAULT_VALUE 0 diff --git a/sql/upgrade_conf_file.cc b/sql/upgrade_conf_file.cc index 0d7bc6034685c..f1fa9aac6ba3d 100644 --- a/sql/upgrade_conf_file.cc +++ b/sql/upgrade_conf_file.cc @@ -97,7 +97,6 @@ static const char *removed_variables[] = "innodb_locks_unsafe_for_binlog", "innodb_log_arch_dir", "innodb_log_arch_expire_sec", -"innodb_log_archive", "innodb_log_block_size", "innodb_log_checksum_algorithm", "innodb_log_checksums", diff --git a/storage/innobase/buf/buf0dblwr.cc b/storage/innobase/buf/buf0dblwr.cc index c5d77a90cab8f..19a0239ea306f 100644 --- a/storage/innobase/buf/buf0dblwr.cc +++ b/storage/innobase/buf/buf0dblwr.cc @@ -257,7 +257,7 @@ dberr_t buf_dblwr_t::init_or_load_pages(pfs_os_file_t file, const char *path) init(TRX_SYS_DOUBLEWRITE + read_buf); const bool upgrade_to_innodb_file_per_table= - !srv_read_only_mode && + !recv_sys.rpo && mach_read_from_4(TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED + TRX_SYS_DOUBLEWRITE + read_buf) != TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N; @@ -333,7 +333,8 @@ void buf_dblwr_t::recover() noexcept if (!is_created()) return; const lsn_t max_lsn{log_sys.get_flushed_lsn(std::memory_order_relaxed)}; - ut_ad(recv_sys.scanned_lsn == max_lsn); + ut_ad(recv_sys.scanned_lsn == max_lsn || + (recv_sys.rpo && recv_sys.rpo < max_lsn)); ut_ad(recv_sys.scanned_lsn >= recv_sys.lsn); uint32_t page_no_dblwr= 0; diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc index 0fe60203901b8..fbc6658a2dee4 100644 --- a/storage/innobase/buf/buf0flu.cc +++ b/storage/innobase/buf/buf0flu.cc @@ -1815,51 +1815,179 @@ static ulint buf_flush_LRU(ulint max_n) noexcept inline void log_t::write_checkpoint(lsn_t end_lsn) noexcept { ut_ad(!srv_read_only_mode); + ut_ad(!recv_sys.rpo); + ut_ad(archive ? 
file_size <= ~0U : next_checkpoint_lsn >= first_lsn); ut_ad(end_lsn >= next_checkpoint_lsn); ut_d(const lsn_t current_lsn{get_lsn()}); ut_ad(end_lsn <= current_lsn); ut_ad(end_lsn + SIZE_OF_FILE_CHECKPOINT <= current_lsn || srv_shutdown_state > SRV_SHUTDOWN_INITIATED); + ut_ad(this->end_lsn <= end_lsn); + ut_ad(checkpoint_buf != buf); + ut_ad(!checkpoint_buf || checkpoint_buf != resize_buf); + ut_ad(buf != resize_buf); + ut_ad(latch_have_wr()); DBUG_PRINT("ib_log", ("checkpoint at " LSN_PF " written", next_checkpoint_lsn)); - auto n= next_checkpoint_no; - const size_t offset{(n & 1) ? CHECKPOINT_2 : CHECKPOINT_1}; + size_t offset; static_assert(CPU_LEVEL1_DCACHE_LINESIZE >= 64, "efficiency"); static_assert(CPU_LEVEL1_DCACHE_LINESIZE <= 4096, "compatibility"); - byte* c= my_assume_aligned<CPU_LEVEL1_DCACHE_LINESIZE> - (is_mmap() ? buf + offset : checkpoint_buf); - memset_aligned<CPU_LEVEL1_DCACHE_LINESIZE>(c, 0, CPU_LEVEL1_DCACHE_LINESIZE); + lsn_t resizing{resize_lsn.load(std::memory_order_relaxed)}; + byte *c= checkpoint_buf; + bool archive_header_was_reset{false}; + + if (archive) + { + ut_ad(!resizing); +#ifdef HAVE_PMEM + ut_ad(!resize_buf || !checkpoint_buf); + ut_ad(!resize_buf || resize_log.is_opened()); +#else + ut_ad(!resize_buf); +#endif + if (end_lsn >= first_lsn + ( +#ifdef HAVE_PMEM + c && is_mmap() ? 
0 : +#endif + capacity())) + { +#ifdef HAVE_PMEM + if (resize_buf) + { + ut_ad(is_mmap()); + /* @see archived_mmap_switch_complete() */ + ut_ad(!c); + const lsn_t lsn{get_lsn()}; + ut_ad(lsn == current_lsn); + persist(lsn); + checkpoint_buf= buf; + buf= resize_buf; + resize_buf= nullptr; + first_lsn+= capacity(); + file_size= resize_target; + goto unmap_old_checkpoint; + } + else if (c && is_mmap()) + { + unmap_old_checkpoint: + checkpoint_buf= nullptr; + my_munmap(c, lseek(resize_log.m_file, 0, SEEK_END)); + goto first_checkpoint_in_new_archive; + } + else +#endif + if (resize_log.is_opened()) + { + first_lsn+= capacity(); + file_size= resize_target; + +#ifdef HAVE_PMEM + ut_ad(!c == is_mmap()); + if (!c) + { + first_checkpoint_in_new_archive: + c= buf; + ut_ad(!memcmp_aligned<512>(c, field_ref_zero, START_OFFSET)); + } + else +#endif + memset_aligned<512>(c, 0, write_size); + + ut_ad(current_lsn >= first_lsn); + ut_ad(current_lsn < first_lsn + capacity()); + next_checkpoint_no= uint16_t(7 * is_encrypted() + 1); + archive_header_was_reset= true; + + if (is_encrypted()) + log_crypt_write_header(c); + +#ifdef HAVE_PMEM + c= checkpoint_buf; +#endif + } + } + + ut_ad(end_lsn >= first_lsn); + offset= next_checkpoint_no * 4; + ut_ad(offset); + /* In case all slots are filled up, overwrite the last slot. 
*/ + if (offset >= START_OFFSET) + offset= START_OFFSET - 4, next_checkpoint_no= START_OFFSET / 4 - 1; + const lsn_t d{end_lsn - first_lsn + START_OFFSET}; + ut_ad(d <= lsn_t{~uint32_t{0}}); + ut_ad(c == checkpoint_buf); + +#ifdef HAVE_PMEM + if (!c) + { + ut_ad(is_mmap()); + c= buf; + goto archived_mmap; + } + else if (is_mmap()) + { + archived_mmap: + c+= offset; + ut_ad(next_checkpoint_no == uint16_t(7 * is_encrypted() + 1) || + (mach_read_from_4(c - 4) && mach_read_from_4(c - 4) < d)); + ut_ad(!memcmp(c, field_ref_zero, 4)); + mach_write_to_4(my_assume_aligned<4>(c), uint32_t(d)); + c= reinterpret_cast<byte*>(uintptr_t(c) & ~63); + goto persist_checkpoint; + } + else +#endif + { + const size_t o{offset & (write_size - 1)}; + offset&= ~size_t(write_size - 1); + if (!o) + memset_aligned<512>(c, 0, write_size); + else + ut_ad(next_checkpoint_no == uint16_t(7 * is_encrypted() + 1) || + (mach_read_from_4(c + o - 4) && + mach_read_from_4(c + o - 4) < d)); + ut_ad(!memcmp(c + o, field_ref_zero, 4)); + mach_write_to_4(my_assume_aligned<4>(c + o), uint32_t(d)); + goto write_checkpoint; + } + + goto wrote_checkpoint; + } + + offset= (next_checkpoint_no & 1) ? CHECKPOINT_2 : CHECKPOINT_1; + c= is_mmap() ? 
buf + offset : checkpoint_buf; + memset_aligned<CPU_LEVEL1_DCACHE_LINESIZE>(c, 0, + CPU_LEVEL1_DCACHE_LINESIZE); mach_write_to_8(my_assume_aligned<8>(c), next_checkpoint_lsn); mach_write_to_8(my_assume_aligned<8>(c + 8), end_lsn); mach_write_to_4(my_assume_aligned<4>(c + 60), my_crc32c(0, c, 60)); - lsn_t resizing; - #ifdef HAVE_PMEM if (is_mmap()) { ut_ad(!is_opened()); - resizing= resize_lsn.load(std::memory_order_relaxed); - if (resizing > 1 && resizing <= next_checkpoint_lsn) { memcpy_aligned<64>(resize_buf + CHECKPOINT_1, c, 64); header_write(resize_buf, resizing, is_encrypted()); pmem_persist(resize_buf, resize_target); } + persist_checkpoint: pmem_persist(c, 64); } else #endif { + write_checkpoint: ut_ad(!is_mmap()); ut_ad(!checkpoint_pending); checkpoint_pending= true; latch.wr_unlock(); log_write_and_flush_prepare(); resizing= resize_lsn.load(std::memory_order_relaxed); + ut_ad(!resizing || !archive); ut_ad(ut_is_2pow(write_size)); ut_ad(write_size >= 512); ut_ad(write_size <= 4096); @@ -1882,10 +2010,33 @@ inline void log_t::write_checkpoint(lsn_t end_lsn) noexcept resizing= resize_lsn.load(std::memory_order_relaxed); } - ut_ad(!checkpoint_pending); + wrote_checkpoint: next_checkpoint_no++; + + ut_ad(!resizing || !archive); + ut_ad(!checkpoint_pending); const lsn_t checkpoint_lsn{next_checkpoint_lsn}; last_checkpoint_lsn= checkpoint_lsn; + this->end_lsn= end_lsn; + if (!archive) + archived_lsn= end_lsn; + else if (archive_header_was_reset) + { + /* Make the previous archived log file read-only */ +#ifdef _WIN32 + resize_log.close(); + SetFileAttributesA(get_archive_path().c_str(), + FILE_ATTRIBUTE_READONLY | FILE_ATTRIBUTE_ARCHIVE); +#else + struct stat st; + if (!fstat(resize_log.m_file, &st)) + st.st_mode&= 0444; + else + st.st_mode= 0444; + fchmod(resize_log.m_file, st.st_mode); + resize_log.close(); +#endif + } DBUG_PRINT("ib_log", ("checkpoint ended at " LSN_PF ", flushed to " LSN_PF, checkpoint_lsn, get_flushed_lsn())); @@ -1902,6 +2053,7 @@ inline void 
log_t::write_checkpoint(lsn_t end_lsn) noexcept if (resizing > 1 && resizing <= checkpoint_lsn) { + ut_ad(!archive); ut_ad(is_mmap() == !resize_flush_buf); ut_ad(is_mmap() == !resize_log.is_opened()); @@ -1931,7 +2083,7 @@ inline void log_t::write_checkpoint(lsn_t end_lsn) noexcept ut_ad(!log.is_opened()); bool success; log.m_file= - os_file_create_func(get_log_file_path().c_str(), OS_FILE_OPEN, + os_file_create_func(get_circular_path().c_str(), OS_FILE_OPEN, OS_LOG_FILE, false, &success); ut_a(success); ut_a(log.is_opened()); @@ -1993,16 +2145,17 @@ inline void log_t::write_checkpoint(lsn_t end_lsn) noexcept static bool log_checkpoint_low(lsn_t oldest_lsn, lsn_t end_lsn) noexcept { ut_ad(!srv_read_only_mode); + ut_ad(!recv_sys.rpo); ut_ad(log_sys.latch_have_wr()); ut_ad(oldest_lsn <= end_lsn); ut_ad(end_lsn == log_sys.get_lsn()); - if (oldest_lsn == log_sys.last_checkpoint_lsn || - (oldest_lsn == end_lsn && - !log_sys.resize_in_progress() && - oldest_lsn == log_sys.last_checkpoint_lsn + - (log_sys.is_encrypted() - ? SIZE_OF_FILE_CHECKPOINT + 8 : SIZE_OF_FILE_CHECKPOINT))) + if (oldest_lsn == end_lsn && oldest_lsn != log_sys.get_first_lsn() && + (oldest_lsn == log_sys.last_checkpoint_lsn || + (!log_sys.resize_in_progress() && + oldest_lsn == log_sys.last_checkpoint_lsn + + (log_sys.is_encrypted() + ? SIZE_OF_FILE_CHECKPOINT + 8 : SIZE_OF_FILE_CHECKPOINT)))) { /* Do nothing, because nothing was logged (other than a FILE_CHECKPOINT record) since the previous checkpoint. */ @@ -2012,7 +2165,7 @@ static bool log_checkpoint_low(lsn_t oldest_lsn, lsn_t end_lsn) noexcept } ut_ad(!recv_no_log_write); - ut_ad(oldest_lsn > log_sys.last_checkpoint_lsn); + ut_ad(oldest_lsn >= log_sys.last_checkpoint_lsn); /* Repeat the FILE_MODIFY records after the checkpoint, in case some log records between the checkpoint and log_sys.lsn need them. Finally, write a FILE_CHECKPOINT record. 
Redo log apply expects to @@ -2029,7 +2182,8 @@ static bool log_checkpoint_low(lsn_t oldest_lsn, lsn_t end_lsn) noexcept log_sys.latch.wr_unlock(); log_write_up_to(flush_lsn, true); log_sys.latch.wr_lock(SRW_LOCK_CALL); - if (log_sys.last_checkpoint_lsn >= oldest_lsn) + if (log_sys.last_checkpoint_lsn >= oldest_lsn && + log_sys.last_checkpoint_lsn != log_sys.get_first_lsn()) goto do_nothing; ut_ad(log_sys.get_flushed_lsn() >= flush_lsn); @@ -2080,7 +2234,8 @@ static bool log_checkpoint() noexcept ATTRIBUTE_COLD void log_make_checkpoint() noexcept { buf_flush_wait_flushed(log_get_lsn()); - while (!log_checkpoint()); + if (!recv_sys.rpo) + while (!log_checkpoint()); } /** Wait for all dirty pages up to an LSN to be written out. @@ -2162,7 +2317,8 @@ ATTRIBUTE_COLD void buf_flush_wait_flushed(lsn_t sync_lsn) noexcept mysql_mutex_unlock(&buf_pool.flush_list_mutex); - if (UNIV_UNLIKELY(log_sys.last_checkpoint_lsn < sync_lsn)) + if (UNIV_UNLIKELY(log_sys.last_checkpoint_lsn < sync_lsn) && + !recv_sys.rpo) { /* If the buffer pool was clean, no log write was guaranteed to happen until now. There could be an outstanding FILE_CHECKPOINT @@ -2180,11 +2336,13 @@ ATTRIBUTE_COLD void buf_flush_wait_flushed(lsn_t sync_lsn) noexcept ATTRIBUTE_COLD void buf_flush_ahead(lsn_t lsn, bool furious) noexcept { ut_ad(!srv_read_only_mode); + ut_ad(!recv_sys.rpo); if (recv_recovery_is_on()) recv_sys.apply(true); - DBUG_EXECUTE_IF("ib_log_checkpoint_avoid_hard", return;); + DBUG_EXECUTE_IF("ib_log_checkpoint_avoid_hard", + if (!log_sys.archive) return;); Atomic_relaxed<lsn_t> &limit= furious ? buf_flush_sync_lsn : buf_flush_async_lsn; @@ -2225,6 +2383,7 @@ ATTRIBUTE_COLD ATTRIBUTE_NOINLINE static void buf_flush_sync_for_checkpoint(lsn_t lsn) noexcept { ut_ad(!srv_read_only_mode); + ut_ad(!recv_sys.rpo); mysql_mutex_assert_not_owner(&buf_pool.flush_list_mutex); /* During furious flush, we need to keep generating free pages. 
Otherwise @@ -2601,7 +2760,7 @@ static void buf_flush_page_cleaner() noexcept IF_DBUG(if (_db_keyword_(nullptr, "ib_log_checkpoint_avoid", 1) || _db_keyword_(nullptr, "ib_log_checkpoint_avoid_hard", 1)) continue,); - if (!recv_recovery_is_on() && + if (!recv_recovery_is_on() && !recv_sys.rpo && !srv_startup_is_before_trx_rollback_phase && srv_operation <= SRV_OPERATION_EXPORT_RESTORED) log_checkpoint(); diff --git a/storage/innobase/dict/dict0crea.cc b/storage/innobase/dict/dict0crea.cc index 8ee7c46739540..482eb77e1c4ac 100644 --- a/storage/innobase/dict/dict0crea.cc +++ b/storage/innobase/dict/dict0crea.cc @@ -1356,10 +1356,12 @@ bool dict_sys_t::load_sys_tables() noexcept dberr_t dict_sys_t::create_or_check_sys_tables() noexcept { + ut_ad(!srv_read_only_mode || recv_sys.rpo); + if (sys_tables_exist()) return DB_SUCCESS; - if (srv_read_only_mode || srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO) + if (recv_sys.rpo || srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO) return DB_READ_ONLY; if (load_sys_tables()) diff --git a/storage/innobase/dict/dict0dict.cc b/storage/innobase/dict/dict0dict.cc index 9e7a3fc4d0258..29fb6b8b05441 100644 --- a/storage/innobase/dict/dict0dict.cc +++ b/storage/innobase/dict/dict0dict.cc @@ -926,6 +926,8 @@ void dict_sys_t::create() noexcept { ut_ad(this == &dict_sys); ut_ad(!is_initialised()); + ut_ad(!srv_read_only_mode || recv_sys.rpo); + m_initialised= true; UT_LIST_INIT(table_LRU, &dict_table_t::table_LRU); UT_LIST_INIT(table_non_LRU, &dict_table_t::table_LRU); @@ -939,7 +941,7 @@ void dict_sys_t::create() noexcept latch.SRW_LOCK_INIT(dict_operation_lock_key); - if (!srv_read_only_mode) + if (!recv_sys.rpo) { dict_foreign_err_file= os_file_create_tmpfile(); ut_a(dict_foreign_err_file); @@ -3420,6 +3422,8 @@ dict_foreign_parse_drop_constraints( bool if_exists = false; ut_a(trx->mysql_thd); + ut_ad(!srv_read_only_mode); + ut_ad(!recv_sys.rpo); cs = trx->mysql_thd->charset(); @@ -3489,9 +3493,7 @@ dict_foreign_parse_drop_constraints( if 
(if_exists) { goto loop; - } - - if (!srv_read_only_mode) { + } else { FILE* ef = dict_foreign_err_file; mysql_mutex_lock(&dict_foreign_err_mutex); @@ -3517,7 +3519,7 @@ dict_foreign_parse_drop_constraints( goto loop; syntax_error: - if (!srv_read_only_mode) { + { FILE* ef = dict_foreign_err_file; mysql_mutex_lock(&dict_foreign_err_mutex); diff --git a/storage/innobase/dict/dict0stats_bg.cc b/storage/innobase/dict/dict0stats_bg.cc index 135ffb6000580..40e70c185c5a5 100644 --- a/storage/innobase/dict/dict0stats_bg.cc +++ b/storage/innobase/dict/dict0stats_bg.cc @@ -76,6 +76,7 @@ thread de-initialization. */ static void dict_stats_recalc_pool_deinit() { ut_ad(!srv_read_only_mode); + ut_ad(!recv_sys.rpo); recalc_pool.clear(); /* @@ -102,6 +103,7 @@ then it will be removed from the pool and skipped. */ static void dict_stats_recalc_pool_add(table_id_t id) { ut_ad(!srv_read_only_mode); + ut_ad(!recv_sys.rpo); ut_ad(id); bool schedule = false; mysql_mutex_lock(&recalc_pool_mutex); @@ -199,6 +201,7 @@ no statistics are being updated on it. */ void dict_stats_recalc_pool_del(table_id_t id, bool have_mdl_exclusive) { ut_ad(!srv_read_only_mode); + ut_ad(!recv_sys.rpo); ut_ad(id); mysql_mutex_lock(&recalc_pool_mutex); @@ -245,6 +248,7 @@ Must be called before dict_stats_thread() is started. */ void dict_stats_init() { ut_ad(!srv_read_only_mode); + ut_ad(!recv_sys.rpo); mysql_mutex_init(recalc_pool_mutex_key, &recalc_pool_mutex, nullptr); pthread_cond_init(&recalc_pool_cond, nullptr); stats_initialised= true; @@ -260,6 +264,8 @@ void dict_stats_deinit() } ut_ad(!srv_read_only_mode); + ut_ad(!recv_sys.rpo); + stats_initialised = false; dict_stats_recalc_pool_deinit(); @@ -275,6 +281,7 @@ update its stats. 
static bool dict_stats_process_entry_from_recalc_pool(THD *thd) { ut_ad(!srv_read_only_mode); + ut_ad(!recv_sys.rpo); table_id_t table_id; mysql_mutex_lock(&recalc_pool_mutex); next_table_id_with_mutex: diff --git a/storage/innobase/fil/fil0crypt.cc b/storage/innobase/fil/fil0crypt.cc index f7911dd11fd16..72f2bb2b4bc88 100644 --- a/storage/innobase/fil/fil0crypt.cc +++ b/storage/innobase/fil/fil0crypt.cc @@ -333,7 +333,7 @@ void fil_space_destroy_crypt_data(fil_space_crypt_t **crypt_data) *crypt_data = NULL; mysql_mutex_unlock(&fil_crypt_threads_mutex); } else { - ut_ad(srv_read_only_mode || !srv_was_started); + ut_ad(recv_sys.rpo || !srv_was_started); c = *crypt_data; *crypt_data = NULL; } @@ -2108,7 +2108,9 @@ Adjust thread count for key rotation @param[in] enw_cnt Number of threads to be used */ void fil_crypt_set_thread_cnt(const uint new_cnt) { - if (srv_read_only_mode) + ut_ad(!srv_read_only_mode || recv_sys.rpo); + + if (recv_sys.rpo) return; if (!fil_crypt_threads_inited) { @@ -2265,6 +2267,7 @@ Init threads for key rotation */ void fil_crypt_threads_init() { ut_ad(!srv_read_only_mode); + ut_ad(!recv_sys.rpo); if (!fil_crypt_threads_inited) { pthread_cond_init(&fil_crypt_cond, nullptr); diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc index f6f2331f6d258..067788bd8f69b 100644 --- a/storage/innobase/fil/fil0fil.cc +++ b/storage/innobase/fil/fil0fil.cc @@ -3112,10 +3112,16 @@ void fil_names_dirty(fil_space_t *space) noexcept { ut_ad(log_sys.latch_have_wr()); ut_ad(recv_recovery_is_on()); + ut_ad(!srv_read_only_mode); ut_ad(log_sys.get_lsn() != 0); ut_ad(space->max_lsn == 0); ut_d(fil_space_validate_for_mtr_commit(space)); + if (UNIV_UNLIKELY(recv_sys.rpo != 0)) { + /* The log is read-only; do not write to it */ + return; + } + fil_system.named_spaces.push_back(*space); space->max_lsn = log_sys.get_lsn(); } diff --git a/storage/innobase/fts/fts0fts.cc b/storage/innobase/fts/fts0fts.cc index dac0f95d950d7..11839fbc6bd3f 100644 --- 
a/storage/innobase/fts/fts0fts.cc +++ b/storage/innobase/fts/fts0fts.cc @@ -2638,7 +2638,9 @@ fts_cmp_set_sync_doc_id( doc_id_t *doc_id, trx_t *trx=nullptr) { - if (srv_read_only_mode) { + ut_ad(!srv_read_only_mode || recv_sys.rpo); + + if (recv_sys.rpo) { return DB_READ_ONLY; } @@ -2726,7 +2728,9 @@ fts_update_sync_doc_id( fts_cache_t* cache = table->fts->cache; char fts_name[MAX_FULL_NAME_LEN]; - if (srv_read_only_mode) { + ut_ad(!srv_read_only_mode || recv_sys.rpo); + + if (recv_sys.rpo) { return DB_READ_ONLY; } @@ -2945,7 +2949,9 @@ fts_commit_table( /*=============*/ fts_trx_table_t* ftt) /*!< in: FTS table to commit*/ { - if (srv_read_only_mode) { + ut_ad(!srv_read_only_mode || recv_sys.rpo); + + if (recv_sys.rpo) { return DB_READ_ONLY; } @@ -4275,7 +4281,9 @@ fts_sync( bool unlock_cache, bool wait) { - if (srv_read_only_mode) { + ut_ad(!srv_read_only_mode || recv_sys.rpo); + + if (recv_sys.rpo) { return DB_READ_ONLY; } diff --git a/storage/innobase/fts/fts0opt.cc b/storage/innobase/fts/fts0opt.cc index cac29121940d5..a2cdf65714842 100644 --- a/storage/innobase/fts/fts0opt.cc +++ b/storage/innobase/fts/fts0opt.cc @@ -2434,7 +2434,9 @@ fts_optimize_table( /*===============*/ dict_table_t* table) /*!< in: table to optimiza */ { - if (srv_read_only_mode) { + ut_ad(!srv_read_only_mode || recv_sys.rpo); + + if (recv_sys.rpo) { return DB_READ_ONLY; } @@ -2818,6 +2820,7 @@ Optimize all FTS tables. static void fts_optimize_callback(void *) { ut_ad(!srv_read_only_mode); + ut_ad(!recv_sys.rpo); static ulint current; static bool done; @@ -2955,6 +2958,7 @@ fts_optimize_init(void) ib_alloc_t* heap_alloc; ut_ad(!srv_read_only_mode); + ut_ad(!recv_sys.rpo); /* For now we only support one optimize thread. 
*/ ut_a(!fts_optimize_wq); @@ -2998,6 +3002,7 @@ void fts_optimize_shutdown() { ut_ad(!srv_read_only_mode); + ut_ad(!recv_sys.rpo); /* If there is an ongoing activity on dictionary, such as srv_master_evict_from_table_cache(), wait for it */ diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 921331fda4ec7..4e42a32ce8ffa 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -950,6 +950,7 @@ static SHOW_VAR innodb_status_variables[]= { {"lsn_flushed", &export_vars.innodb_lsn_flushed, SHOW_ULONGLONG}, {"lsn_last_checkpoint", &export_vars.innodb_lsn_last_checkpoint, SHOW_ULONGLONG}, + {"lsn_archived", &log_sys.archived_lsn, SHOW_ULONGLONG}, {"master_thread_active_loops", &srv_main_active_loops, SHOW_SIZE_T}, {"master_thread_idle_loops", &srv_main_idle_loops, SHOW_SIZE_T}, {"max_trx_id", &export_vars.innodb_max_trx_id, SHOW_ULONGLONG}, @@ -1497,7 +1498,9 @@ innobase_start_trx_and_assign_read_view( @return false */ static bool innobase_flush_logs(handlerton*) { - if (!srv_read_only_mode) + ut_ad(!srv_read_only_mode || recv_sys.rpo); + + if (!recv_sys.rpo) /* Write and flush any outstanding redo log. 
*/ log_buffer_flush_to_disk(true); return false; @@ -1941,9 +1944,11 @@ static void drop_garbage_tables_after_restore() static int innodb_ddl_recovery_done(handlerton*) { + ut_ad(!srv_read_only_mode || recv_sys.rpo); ut_ad(!ddl_recovery_done); ut_d(ddl_recovery_done= true); - if (!srv_read_only_mode && srv_operation <= SRV_OPERATION_EXPORT_RESTORED && + + if (!recv_sys.rpo && srv_operation <= SRV_OPERATION_EXPORT_RESTORED && srv_force_recovery < SRV_FORCE_NO_BACKGROUND) { if (srv_start_after_restore && !high_level_read_only) @@ -3694,6 +3699,9 @@ compression_algorithm_is_not_loaded(ulong compression_algorithm, myf flags) return 1; } +/** Initial value of innodb_lsn_archived */ +static uint64_t innodb_log_archive_start; + /** Initialize, validate and normalize the InnoDB startup parameters. @return failure code @retval 0 on success @@ -3982,6 +3990,32 @@ static int innodb_init_params() skip_buffering_tweak: #endif + log_sys.archived_lsn= innodb_log_archive_start; + + if (recv_sys.recovery_start && + log_sys.archived_lsn > recv_sys.recovery_start) + { + sql_print_error("InnoDB: innodb_log_archive_start=" LSN_PF + " is after innodb_log_recovery_start=" LSN_PF, + log_sys.archived_lsn, recv_sys.recovery_start); + DBUG_RETURN(HA_ERR_INITIALIZATION); + } + + if (recv_sys.rpo && recv_sys.recovery_start > recv_sys.rpo) + { + sql_print_error("InnoDB: innodb_log_recovery_start=" LSN_PF + " is after innodb_log_recovery_target=" LSN_PF, + recv_sys.recovery_start, recv_sys.rpo); + DBUG_RETURN(HA_ERR_INITIALIZATION); + } + + if (log_sys.archive && srv_log_file_size > log_sys.ARCHIVE_FILE_SIZE_MAX) + { + sql_print_error("InnoDB: innodb_log_archive=ON" + " disallows innodb_log_file_size>4G"); + DBUG_RETURN(HA_ERR_INITIALIZATION); + } + if (!tpool::supports_native_aio()) srv_use_native_aio= FALSE; @@ -4145,6 +4179,9 @@ static int innodb_init(void* p) err = srv_start(create_new_db); if (err != DB_SUCCESS) { + if (!recv_sys.rpo) { + recv_sys.rpo = srv_read_only_mode; + } 
innodb_shutdown(); DBUG_RETURN(innodb_init_abort()); } @@ -5673,6 +5710,7 @@ dberr_t ha_innobase::statistics_init(dict_table_t *table, bool recalc) { ut_ad(table->is_readable()); ut_ad(!table->stats_mutex_is_owner()); + ut_ad(!srv_read_only_mode || recv_sys.rpo); uint32_t stat= table->stat; dberr_t err= DB_SUCCESS; @@ -5682,7 +5720,7 @@ dberr_t ha_innobase::statistics_init(dict_table_t *table, bool recalc) dict_stats_empty_table(table); else { - if (dict_table_t::stats_is_persistent(stat) && !srv_read_only_mode + if (dict_table_t::stats_is_persistent(stat) && !recv_sys.rpo #ifdef WITH_WSREP && !wsrep_thd_skip_locking(m_user_thd) #endif @@ -7987,6 +8025,7 @@ calc_row_difference( const bool skip_virtual = ha_innobase::omits_virtual_cols(*table->s); ut_ad(!srv_read_only_mode); + ut_ad(!recv_sys.rpo); clust_index = dict_table_get_first_index(prebuilt->table); auto_inc = 0; @@ -12081,6 +12120,17 @@ int create_table_info_t::prepare_create_table(const char* name, bool strict) DBUG_RETURN(parse_table_name(name)); } +/********************************************************************//** +Helper function to push warnings from InnoDB internals to SQL-layer. */ +static +void +ib_foreign_warn( + trx_t* trx, /*!< in: trx */ + dberr_t error, /*!< in: error code to push as warning */ + const char *table_name, + const char *format,/*!< in: warning message */ + ...); + /** Push warning message to SQL-layer based on foreign key constraint index match error. 
@param[in] trx Current transaction @@ -13762,6 +13812,7 @@ static dberr_t innobase_rename_table(trx_t *trx, const char *from, DBUG_ASSERT(trx->dict_operation); ut_ad(!srv_read_only_mode); + ut_ad(!recv_sys.rpo); normalize_table_name(norm_to, to); normalize_table_name(norm_from, from); @@ -14748,6 +14799,8 @@ ha_innobase::info_low( DBUG_ENTER("info"); + ut_ad(!srv_read_only_mode || recv_sys.rpo); + DEBUG_SYNC_C("ha_innobase_info_low"); /* If we are forcing recovery at a high level, we will suppress @@ -14778,7 +14831,7 @@ ha_innobase::info_low( m_prebuilt->trx->op_info = "updating table statistics"; if (ib_table->stats_is_persistent() - && !srv_read_only_mode + && !recv_sys.rpo && dict_stats_persistent_storage_check(false) == SCHEMA_OK) { if (is_analyze) { @@ -16070,6 +16123,8 @@ ha_innobase::external_lock( DBUG_ENTER("ha_innobase::external_lock"); DBUG_PRINT("enter",("lock_type: %d", lock_type)); + ut_ad(!srv_read_only_mode || recv_sys.rpo); + update_thd(thd); trx_t* trx = m_prebuilt->trx; ut_ad(m_prebuilt->table); @@ -16128,7 +16183,7 @@ ha_innobase::external_lock( const auto sql_command = thd_sql_command(thd); /* Check for UPDATEs in read-only mode. 
*/ - if (srv_read_only_mode) { + if (recv_sys.rpo) { switch (sql_command) { case SQLCOM_CREATE_TABLE: if (lock_type != F_WRLCK) { @@ -16181,7 +16236,7 @@ ha_innobase::external_lock( switch (m_prebuilt->table->quiesce) { case QUIESCE_START: /* Check for FLUSH TABLE t WITH READ LOCK; */ - if (!srv_read_only_mode + if (!recv_sys.rpo && sql_command == SQLCOM_FLUSH && lock_type == F_RDLCK) { @@ -16357,7 +16412,9 @@ innodb_show_status( /* We don't create the temp files or associated mutexes in read-only-mode */ - if (srv_read_only_mode) { + ut_ad(!srv_read_only_mode || recv_sys.rpo); + + if (recv_sys.rpo) { DBUG_RETURN(0); } @@ -16509,6 +16566,8 @@ ha_innobase::store_lock( 'lock'; this may also be TL_IGNORE */ { + ut_ad(!srv_read_only_mode || recv_sys.rpo); + /* Note that trx in this function is NOT necessarily m_prebuilt->trx because we call update_thd() later, in ::external_lock()! Failure to understand this caused a serious memory corruption bug in 5.1.11. */ @@ -16550,7 +16609,7 @@ ha_innobase::store_lock( const bool in_lock_tables = thd_in_lock_tables(thd); const int sql_command = thd_sql_command(thd); - if (srv_read_only_mode + if (recv_sys.rpo && (sql_command == SQLCOM_UPDATE || sql_command == SQLCOM_INSERT || sql_command == SQLCOM_REPLACE @@ -17436,6 +17495,8 @@ fast_shutdown_validate( for update function */ struct st_mysql_value* value) /*!< in: incoming string */ { + ut_ad(!srv_read_only_mode || recv_sys.rpo); + if (check_sysvar_int(thd, var, save, value)) { return(1); } @@ -17443,7 +17504,7 @@ fast_shutdown_validate( uint new_val = *reinterpret_cast(save); if (srv_fast_shutdown && !new_val - && !srv_read_only_mode && abort_loop) { + && !recv_sys.rpo && abort_loop) { return(1); } @@ -18316,10 +18377,12 @@ static void checkpoint_now_set(THD* thd, st_mysql_sys_var*, void*, const void *save) { + ut_ad(!srv_read_only_mode || recv_sys.rpo); + if (!*static_cast(save)) return; - if (srv_read_only_mode) + if (recv_sys.rpo) { push_warning_printf(thd, 
Sql_condition::WARN_LEVEL_WARN, HA_ERR_UNSUPPORTED, @@ -18446,7 +18509,9 @@ buffer_pool_dump_now( const void* save) /*!< in: immediate result from check function */ { - if (*(my_bool*) save && !srv_read_only_mode) { + ut_ad(!srv_read_only_mode || recv_sys.rpo); + + if (*(my_bool*) save && !recv_sys.rpo) { mysql_mutex_unlock(&LOCK_global_system_variables); buf_dump_start(); mysql_mutex_lock(&LOCK_global_system_variables); @@ -18471,7 +18536,9 @@ buffer_pool_load_now( const void* save) /*!< in: immediate result from check function */ { - if (*(my_bool*) save && !srv_read_only_mode) { + ut_ad(!srv_read_only_mode || recv_sys.rpo); + + if (*(my_bool*) save && !recv_sys.rpo) { mysql_mutex_unlock(&LOCK_global_system_variables); buf_load_start(); mysql_mutex_lock(&LOCK_global_system_variables); @@ -18496,7 +18563,9 @@ buffer_pool_load_abort( const void* save) /*!< in: immediate result from check function */ { - if (*(my_bool*) save && !srv_read_only_mode) { + ut_ad(!srv_read_only_mode || recv_sys.rpo); + + if (*(my_bool*) save && !recv_sys.rpo) { mysql_mutex_unlock(&LOCK_global_system_variables); buf_load_abort(); mysql_mutex_lock(&LOCK_global_system_variables); @@ -18540,7 +18609,8 @@ static void innodb_data_file_write_through_update(THD *, st_mysql_sys_var*, static void innodb_doublewrite_update(THD *, st_mysql_sys_var*, void *, const void *save) { - if (!srv_read_only_mode) + ut_ad(!srv_read_only_mode || recv_sys.rpo); + if (!recv_sys.rpo) fil_system.set_use_doublewrite(*static_cast(save)); } @@ -19048,7 +19118,7 @@ static MYSQL_SYSVAR_ENUM(flush_method, innodb_flush_method, static MYSQL_SYSVAR_STR(log_group_home_dir, srv_log_group_home_dir, PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "Path to ib_logfile0", NULL, NULL, NULL); + "Path to ib_logfile0 or ib_*.log", NULL, NULL, NULL); static MYSQL_SYSVAR_DOUBLE(max_dirty_pages_pct, srv_max_buf_pool_modified_pct, PLUGIN_VAR_RQCMDARG, @@ -19412,7 +19482,7 @@ static MYSQL_SYSVAR_UINT(log_buffer_size, log_sys.buf_size, NULL, 
NULL, 16U << 20, 2U << 20, log_sys.buf_size_max, 4096); static constexpr const char *innodb_log_file_mmap_description= - "Whether ib_logfile0" + "Whether the log" " resides in persistent memory (when supported) or" " should initially be memory-mapped"; static MYSQL_SYSVAR_BOOL(log_file_mmap, log_sys.log_mmap, @@ -19429,7 +19499,7 @@ static MYSQL_SYSVAR_BOOL(log_file_buffering, log_sys.log_buffered, static MYSQL_SYSVAR_BOOL(log_file_write_through, log_sys.log_write_through, PLUGIN_VAR_OPCMDARG, - "Whether each write to ib_logfile0 is write through", + "Whether each write to the log is write through", nullptr, innodb_log_file_write_through_update, FALSE); static MYSQL_SYSVAR_BOOL(data_file_buffering, fil_system.buffered, @@ -19442,11 +19512,37 @@ static MYSQL_SYSVAR_BOOL(data_file_write_through, fil_system.write_through, "Whether each write to data files writes through", nullptr, innodb_data_file_write_through_update, FALSE); +static void innodb_log_archive_update(THD *, st_mysql_sys_var*, + void *, const void *save) noexcept +{ + log_sys.set_archive(*static_cast(save)); +} + +static MYSQL_SYSVAR_BOOL(log_archive, log_sys.archive, + PLUGIN_VAR_OPCMDARG, + "Whether log archiving is desired", + nullptr, innodb_log_archive_update, FALSE); + +static MYSQL_SYSVAR_UINT64_T(log_archive_start, innodb_log_archive_start, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "initial value of innodb_lsn_archived; 0=auto-detect", + nullptr, nullptr, 0, 0, std::numeric_limits::max(), 0); + +static MYSQL_SYSVAR_UINT64_T(log_recovery_start, recv_sys.recovery_start, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "LSN to start recovery from (0=automatic)", + nullptr, nullptr, 0, 0, std::numeric_limits::max(), 0); + +static MYSQL_SYSVAR_UINT64_T(log_recovery_target, recv_sys.rpo, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "recovery point objective (end LSN; 0=unlimited)", + nullptr, nullptr, 0, 0, std::numeric_limits::max(), 0); + static MYSQL_SYSVAR_ULONGLONG(log_file_size, 
srv_log_file_size, PLUGIN_VAR_RQCMDARG, - "Redo log size in bytes.", + "Desired log file size in bytes", nullptr, innodb_log_file_size_update, - 96 << 20, 4 << 20, std::numeric_limits::max(), 4096); + 96 << 20, log_t::FILE_SIZE_MIN, std::numeric_limits::max(), 4096); static uint innodb_log_spin_wait_delay; @@ -19876,6 +19972,10 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(log_file_write_through), MYSQL_SYSVAR(data_file_buffering), MYSQL_SYSVAR(data_file_write_through), + MYSQL_SYSVAR(log_archive), + MYSQL_SYSVAR(log_archive_start), + MYSQL_SYSVAR(log_recovery_start), + MYSQL_SYSVAR(log_recovery_target), MYSQL_SYSVAR(log_file_size), MYSQL_SYSVAR(log_write_ahead_size), MYSQL_SYSVAR(log_spin_wait_delay), diff --git a/storage/innobase/handler/handler0alter.cc b/storage/innobase/handler/handler0alter.cc index 8905f8f8bc028..20077780fff1e 100644 --- a/storage/innobase/handler/handler0alter.cc +++ b/storage/innobase/handler/handler0alter.cc @@ -7999,6 +7999,7 @@ ha_innobase::prepare_inplace_alter_table( DBUG_ASSERT(!ha_alter_info->handler_ctx); DBUG_ASSERT(ha_alter_info->create_info); DBUG_ASSERT(!srv_read_only_mode); + DBUG_ASSERT(!recv_sys.rpo); /* Init online ddl status variables */ onlineddl_rowlog_rows = 0; @@ -8869,6 +8870,7 @@ ha_innobase::inplace_alter_table( bool rebuild_templ = false; DBUG_ENTER("inplace_alter_table"); DBUG_ASSERT(!srv_read_only_mode); + DBUG_ASSERT(!recv_sys.rpo); DEBUG_SYNC(m_user_thd, "innodb_inplace_alter_table_enter"); @@ -11333,6 +11335,7 @@ ha_innobase::commit_inplace_alter_table( DBUG_ENTER("commit_inplace_alter_table"); DBUG_ASSERT(!srv_read_only_mode); + DBUG_ASSERT(!recv_sys.rpo); DBUG_ASSERT(!ctx0 || ctx0->prebuilt == m_prebuilt); DBUG_ASSERT(!ctx0 || ctx0->old_table == m_prebuilt->table); diff --git a/storage/innobase/ibuf/ibuf0ibuf.cc b/storage/innobase/ibuf/ibuf0ibuf.cc index 46f7f1f923ed9..1e9c947bb573f 100644 --- a/storage/innobase/ibuf/ibuf0ibuf.cc +++ b/storage/innobase/ibuf/ibuf0ibuf.cc @@ 
-891,11 +891,8 @@ static dberr_t ibuf_open(btr_cur_t *cur, mtr_t *mtr) ATTRIBUTE_COLD dberr_t ibuf_upgrade() { - if (srv_read_only_mode) - { - sql_print_error("InnoDB: innodb_read_only_mode prevents an upgrade"); - return DB_READ_ONLY; - } + ut_ad(!srv_read_only_mode); + ut_ad(!recv_sys.rpo); sql_print_information("InnoDB: Upgrading the change buffer"); @@ -1061,14 +1058,25 @@ dberr_t ibuf_upgrade_needed() err= DB_CORRUPTION; goto err_exit; } - else if (srv_read_only_mode) - { - sql_print_error("InnoDB: innodb_read_only=ON prevents an upgrade" - " of the change buffer"); - err= DB_READ_ONLY; - } - else if (srv_force_recovery != SRV_FORCE_NO_LOG_REDO) - err= DB_FAIL; + else + do + { + const char *reason; + if (srv_read_only_mode) + reason= "read_only=ON"; + else if (recv_sys.rpo) + reason= "log_recovery_target"; + else + { + if (srv_force_recovery != SRV_FORCE_NO_LOG_REDO) + err= DB_FAIL; + continue; + } + sql_print_error("InnoDB: innodb_%s prevents an upgrade" + " of the change buffer", reason); + err= DB_READ_ONLY; + } + while (false); goto func_exit; } diff --git a/storage/innobase/include/ha_prototypes.h b/storage/innobase/include/ha_prototypes.h index 42edb3ab4315c..a87ca8715679f 100644 --- a/storage/innobase/include/ha_prototypes.h +++ b/storage/innobase/include/ha_prototypes.h @@ -369,16 +369,6 @@ ATTRIBUTE_COLD void innodb_fk_error(const trx_t *trx, dberr_t err, const char *name, const dict_foreign_t& foreign); -/********************************************************************//** -Helper function to push warnings from InnoDB internals to SQL-layer. */ -void -ib_foreign_warn( - trx_t* trx, /*!< in: trx */ - dberr_t error, /*!< in: error code to push as warning */ - const char *table_name, - const char *format,/*!< in: warning message */ - ...); - /** Normalizes a table name string. A normalized name consists of the database name catenated to '/' and table name. For example: test/mytable. 
diff --git a/storage/innobase/include/log0log.h b/storage/innobase/include/log0log.h index e80011a9c4c50..b78cf85c05f7c 100644 --- a/storage/innobase/include/log0log.h +++ b/storage/innobase/include/log0log.h @@ -35,22 +35,6 @@ Created 12/9/1995 Heikki Tuuri using st_::span; -static const char LOG_FILE_NAME_PREFIX[] = "ib_logfile"; -static const char LOG_FILE_NAME[] = "ib_logfile0"; - -/** Composes full path for a redo log file -@param[in] filename name of the redo log file -@return path with log file name*/ -std::string get_log_file_path(const char *filename= LOG_FILE_NAME); - -/** Delete log file. -@param[in] suffix suffix of the file name */ -static inline void delete_log_file(const char* suffix) -{ - auto path = get_log_file_path(LOG_FILE_NAME_PREFIX).append(suffix); - os_file_delete_if_exists_func(path.c_str(), nullptr); -} - struct completion_callback; /** Ensure that the log has been written to the log file up to a given @@ -127,9 +111,12 @@ class log_file_t bool flush() const noexcept { return os_file_flush(m_file); } }; +struct recv_warp; + /** Redo log buffer */ struct log_t { + friend recv_warp; /** The maximum buf_size */ static constexpr unsigned buf_size_max= os_file_request_size_max; @@ -186,6 +173,13 @@ struct log_t public: /** innodb_log_buffer_size (usable append_prepare() size in bytes) */ unsigned buf_size; + /** set when there may be need to initiate a log checkpoint. + This must hold if lsn - last_checkpoint_lsn > max_checkpoint_age. 
*/ + std::atomic<bool> need_checkpoint; + /** whether a checkpoint is pending; protected by latch.wr_lock() */ + Atomic_relaxed<bool> checkpoint_pending; + /** next checkpoint number (protected by latch.wr_lock()) */ + uint16_t next_checkpoint_no; /** log file size in bytes, including the header */ lsn_t file_size; @@ -211,6 +205,40 @@ struct log_t /** latch_have_wr() for checkpoint, latch_have_any() for append_prepare() */ log_rwlock latch; + /** current innodb_log_write_ahead_size */ + uint write_size; + /** format of the redo log: e.g., FORMAT_10_8 */ + uint32_t format; + /** the minimum log file size */ + static constexpr lsn_t FILE_SIZE_MIN{4 << 20}; + /** the maximum log file size in innodb_log_archive=ON format */ + static constexpr lsn_t ARCHIVE_FILE_SIZE_MAX{1ULL << 32}; + /** the current value of innodb_log_archive; protected by latch.wr_lock() */ + my_bool archive; + /** whether the memory-mapped interface is enabled for the log */ + my_bool log_mmap; + /** the default value of log_mmap */ + static constexpr bool log_mmap_default= +# if defined __linux__ /* MAP_POPULATE would enable read-ahead */ + true || +# elif defined __FreeBSD__ /* MAP_PREFAULT_READ would enable read-ahead */ + true || +# else /* an unnecessary read-ahead of a large ib_logfile0 is a risk */ +# endif + false; +#if defined __linux__ || defined _WIN32 + /** whether file system caching is enabled for the log */ + my_bool log_buffered; +# ifdef _WIN32 + static constexpr bool log_maybe_unbuffered= true; +# else + /** whether file system caching may be disabled */ + bool log_maybe_unbuffered; +# endif +#endif + /** whether each write to ib_logfile0 is durable (O_DSYNC) */ + my_bool log_write_through; + /** log record buffer, written to by mtr_t::commit() */ alignas(CPU_LEVEL1_DCACHE_LINESIZE) byte *buf; @@ -230,13 +258,6 @@ struct log_t In write_buf(), buf and flush_buf may be swapped */ byte *flush_buf; - /** set when there may be need to initiate a log checkpoint.
- This must hold if lsn - last_checkpoint_lsn > max_checkpoint_age. */ - std::atomic<bool> need_checkpoint; - /** whether a checkpoint is pending; protected by latch.wr_lock() */ - Atomic_relaxed<bool> checkpoint_pending; - /** next checkpoint number (protected by latch.wr_lock()) */ - byte next_checkpoint_no; /** Log sequence number when a log file overwrite (broken crash recovery) was noticed. Protected by latch.wr_lock(). */ lsn_t overwrite_warned; @@ -247,15 +268,24 @@ struct log_t lsn_t (*writer)() noexcept; /** next checkpoint LSN (protected by latch.wr_lock()) */ lsn_t next_checkpoint_lsn; + /** end_lsn of the first available checkpoint, or 0; + protected by latch.wr_lock() */ + lsn_t archived_lsn; /** Log file */ log_file_t log; private: - /** Log file being constructed during resizing; protected by latch */ + /** Log file being constructed during resizing, + or the previous archived log file; protected by latch */ log_file_t resize_log; - /** size of resize_log; protected by latch */ + /** size of resize_log, or the requested innodb_log_file_size + of the next file created if archive==TRUE; protected by latch */ lsn_t resize_target; - /** Buffer for writing to resize_log; @see buf */ + /** Buffer for writing to resize_log; @see buf + Also a spare buffer between archived_mmap_switch_prepare() + and archived_mmap_switch_complete(), + or archived_mmap_switch_recovery_prepare() + and archived_mmap_switch_recovery_complete().
*/ byte *resize_buf; /** Buffer for writing to resize_log; @see flush_buf */ byte *resize_flush_buf; @@ -263,37 +293,11 @@ struct log_t /** log sequence number when log resizing was initiated; 0 if the log is not being resized, 1 if resize_start() is in progress */ std::atomic resize_lsn; - /** the log sequence number at the start of the log file */ + /** the log sequence number at the start of the current log file */ lsn_t first_lsn; + /** the log sequence number when the latest checkpoint was initiated */ + lsn_t end_lsn; public: - /** current innodb_log_write_ahead_size */ - uint write_size; - /** format of the redo log: e.g., FORMAT_10_8 */ - uint32_t format; - /** whether the memory-mapped interface is enabled for the log */ - my_bool log_mmap; - /** the default value of log_mmap */ - static constexpr bool log_mmap_default= -# if defined __linux__ /* MAP_POPULATE would enable read-ahead */ - true || -# elif defined __FreeBSD__ /* MAP_PREFAULT_READ would enable read-ahead */ - true || -# else /* an unnecessary read-ahead of a large ib_logfile0 is a risk */ -# endif - false; -#if defined __linux__ || defined _WIN32 - /** whether file system caching is enabled for the log */ - my_bool log_buffered; -# ifdef _WIN32 - static constexpr bool log_maybe_unbuffered= true; -# else - /** whether file system caching may be disabled */ - bool log_maybe_unbuffered; -# endif -#endif - /** whether each write to ib_logfile0 is durable (O_DSYNC) */ - my_bool log_write_through; - /** Fields involved in checkpoints @{ */ lsn_t log_capacity; /*!< capacity of the log; if the checkpoint age exceeds this, it is @@ -311,7 +315,9 @@ struct log_t for lsn - last_checkpoint_lsn when a new query step is started */ - /** buffer for checkpoint header */ + /** buffer for checkpoint header; protected by latch; + nullptr if is_mmap() and at most one log file is open; + a pointer to the oldest open archive log file if is_mmap() */ byte *checkpoint_buf; /* @} */ @@ -333,7 +339,7 @@ struct log_t 
bool is_mmap() const noexcept { return !flush_buf; } /** @return whether a handle to the log is open; - is_mmap() && !is_opened() holds for PMEM */ + is_mmap() && (is_opened() == archive) holds for PMEM */ bool is_opened() const noexcept { return log.is_opened(); } /** @return LSN at which log resizing was started and is still in progress @@ -362,14 +368,33 @@ struct log_t { return thd == resize_initiator; } /** Replicate a write to the log. + @tparam mmap whether the memory-mapped interface is enabled @param lsn start LSN @param end end of the mini-transaction @param len length of the mini-transaction @param seq offset of the sequence bit from the end */ + template<bool mmap> inline void resize_write(lsn_t lsn, const byte *end, - size_t len, size_t seq) noexcept; + size_t len, size_t seq) noexcept + { + if (UNIV_LIKELY_NULL(resize_buf)) + resize_write_low<mmap>(lsn, end, len, seq); + } + + /** SET GLOBAL innodb_log_archive + @param archive the new value of innodb_log_archive */ + void set_archive(my_bool archive) noexcept; private: + /** Replicate a write to the log. + @tparam mmap whether the memory-mapped interface is enabled + @param lsn start LSN + @param end end of the mini-transaction + @param len length of the mini-transaction + @param seq offset of the sequence bit from the end */ + template<bool mmap> + ATTRIBUTE_COLD void resize_write_low(lsn_t lsn, const byte *end, + size_t len, size_t seq) noexcept; /** Write resize_buf to resize_log. @param b resize_buf or resize_flush_buf @param length the used length of b */ @@ -380,23 +405,19 @@ struct log_t @return whether an error occurred */ static bool resize_rename() noexcept; - /** @return pointer for writing to resize_buf - @retval nullptr if no is_mmap() based resizing is active */ - inline byte *resize_buf_begin(lsn_t lsn) const noexcept; - /** @return end of resize_buf */ - inline const byte *resize_buf_end() const noexcept - { return resize_buf + resize_target; } - /** Initialise the redo log subsystem.
*/ void create() noexcept; /** Attach a log file. @return whether the memory allocation succeeded */ - bool attach(log_file_t file, os_offset_t size) noexcept; + bool attach(log_file_t file, os_offset_t size, bool read_only) noexcept; /** Disable memory-mapped access (update log_mmap) */ void clear_mmap() noexcept; void close_file(bool really_close= true) noexcept; + /** Stash a log archive file in multi-file recovery */ + inline void stash_archive_file() noexcept; + #if defined __linux__ || defined _WIN32 /** Try to enable or disable file system caching (update log_buffered) */ void set_buffered(bool buffered) noexcept; @@ -413,6 +434,10 @@ struct log_t @param encrypted whether the log is encrypted */ static void header_write(byte *buf, lsn_t lsn, bool encrypted) noexcept; + /** Rewrite the log file header in set_archive() + @param archive the new value of innodb_log_archive */ + void header_rewrite(my_bool archive) noexcept; + /** @return an estimate of get_lsn(), using acquire-release ordering with write_buf() or persist(); an upper bound if said functions have updated only one of the fields, @@ -434,6 +459,13 @@ struct log_t (write_lsn_offset & (WRITE_BACKOFF - 1)); } + /** @return whether a back-off in a log write is in progress */ + bool is_backoff() const noexcept + { + ut_ad(latch_have_wr()); + return write_lsn_offset & WRITE_BACKOFF; + } + lsn_t get_flushed_lsn(std::memory_order order= std::memory_order_acquire) const noexcept { return flushed_to_disk_lsn.load(order); } @@ -455,7 +487,34 @@ struct log_t /** Persist the log. 
@param lsn desired new value of flushed_to_disk_lsn */ void persist(lsn_t lsn) noexcept; + /** @return the overflow buffer when ARCHIVED_MMAP is wrapping around */ + byte *get_archived_mmap_switch() const noexcept + { + ut_ad(archived_mmap_switch()); + return resize_buf + START_OFFSET; + } #endif + /** @return whether archived_mmap_switch_complete() needs to be called */ + bool archived_mmap_switch() const noexcept + { + ut_ad(latch_have_any()); + return UNIV_UNLIKELY(archive && resize_buf); + } + /** Create a new log file when the current one will fill up. + @param buf log records to append + @param length size of the log records, in bytes + @param offset log file offset */ + ATTRIBUTE_COLD void archive_new_write(const byte *buf, size_t length, + lsn_t offset) noexcept; + + /** Ensure that innodb_log_archive=ON will default to the current + innodb_log_file_size if no size has been specified. */ + void archive_set_size() noexcept + { + ut_ad(!resize_in_progress()); + if (!resize_target) + resize_target= file_size; + } bool check_for_checkpoint() const { @@ -489,13 +548,71 @@ struct log_t @param late whether the WRITE_BACKOFF flag had already been set @param ex whether log_sys.latch is exclusively locked */ ATTRIBUTE_COLD void append_prepare_wait(bool late, bool ex) noexcept; +#ifdef HAVE_PMEM + /** Wait in append_prepare() for buffer to become available + @param late whether the WRITE_BACKOFF flag had already been set + @param ex whether log_sys.latch is exclusively locked */ + ATTRIBUTE_COLD void archived_mmap_switch_prepare(bool late, bool ex) + noexcept; +#endif public: + /** Attempt to finish archived_mmap_switch_prepare(). + @return the current LSN in the new file + @retval 0 if no switch took place */ + ATTRIBUTE_COLD lsn_t archived_mmap_switch_complete() noexcept; + + /** Prepare for multi-file memory-mapped log recovery. */ + ATTRIBUTE_COLD void archived_mmap_switch_recovery_prepare() noexcept; + /** Finish archived_mmap_switch_recovery_prepare(). 
*/ + ATTRIBUTE_COLD void archived_mmap_switch_recovery_complete() noexcept; + /** Try to switch archive log files on recovery. + @return whether the log file was switched */ + inline bool archived_switch_recovery() noexcept; + /** Undo archived_switch_recovery() + in recv_sys_t::find_checkpoint_archived() */ + inline void archived_switch_recovery_rewind() noexcept; + + /** How to write log */ + enum write { + /** normal writing !log_sys.is_mmap() */ + WRITE_NORMAL, + /** circular memory-mapped writing when log_sys.is_mmap() */ + CIRCULAR_MMAP, + /** memory-mapped log for log_sys.archive */ + ARCHIVED_MMAP + }; + + /** Get a name of a circular log file. + @param i log file number (0 to 101) + @return the path name of the log file */ + ATTRIBUTE_COLD static std::string get_circular_path(size_t i= 0); + + /** @return the name of the current log file */ + ATTRIBUTE_COLD std::string get_path() const; + + /** Append the archive log file base name to a string. + @param path directory name and separator + @param lsn first LSN stored in the file + @return path with the base file name appended */ + static ATTRIBUTE_COLD std::string &append_archive_name(std::string &path, + lsn_t lsn); + + /** Generate an archive log file name. + @param lsn first LSN stored in the file + @return archive log file name */ + ATTRIBUTE_COLD std::string get_archive_path(lsn_t lsn) const; + /** @return the current archive log file name */ + std::string get_archive_path() const { return get_archive_path(first_lsn); } + + /** @return the next archive log file name */ + ATTRIBUTE_COLD std::string get_next_archive_path() const; + /** Reserve space in the log buffer for appending data. 
- @tparam mmap log_sys.is_mmap() + @tparam mode how to write log @param size total length of the data to append(), in bytes @param ex whether log_sys.latch is exclusively locked @return the start LSN and the buffer position for append() */ - template + template std::pair append_prepare(size_t size, bool ex) noexcept; /** Append a string of bytes to the redo log. @@ -527,6 +644,18 @@ struct log_t /** @return the first LSN of the log file */ lsn_t get_first_lsn() const noexcept { return first_lsn; } + /** Set the recovered checkpoint. + @param lsn log sequence number of the checkpoint + @param end_lsn LSN passed to write_checkpoint() + @param number checkpoint number */ + void set_recovered_checkpoint(lsn_t lsn, lsn_t end_lsn, uint16_t number) + noexcept + { + next_checkpoint_lsn= lsn; + this->end_lsn= end_lsn; + next_checkpoint_no= number; + } + /** Determine the sequence bit at a log sequence number */ byte get_sequence_bit(lsn_t lsn) const noexcept { @@ -547,6 +676,9 @@ struct log_t @param end_lsn start LSN of the FILE_CHECKPOINT mini-transaction */ inline void write_checkpoint(lsn_t end_lsn) noexcept; + /** Wait for write_checkpoint() if necessary. 
*/ + ATTRIBUTE_COLD void checkpoint_margin() noexcept; + /** Variations of write_buf() */ enum resizing_and_latch { /** skip latch.wr_unlock(); log resizing may or may not be in progress */ diff --git a/storage/innobase/include/log0recv.h b/storage/innobase/include/log0recv.h index 95847965e1d4d..08b56cba57e5b 100644 --- a/storage/innobase/include/log0recv.h +++ b/storage/innobase/include/log0recv.h @@ -244,6 +244,11 @@ struct recv_sys_t lsn_t scanned_lsn; /** log sequence number at the end of the FILE_CHECKPOINT record, or 0 */ lsn_t file_checkpoint; + /** recovery start checkpoint */ + lsn_t recovery_start; + /** recovery point objective (a limit for scanned_lsn) */ + lsn_t rpo; + /** the time when progress was last reported */ time_t progress_time; @@ -401,6 +406,15 @@ struct recv_sys_t @return error code or DB_SUCCESS */ dberr_t find_checkpoint(); +private: + /** Find a checkpoint in an innodb_log_archive=ON file. + @param first_lsn the first LSN of the file + @param silent whether to silence error reporting + @return error code + @retval DB_SUCCESS if a suitable checkpoint was found */ + dberr_t find_checkpoint_archived(lsn_t first_lsn, bool silent); +public: + /** Register a redo log snippet for a page. @param it page iterator @param l redo log snippet diff --git a/storage/innobase/include/mtr0mtr.h b/storage/innobase/include/mtr0mtr.h index aeeb90b8e6a83..adb310c031cf9 100644 --- a/storage/innobase/include/mtr0mtr.h +++ b/storage/innobase/include/mtr0mtr.h @@ -691,13 +691,15 @@ struct mtr_t { ATTRIBUTE_NOINLINE size_t crc32c() noexcept; /** Commit the mini-transaction log. - @tparam pmem log_sys.is_mmap() + @tparam mmap log_sys.is_mmap() @param mtr mini-transaction @param lsns {start_lsn,flush_ahead_lsn} */ - template + template static void commit_log(mtr_t *mtr, std::pair lsns) noexcept; - /** Release log_sys.latch. */ + /** Release log_sys.latch. 
+ @tparam mmap log_sys.is_mmap() */ + template void commit_log_release() noexcept; /** Append the redo log records to the redo log buffer. @@ -705,11 +707,11 @@ struct mtr_t { std::pair do_write() noexcept; /** Append the redo log records to the redo log buffer. - @tparam mmap log_sys.is_mmap() + @tparam how how to write @param mtr mini-transaction @param len number of bytes to write @return {start_lsn,flush_ahead_lsn} */ - template static + template static std::pair finish_writer(mtr_t *mtr, size_t len); /** The applicable variant of commit_log() */ diff --git a/storage/innobase/include/trx0sys.h b/storage/innobase/include/trx0sys.h index 493e48467a89e..010d028dc8798 100644 --- a/storage/innobase/include/trx0sys.h +++ b/storage/innobase/include/trx0sys.h @@ -414,7 +414,7 @@ class rw_trx_hash_t trx_state_eq(trx, TRX_STATE_PREPARED_RECOVERED) || (trx_state_eq(trx, TRX_STATE_ACTIVE) && (!srv_was_started || - srv_read_only_mode || + recv_sys.rpo || srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO))); trx_free_at_shutdown(trx); } diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc index 266893588f5d9..1a2b271cd2f9d 100644 --- a/storage/innobase/lock/lock0lock.cc +++ b/storage/innobase/lock/lock0lock.cc @@ -357,7 +357,7 @@ static bool lock_rec_validate_page(const buf_block_t *block, bool latched) lock_sys_t lock_sys; /** Only created if !srv_read_only_mode. Protected by lock_sys.latch. */ -static FILE *lock_latest_err_file; +FILE *lock_latest_err_file; /*********************************************************************//** Reports that a transaction id is insensible, i.e., in the future. 
*/ @@ -418,6 +418,7 @@ void lock_sys_t::create(ulint n_cells) { ut_ad(this == &lock_sys); ut_ad(!is_initialised()); + ut_ad(srv_read_only_mode == !lock_latest_err_file); m_initialised= true; @@ -435,12 +436,6 @@ void lock_sys_t::create(ulint n_cells) rec_hash.create(n_cells); prdt_hash.create(n_cells); prdt_page_hash.create(n_cells); - - if (!srv_read_only_mode) - { - lock_latest_err_file= os_file_create_tmpfile(); - ut_a(lock_latest_err_file); - } } #ifdef UNIV_PFS_RWLOCK @@ -494,12 +489,6 @@ void lock_sys_t::close() if (!m_initialised) return; - if (lock_latest_err_file) - { - my_fclose(lock_latest_err_file, MYF(MY_WME)); - lock_latest_err_file= nullptr; - } - rec_hash.free(); prdt_hash.free(); prdt_page_hash.free(); diff --git a/storage/innobase/log/log0log.cc b/storage/innobase/log/log0log.cc index 7d3942aa19350..c7af713b41f93 100644 --- a/storage/innobase/log/log0log.cc +++ b/storage/innobase/log/log0log.cc @@ -109,6 +109,8 @@ void log_t::create() noexcept #endif last_checkpoint_lsn= FIRST_LSN; + first_lsn= FIRST_LSN; + end_lsn= FIRST_LSN; log_capacity= 0; max_modified_age_async= 0; max_checkpoint_age= 0; @@ -152,7 +154,7 @@ dberr_t log_file_t::read(os_offset_t offset, span buf) noexcept ut_a(size < buf.size()); } - sql_print_error("InnoDB: pread(\"ib_logfile0\") returned %zd," + sql_print_error("InnoDB: pread(log) returned %zd," " operating system error %u", s, unsigned(IF_WIN(GetLastError(), errno))); return DB_IO_ERROR; @@ -180,7 +182,7 @@ void log_file_t::write(os_offset_t offset, span buf) noexcept ut_a(size < buf.size()); } - sql_print_error("[FATAL] InnoDB: pwrite(\"ib_logfile0\") returned %zd," + sql_print_error("[FATAL] InnoDB: pwrite(log) returned %zd," " operating system error %u", s, unsigned(IF_WIN(GetLastError(), errno))); abort(); @@ -191,15 +193,17 @@ void log_file_t::write(os_offset_t offset, span buf) noexcept # endif /** Attempt to memory map a file. 
-@param file log file handle -@param size file size +@param file log file handle +@param size file size +@param read_only whether the file is read-only @return pointer to memory mapping @retval MAP_FAILED if the memory cannot be mapped */ static void *log_mmap(os_file_t file, # ifdef HAVE_PMEM bool &is_pmem, /*!< whether the file is on pmem */ # endif - os_offset_t size) + os_offset_t size, + bool read_only) { #if SIZEOF_SIZE_T < 8 if (size != os_offset_t(size_t(size))) @@ -228,8 +232,8 @@ static void *log_mmap(os_file_t file, The mapping will always be read-only if innodb_read_only=ON or if mariadb-backup is running in any other mode than --prepare --export. */ - const bool read_only= - srv_read_only_mode || srv_operation >= SRV_OPERATION_BACKUP; + ut_ad(read_only || + (!srv_read_only_mode && srv_operation < SRV_OPERATION_BACKUP)); # ifdef _WIN32 void *ptr= MAP_FAILED; @@ -319,34 +323,41 @@ ATTRIBUTE_COLD static void log_file_message() noexcept static inline void log_file_message() noexcept {} #endif -bool log_t::attach(log_file_t file, os_offset_t size) noexcept +bool log_t::attach(log_file_t file, os_offset_t size, bool read_only) noexcept { - log= file; + ut_ad(!log.is_opened()); + ut_ad(archive || !resize_log.is_opened()); + ut_ad(archive || !buf); + ut_ad(archive || !resize_buf); + ut_ad(archive || !flush_buf); + ut_ad(archive || !resize_flush_buf); ut_ad(!size || size >= START_OFFSET + SIZE_OF_FILE_CHECKPOINT); + ut_ad(!writer); + file_size= size; - ut_ad(!buf); - ut_ad(!flush_buf); - ut_ad(!writer); if (size) { # ifdef HAVE_PMEM bool is_pmem; - void *ptr= ::log_mmap(log.m_file, is_pmem, size); + void *ptr= ::log_mmap(file.m_file, is_pmem, size, read_only); # else - void *ptr= ::log_mmap(log.m_file, size); + void *ptr= ::log_mmap(file.m_file, size, read_only); # endif if (ptr != MAP_FAILED) { + if (archive) + log= file; + else + file.close(); # ifdef HAVE_PMEM if (is_pmem) { - log.close(); log_buffered= false; log_maybe_unbuffered= true; - 
IF_WIN(,mprotect(ptr, size_t(size), PROT_READ)); } # endif + IF_WIN(,mprotect(ptr, size_t(size), PROT_READ)); buf= static_cast(ptr); writer_update(false); # ifdef HAVE_PMEM @@ -355,7 +366,19 @@ bool log_t::attach(log_file_t file, os_offset_t size) noexcept # endif goto func_exit; } + + if (buf) + { + ut_ad(archive); + log= file; + log_mmap= false; + return true; + } } + else + ut_ad(!archive); + + log= file; log_mmap= false; buf= static_cast(ut_malloc_dontdump(buf_size, PSI_INSTRUMENT_ME)); if (!buf) @@ -430,25 +453,31 @@ void log_t::create(lsn_t lsn) noexcept ut_ad(is_latest()); ut_ad(this == &log_sys); + next_checkpoint_no= archive ? (is_encrypted() ? 8 : 1) : 0; write_lsn_offset= 0; base_lsn.store(lsn, std::memory_order_relaxed); flushed_to_disk_lsn.store(lsn, std::memory_order_relaxed); first_lsn= lsn; + end_lsn= lsn; write_lsn= lsn; + if (!archived_lsn) + archived_lsn= lsn; - last_checkpoint_lsn= 0; + last_checkpoint_lsn= lsn; DBUG_PRINT("ib_log", ("write header " LSN_PF, lsn)); #ifdef HAVE_PMEM if (is_mmap()) { - ut_ad(!is_opened()); + ut_ad(is_opened() == archive); mprotect(buf, size_t(file_size), PROT_READ | PROT_WRITE); + buf_size= unsigned(std::min(capacity(), buf_size_max)); + if (archive) + goto archive_header; memset_aligned<4096>(buf, 0, 4096); - log_sys.header_write(buf, lsn, is_encrypted()); + header_write(buf, lsn, is_encrypted()); pmem_persist(buf, 512); - buf_size= unsigned(std::min(capacity(), buf_size_max)); } else #endif @@ -456,9 +485,18 @@ void log_t::create(lsn_t lsn) noexcept ut_ad(!is_mmap()); memset_aligned<4096>(flush_buf, 0, buf_size); memset_aligned<4096>(buf, 0, buf_size); - log_sys.header_write(buf, lsn, is_encrypted()); - log.write(0, {buf, 4096}); - memset_aligned<512>(buf, 0, 512); + if (!archive) + { + header_write(buf, lsn, is_encrypted()); + log.write(0, {buf, 4096}); + memset_aligned<512>(buf, 0, 512); + } + else +#ifdef HAVE_PMEM + archive_header: +#endif + if (is_encrypted()) + log_crypt_write_header(buf); } } @@ -558,10 
+596,9 @@ void log_t::set_buffered(bool buffered) noexcept { if (const dberr_t err= log.close()) log_close_failed(err); - std::string path{get_log_file_path()}; log_buffered= buffered; bool success; - log.m_file= os_file_create_func(path.c_str(), + log.m_file= os_file_create_func(get_path().c_str(), OS_FILE_OPEN, OS_LOG_FILE, false, &success); ut_a(log.m_file != OS_FILE_CLOSED); @@ -581,14 +618,13 @@ void log_t::set_write_through(bool write_through) bool(log_write_through) != write_through) { os_file_close_func(log.m_file); - log.m_file= OS_FILE_CLOSED; - std::string path{get_log_file_path()}; + log= OS_FILE_CLOSED; log_write_through= write_through; bool success; - log.m_file= os_file_create_func(path.c_str(), + log.m_file= os_file_create_func(get_path().c_str(), OS_FILE_OPEN, OS_LOG_FILE, false, &success); - ut_a(log.m_file != OS_FILE_CLOSED); + ut_a(log.is_opened()); sql_print_information(log_write_through ? "InnoDB: Log writes write through" : "InnoDB: Log writes may be cached"); @@ -596,6 +632,221 @@ void log_t::set_write_through(bool write_through) log_resize_release(); } +/** Rewrite the log file header in set_archive() +@param archive the new value of innodb_log_archive */ +void log_t::header_rewrite(my_bool archive) noexcept +{ + ut_ad(!resize_buf); + ut_ad(this->archive == !archive); + + /* We will rewrite the log file header while the file + name is not ib_logfile0. That is, the archived log file + recovery will accept both the circular and the archived + format for the last file. */ + + byte* c= checkpoint_buf; + ut_ad(end_lsn >= first_lsn); + ut_ad(!archive || end_lsn <= first_lsn + ~0U); + ut_ad(format == (is_encrypted() ? 
FORMAT_ENC_11 : FORMAT_10_8)); +#ifdef HAVE_PMEM + if (!c) + { + ut_ad(is_mmap()); + if (!archive) + { + memset_aligned<512>(buf + 512, 0, START_OFFSET - 512); + c= buf + CHECKPOINT_1; + mach_write_to_8(my_assume_aligned<8>(c), next_checkpoint_lsn); + mach_write_to_8(my_assume_aligned<8>(c + 8), end_lsn); + mach_write_to_4(my_assume_aligned<4>(c + 60), my_crc32c(0, c, 60)); + pmem_persist(buf + 512, START_OFFSET - 512); + header_write(buf, first_lsn, is_encrypted()); + memset_aligned<512>(buf + 512, 0, CHECKPOINT_1 - 512); + pmem_persist(buf, CHECKPOINT_1); + } + else + { + next_checkpoint_no= uint16_t(7 * is_encrypted() + 2); + const uint32_t d= uint32_t(end_lsn - first_lsn + START_OFFSET); + if (!is_encrypted()) + { + mach_write_to_8(buf, d); + memset_aligned<8>(buf + 8, 0, 64 - 8); + } + else + { + log_crypt_write_header(buf); + mach_write_to_4(buf + 32, d); + memset_aligned<4>(buf + 36, 0, 64 - 36); + } + pmem_persist(buf, 64); + memset_aligned<64>(buf + 64, 0, START_OFFSET - 64); + pmem_persist(buf, START_OFFSET); + } + return; + } +#endif + memset_aligned<512>(c, 0, write_size); + + if (!archive) + { + mach_write_to_8(my_assume_aligned<8>(c), next_checkpoint_lsn); + mach_write_to_8(my_assume_aligned<8>(c + 8), end_lsn); + mach_write_to_4(my_assume_aligned<4>(c + 60), my_crc32c(0, c, 60)); + log.write(CHECKPOINT_1, {c, write_size}); + os_file_flush(log.m_file); + memset_aligned<512>(c, 0, write_size); + for (size_t offset= CHECKPOINT_1; (offset+= write_size) < START_OFFSET;) + log.write(offset, {c, write_size}); + header_write(c, first_lsn, is_encrypted()); + if (write_size > 512) + memset_aligned<512>(c + 512, 0, write_size - 512); + log.write(0, {c, write_size}); + os_file_flush(log.m_file); + memset_aligned<512>(c, 0, write_size); + for (size_t offset= 0; (offset+= write_size) < CHECKPOINT_1;) + log.write(offset, {c, write_size}); + } + else + { + next_checkpoint_no= uint16_t(7 * is_encrypted() + 2); + const uint32_t d= uint32_t(end_lsn - first_lsn + 
START_OFFSET); + if (!is_encrypted()) + mach_write_to_8(c, d); + else + { + log_crypt_write_header(c); + mach_write_to_4(c + 32, d); + } + log.write(0, {c, write_size}); + os_file_flush(log.m_file); + for (size_t offset= 0; (offset+= write_size) < START_OFFSET;) + log.write(offset, {field_ref_zero, write_size}); + } + + os_file_flush(log.m_file); +} + +/** SET GLOBAL innodb_log_archive +@param archive the new value of innodb_log_archive */ +void log_t::set_archive(my_bool archive) noexcept +{ + for (;;) + { + IF_WIN(log_resize_acquire(), latch.wr_lock(SRW_LOCK_CALL)); + if (resize_in_progress()) + { + my_printf_error(ER_WRONG_USAGE, + "SET GLOBAL innodb_log_file_size is in progress", + MYF(0)); + break; + } + if (archive == this->archive) + break; + if (archive && file_size > ARCHIVE_FILE_SIZE_MAX) + { + my_printf_error(ER_WRONG_USAGE, "innodb_log_file_size>4M", MYF(0)); + break; + } +#ifdef HAVE_PMEM + if (is_mmap()) + { + ut_ad(this->archive == log.is_opened()); + if (is_backoff()) + /* Prevent a race condition with append_prepare() */ + goto retry; + if (archive); + else if (resize_buf) + /* Wait for a call to archived_mmap_switch_complete() */ + goto retry; + else if (checkpoint_buf) + /* Wait for write_checkpoint() */ + goto retry; + else + log.close(); + } +#endif + else if (checkpoint_pending) + { + /* Prevent a race condition with write_checkpoint() */ +#ifdef HAVE_PMEM + retry: +#endif + IF_WIN(log_resize_release(), latch.wr_unlock()); + continue; + } + + ut_ad(!resize_buf); + ut_ad(!resize_log.is_opened()); // FIXME: wait for checkpoint? 
+ + const lsn_t old_first_lsn{first_lsn}; + if (archive) + first_lsn+= (end_lsn - old_first_lsn) / capacity() * capacity(); + std::string normal_name{get_circular_path()}; + std::string arch_name{get_archive_path()}; + + const char *old_name= normal_name.c_str(); + const char *new_name= arch_name.c_str(); + if (!archive) + { + std::swap(old_name, new_name); + header_rewrite(archive); + } +#if defined HAVE_PMEM && !defined _WIN32 + else if (is_mmap()) + { + /* Open the file so that write_checkpoint() + will be able to flag it read-only */ + bool success; + log.m_file= + os_file_create_func(old_name, OS_FILE_OPEN, OS_LOG_FILE, + false, &success); + if (!log.is_opened()) + { + my_error(ER_ERROR_ON_READ, MYF(0), old_name, errno); + break; + } + } +#endif + +#ifdef _WIN32 + /* On Microsoft Windows, there must be no open file handles to a + file that is being renamed. */ + if (const dberr_t err= log.close()) + log_close_failed(err); +#endif + int fail= my_rename(old_name, new_name, MY_SYNC_DIR); +#ifdef _WIN32 + { + bool success; + log.m_file= os_file_create_func(fail ? old_name : new_name, + OS_FILE_OPEN, OS_LOG_FILE, + false, &success); + ut_a(log.m_file != OS_FILE_CLOSED); + } +#endif + if (fail) + { + my_error(ER_ERROR_ON_RENAME, MYF(0), old_name, new_name, my_errno); + first_lsn= old_first_lsn; + break; + } + + if (archive) + { + header_rewrite(archive); + archive_set_size(); + } + + archived_lsn= end_lsn; + this->archive= archive; + mtr_t::finisher_update(); + break; + } + + IF_WIN(log_resize_release(), latch.wr_unlock()); +} + /** Start resizing the log and release the exclusive latch. 
@param size requested new file_size @param thd the current thread identifier @@ -606,8 +857,8 @@ log_t::resize_start_status log_t::resize_start(os_offset_t size, void *thd) ut_ad(size >= 4U << 20); ut_ad(!(size & 4095)); ut_ad(!srv_read_only_mode); + ut_ad(!recv_sys.rpo); ut_ad(thd); - log_resize_acquire(); resize_start_status status; @@ -616,6 +867,19 @@ log_t::resize_start_status log_t::resize_start(os_offset_t size, void *thd) status= RESIZE_NO_CHANGE; else if (resize_in_progress()) status= RESIZE_IN_PROGRESS; + else if (archive) + { + if (size > ARCHIVE_FILE_SIZE_MAX) + status= RESIZE_FAILED; + else + { + status= RESIZE_NO_CHANGE; + /* When the current log becomes full and a new archivable log file + is being created, it will be of this size. At that point we will assign + file_size= resize_target, resize_target= 0; */ + resize_target= size; + } + } else { lsn_t start_lsn; @@ -624,7 +888,7 @@ log_t::resize_start_status log_t::resize_start(os_offset_t size, void *thd) ut_ad(!resize_buf); ut_ad(!resize_flush_buf); ut_ad(!resize_initiator); - std::string path{get_log_file_path("ib_logfile101")}; + const std::string path{get_circular_path(101)}; bool success; resize_initiator= thd; resize_lsn.store(1, std::memory_order_relaxed); @@ -645,7 +909,7 @@ log_t::resize_start_status log_t::resize_start(os_offset_t size, void *thd) else if (is_mmap()) { bool is_pmem{false}; - ptr= ::log_mmap(resize_log.m_file, is_pmem, size); + ptr= ::log_mmap(resize_log.m_file, is_pmem, size, false); if (ptr == MAP_FAILED) goto alloc_fail; @@ -748,8 +1012,8 @@ void log_t::resize_abort(void *thd) noexcept resize_target= 0; resize_lsn.store(0, std::memory_order_relaxed); resize_initiator= nullptr; - std::string path{get_log_file_path("ib_logfile101")}; - IF_WIN(DeleteFile(path.c_str()), unlink(path.c_str())); + IF_WIN(DeleteFile(get_circular_path(101).c_str()), + unlink(get_circular_path(101).c_str())); writer_update(false); } @@ -757,10 +1021,13 @@ void log_t::resize_abort(void *thd) 
noexcept } /** Write an aligned buffer to ib_logfile0. -@param buf buffer to be written -@param length length of data to be written -@param offset log file offset */ -static void log_write_buf(const byte *buf, size_t length, lsn_t offset) +@param max_length the maximum length that can be written to the file +@param buf buffer to be written +@param length length of data to be written +@param offset log file offset */ +static void log_write_buf(lsn_t max_length, + const byte *buf, size_t length, lsn_t offset) + noexcept { ut_ad(write_lock.is_owner()); ut_ad(!recv_no_log_write); @@ -769,21 +1036,99 @@ static void log_write_buf(const byte *buf, size_t length, lsn_t offset) ut_ad(!(length & block_size_1)); ut_ad(!(size_t(buf) & block_size_1)); ut_ad(length); + ut_ad(max_length == log_sys.file_size - offset); - const lsn_t maximum_write_length{log_sys.file_size - offset}; - ut_ad(maximum_write_length <= log_sys.file_size - log_sys.START_OFFSET); - - if (UNIV_UNLIKELY(length > maximum_write_length)) + if (UNIV_UNLIKELY(length > max_length)) { - log_sys.log.write(offset, {buf, size_t(maximum_write_length)}); - length-= size_t(maximum_write_length); - buf+= size_t(maximum_write_length); + ut_ad(!log_sys.archive); + log_sys.log.write(offset, {buf, size_t(max_length)}); + length-= size_t(max_length); + buf+= size_t(max_length); ut_ad(log_sys.START_OFFSET + length < offset); offset= log_sys.START_OFFSET; } log_sys.log.write(offset, {buf, length}); } +ATTRIBUTE_COLD +std::string &log_t::append_archive_name(std::string &path, lsn_t lsn) +{ + path.append("ib_"); + for (int i= 16; i--; lsn<<= 4) + path.push_back("0123456789abcdef"[lsn >> 60]); + path.append(".log"); + return path; +} + +ATTRIBUTE_COLD std::string log_t::get_archive_path(lsn_t lsn) const +{ + size_t size= strlen(srv_log_group_home_dir); + retry: + switch (srv_log_group_home_dir[size - 1]) { +#ifdef _WIN32 + case '\\': +#endif + case '/': + if (size <= 1) + break; + size--; + goto retry; + } + if (size == 1 && 
*srv_log_group_home_dir == '.') + size= 0; + std::string path; + path.reserve(size + sizeof "/ib_0000000000000000.log"); + path.assign(srv_log_group_home_dir, size); + if (size) + path.push_back('/'); + return append_archive_name(path, lsn); +} + +ATTRIBUTE_COLD std::string log_t::get_next_archive_path() const +{ return get_archive_path(first_lsn + capacity()); } + +ATTRIBUTE_COLD void log_t::archive_new_write(const byte *buf, size_t length, + lsn_t offset) noexcept +{ + ut_ad(latch_have_wr()); + ut_ad(write_lock.is_owner()); + ut_ad(archive); + ut_ad(length >= file_size - offset); + ut_ad(!resize_log.is_opened()); + ut_ad(!resize_buf); + ut_ad(!resize_in_progress()); + ut_ad(resize_target >= 4U << 20); + ut_ad(is_latest()); + + const size_t first{size_t(file_size - offset)}; + log.write(offset, {buf, first}); + length-= first; + buf+= first; + + std::string path{get_next_archive_path()}; + bool success; + pfs_os_file_t file= + os_file_create_func(path.c_str(), OS_FILE_CREATE, OS_LOG_FILE, + false, &success); + ut_ad(success == (file != OS_FILE_CLOSED)); + if (file != OS_FILE_CLOSED) + { + if (os_file_set_size(path.c_str(), file, resize_target)) + { + resize_log= log; + log.m_file= file; + if (length) + log.write(START_OFFSET, {buf, length}); + return; + } + os_file_close(file); + IF_WIN(DeleteFile(path.c_str()), unlink(path.c_str())); + } + sql_print_error("[FATAL] InnoDB: Failed to create %s of %" PRIu64 + " bytes", path.c_str(), resize_target); + abort(); +} + /** Invoke commit_checkpoint_notify_ha() to notify that outstanding log writes have been completed. 
*/ void log_flush_notify(lsn_t flush_lsn); @@ -910,16 +1255,124 @@ static size_t log_pad(lsn_t lsn, size_t pad, byte *begin, byte *extra) #endif #ifdef HAVE_PMEM +ATTRIBUTE_COLD +void log_t::archived_mmap_switch_prepare(bool late, bool ex) noexcept +{ + ut_ad(archive); + ut_ad(is_mmap()); + ut_ad(log.is_opened()); + ut_ad(!resize_log.is_opened()); + ut_ad(!resize_buf); + ut_ad(!checkpoint_buf); + ut_ad(!resize_in_progress()); + ut_ad(resize_target >= 4U << 20); + ut_ad(is_latest()); + + if (UNIV_LIKELY(!ex)) + { + latch.rd_unlock(); + if (!late) + { + /* Wait for all threads to back off. */ + latch.wr_lock(SRW_LOCK_CALL); + goto got_ex; + } + + const auto delay= my_cpu_relax_multiplier / 4 * srv_spin_wait_delay; + const auto rounds= srv_n_spin_wait_rounds; + + for (;;) + { + HMT_low(); + for (auto r= rounds + 1; r--; ) + { + if (write_lsn_offset.load(std::memory_order_relaxed) & WRITE_BACKOFF) + { + for (auto d= delay; d--; ) + MY_RELAX_CPU(); + } + else + { + HMT_medium(); + goto done; + } + } + HMT_medium(); + std::this_thread::sleep_for(std::chrono::microseconds(100)); + } + } + else + { + got_ex: + const uint64_t l= write_lsn_offset.load(std::memory_order_relaxed); + const lsn_t lsn= base_lsn.load(std::memory_order_relaxed) + + (l & (WRITE_BACKOFF - 1)); + waits++; + ut_ad(archive); + ut_ad(!resize_buf); + ut_ad(!resize_in_progress()); + ut_ad(resize_target >= 4U << 20); + ut_ad(is_latest()); + ut_ad(log.is_opened()); + ut_ad(!resize_log.is_opened()); + + do + { + std::string path{get_next_archive_path()}; + bool success; + os_file_t file= + os_file_create_func(path.c_str(), OS_FILE_CREATE, OS_LOG_FILE, + false, &success); + ut_ad(success == (file != OS_FILE_CLOSED)); + if (file != OS_FILE_CLOSED) + { + if (os_file_set_size(path.c_str(), file, resize_target)) + { + bool is_pmem{false}; + resize_buf= static_cast(::log_mmap(file, is_pmem, + resize_target, false)); + if (resize_buf != MAP_FAILED) + { + /* Will be closed in write_checkpoint() */ + resize_log= log; 
+ log= file; + continue; + } + resize_buf= nullptr; + os_file_close(file); + } + } + + IF_WIN(DeleteFile(path.c_str()), unlink(path.c_str())); + sql_print_error("[FATAL] InnoDB: Failed to create and map %s of %" PRIu64 + " bytes", path.c_str(), resize_target); + abort(); + } + while (false); + + ut_ad(lsn - get_flushed_lsn(std::memory_order_relaxed) < capacity()); + persist(lsn); + /* Above we cleared the WRITE_BACKOFF flag, + which our caller will recheck. */ + if (ex) + return; + latch.wr_unlock(); + } + +done: + latch.rd_lock(SRW_LOCK_CALL); +} + void log_t::persist(lsn_t lsn) noexcept { - ut_ad(!is_opened()); ut_ad(!write_lock.is_owner()); ut_ad(!flush_lock.is_owner()); ut_ad(latch_have_wr()); + ut_ad(is_opened() == archive); lsn_t old= flushed_to_disk_lsn.load(std::memory_order_relaxed); - if (old >= lsn) + if (old > lsn) return; const size_t start(calc_lsn_offset(old)); @@ -1013,6 +1466,7 @@ lsn_t log_t::write_buf() noexcept } else { + ut_ad(!recv_sys.rpo); ut_ad(write_lock.is_owner()); ut_ad(!recv_no_log_write); write_lock.set_pending(lsn); @@ -1091,18 +1545,31 @@ lsn_t log_t::write_buf() noexcept ut_ad(base + (write_lsn_offset & (WRITE_TO_BUF - 1)) == lsn); write_to_log++; + DBUG_PRINT("ib_log", ("write " LSN_PF " to " LSN_PF " at " LSN_PF, + write_lsn, lsn, offset)); + + const lsn_t max_length{file_size - offset}; + ut_ad(max_length <= capacity()); + if (UNIV_UNLIKELY(length > max_length)) + { + if (resizing != RESIZING && archive) + { + archive_new_write(write_buf, length, offset); + if (resizing != RETAIN_LATCH) + latch.wr_unlock(); + goto written; + } + } if (resizing != RETAIN_LATCH) latch.wr_unlock(); - DBUG_PRINT("ib_log", ("write " LSN_PF " to " LSN_PF " at " LSN_PF, - write_lsn, lsn, offset)); - /* Do the write to the log file */ - log_write_buf(write_buf, length, offset); + log_write_buf(max_length, write_buf, length, offset); if (UNIV_LIKELY_NULL(re_write_buf)) resize_write_buf(re_write_buf, length); + written: write_lsn= lsn; if 
(UNIV_UNLIKELY(srv_shutdown_state > SRV_SHUTDOWN_INITIATED)) @@ -1248,38 +1715,33 @@ void log_t::clear_mmap() noexcept { if (!is_mmap() || high_level_read_only) return; -#ifdef HAVE_PMEM - if (!is_opened()) - { - ut_d(latch.wr_lock(SRW_LOCK_CALL)); - ut_ad(!resize_in_progress()); - ut_ad(get_lsn() == get_flushed_lsn(std::memory_order_relaxed)); - ut_d(latch.wr_unlock()); - return; - } -#endif log_resize_acquire(); ut_ad(!resize_in_progress()); - ut_ad(write_lsn == get_lsn()); - ut_ad(write_lsn == get_flushed_lsn(std::memory_order_relaxed)); - - if (buf) /* this may be invoked while creating a new database */ + ut_ad(get_lsn() == get_flushed_lsn(std::memory_order_relaxed)); +#ifdef HAVE_PMEM + if (is_opened() && !archive) +#endif { - alignas(16) byte log_block[4096]; - const size_t bs{write_size}; + ut_ad(write_lsn == get_lsn()); + + if (buf) /* this may be invoked while creating a new database */ { - const size_t bf= - size_t(write_lsn - base_lsn.load(std::memory_order_relaxed)); - memcpy_aligned<16>(log_block, buf + (bf & ~(bs - 1)), bs); - } + alignas(16) byte log_block[4096]; + const size_t bs{write_size}; + { + const size_t bf= + size_t(write_lsn - base_lsn.load(std::memory_order_relaxed)); + memcpy_aligned<16>(log_block, buf + (bf & ~(bs - 1)), bs); + } - close_file(false); - log_mmap= false; - ut_a(attach(log, file_size)); - ut_ad(!is_mmap()); + close_file(false); + log_mmap= false; + ut_a(attach(log, file_size, false)); + ut_ad(!is_mmap()); - memcpy_aligned<16>(buf, log_block, bs); + memcpy_aligned<16>(buf, log_block, bs); + } } log_resize_release(); } @@ -1288,6 +1750,7 @@ void log_t::clear_mmap() noexcept ATTRIBUTE_COLD void log_write_and_flush() noexcept { ut_ad(!srv_read_only_mode); + ut_ad(!recv_sys.rpo); #ifdef HAVE_PMEM if (log_sys.is_mmap()) log_sys.persist(log_sys.get_lsn()); @@ -1300,45 +1763,46 @@ ATTRIBUTE_COLD void log_write_and_flush() noexcept } } -/****************************************************************//** -Tries to establish a big 
enough margin of free space in the log, such -that a new log entry can be catenated without an immediate need for a -checkpoint. NOTE: this function may only be called if the calling thread -owns no synchronization objects! */ -ATTRIBUTE_COLD static void log_checkpoint_margin() noexcept +ATTRIBUTE_COLD void log_t::checkpoint_margin() noexcept { - while (log_sys.check_for_checkpoint()) + ut_ad(this == &log_sys); + ut_ad(!recv_no_log_write); + + while (check_for_checkpoint()) { - log_sys.latch.wr_lock(SRW_LOCK_CALL); + latch.wr_lock(SRW_LOCK_CALL); ut_ad(!recv_no_log_write); - if (!log_sys.check_for_checkpoint()) + if (!check_for_checkpoint()) { func_exit: - log_sys.latch.wr_unlock(); + latch.wr_unlock(); return; } - const lsn_t lsn= log_sys.get_lsn(); - const lsn_t max_age= log_sys.max_checkpoint_age; - const lsn_t age= lsn_t(lsn - log_sys.last_checkpoint_lsn); + const lsn_t last{last_checkpoint_lsn}, max_age{max_checkpoint_age}; + lsn_t lsn{get_lsn()}; - if (age <= max_age) + if (last < first_lsn) + lsn= first_lsn; + else { + if (lsn_t(lsn - last) <= max_age) + { #ifndef DBUG_OFF - skip_checkpoint: + skip_checkpoint: #endif - log_sys.set_check_for_checkpoint(false); - goto func_exit; + set_check_for_checkpoint(false); + goto func_exit; + } + DBUG_EXECUTE_IF("ib_log_checkpoint_avoid_hard", goto skip_checkpoint;); + lsn-= max_age; } - DBUG_EXECUTE_IF("ib_log_checkpoint_avoid_hard", goto skip_checkpoint;); - log_sys.latch.wr_unlock(); + latch.wr_unlock(); /* We must wait to prevent the tail of the log overwriting the head. 
*/ - buf_flush_wait_flushed(lsn - max_age); - /* Sleep to avoid a thundering herd */ - std::this_thread::sleep_for(std::chrono::milliseconds(10)); + buf_flush_wait_flushed(lsn); } } @@ -1349,10 +1813,7 @@ void log_free_check() noexcept { ut_ad(!lock_sys.is_holder()); if (log_sys.check_for_checkpoint()) - { - ut_ad(!recv_no_log_write); - log_checkpoint_margin(); - } + log_sys.checkpoint_margin(); } #ifdef __linux__ @@ -1367,6 +1828,8 @@ ATTRIBUTE_COLD void logs_empty_and_mark_files_at_shutdown() noexcept lsn_t lsn; ulint count = 0; + ut_ad(!srv_read_only_mode || recv_sys.rpo); + ib::info() << "Starting shutdown..."; /* Wait until the master task and all other operations are idle: our @@ -1381,7 +1844,7 @@ ATTRIBUTE_COLD void logs_empty_and_mark_files_at_shutdown() noexcept srv_shutdown_state = SRV_SHUTDOWN_CLEANUP; if (srv_buffer_pool_dump_at_shutdown && - !srv_read_only_mode && srv_fast_shutdown < 2) { + !recv_sys.rpo && srv_fast_shutdown < 2) { buf_dump_start(); } srv_monitor_timer.reset(); @@ -1402,7 +1865,7 @@ ATTRIBUTE_COLD void logs_empty_and_mark_files_at_shutdown() noexcept shutdown, because the InnoDB layer may have committed or prepared transactions and we don't want to lose them. */ - if (ulint total_trx = srv_was_started && !srv_read_only_mode + if (ulint total_trx = srv_was_started && !recv_sys.rpo && srv_force_recovery < SRV_FORCE_NO_TRX_UNDO ? trx_sys.any_active_transactions() : 0) { @@ -1426,7 +1889,7 @@ ATTRIBUTE_COLD void logs_empty_and_mark_files_at_shutdown() noexcept ? 
"rollback of recovered transactions" : nullptr; if (thread_name) { - ut_ad(!srv_read_only_mode); + ut_ad(!recv_sys.rpo); wait_suspend_loop: service_manager_extend_timeout( COUNT_INTERVAL * CHECK_INTERVAL/1000000 * 2, @@ -1462,8 +1925,8 @@ ATTRIBUTE_COLD void logs_empty_and_mark_files_at_shutdown() noexcept buf_flush_buffer_pool(); } - if (srv_fast_shutdown == 2 || !srv_was_started) { - if (!srv_read_only_mode && srv_was_started) { + if (srv_fast_shutdown == 2 || !srv_was_started || recv_sys.rpo) { + if (!recv_sys.rpo && srv_was_started) { sql_print_information( "InnoDB: Executing innodb_fast_shutdown=2." " Next startup will execute crash recovery!"); @@ -1480,9 +1943,7 @@ ATTRIBUTE_COLD void logs_empty_and_mark_files_at_shutdown() noexcept srv_shutdown_state = SRV_SHUTDOWN_LAST_PHASE; return; - } - - if (!srv_read_only_mode) { + } else { service_manager_extend_timeout(INNODB_EXTEND_TIMEOUT_INTERVAL, "ensuring dirty buffer pool are written to log"); log_make_checkpoint(); @@ -1504,8 +1965,6 @@ ATTRIBUTE_COLD void logs_empty_and_mark_files_at_shutdown() noexcept if (lsn_changed) { goto loop; } - } else { - lsn = recv_sys.lsn; } srv_shutdown_state = SRV_SHUTDOWN_LAST_PHASE; @@ -1580,10 +2039,10 @@ void log_t::close() recv_sys.close(); } -std::string get_log_file_path(const char *filename) +ATTRIBUTE_COLD std::string log_t::get_circular_path(size_t i) { - const size_t size= strlen(srv_log_group_home_dir) + /* path separator */ 1 + - strlen(filename) + /* longest suffix */ 3; + ut_ad(i <= 101); + const size_t size= strlen(srv_log_group_home_dir) + sizeof "/ib_logfile101"; std::string path; path.reserve(size); path.assign(srv_log_group_home_dir); @@ -1597,7 +2056,10 @@ std::string get_log_file_path(const char *filename) default: path.push_back('/'); } - path.append(filename); + return path.append("ib_logfile").append(std::to_string(i)); +} - return path; +ATTRIBUTE_COLD std::string log_t::get_path() const +{ + return archive ? 
get_archive_path() : get_circular_path(); } diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc index e213231b210e2..fb213b410ad49 100644 --- a/storage/innobase/log/log0recv.cc +++ b/storage/innobase/log/log0recv.cc @@ -32,6 +32,8 @@ Created 9/20/1997 Heikki Tuuri #include "log0recv.h" +#define LOG_ARCHIVE_NAME "ib_%016" PRIx64 ".log" + #ifdef HAVE_MY_AES_H #include #endif @@ -57,6 +59,8 @@ Created 9/20/1997 Heikki Tuuri /** The recovery system */ recv_sys_t recv_sys; +/** 0 or the first LSN that would conflict with innodb_log_recovery_target */ +static lsn_t recv_sys_rpo_exceeded; /** TRUE when recv_init_crash_recovery() has been called. */ bool recv_needed_recovery; #ifdef UNIV_DEBUG @@ -1688,10 +1692,42 @@ static dberr_t recv_log_recover_10_5(lsn_t lsn_offset) return DB_SUCCESS; } +/** @return if the specified innodb_log_recovery_target is being violated */ +static bool recv_sys_invalid_rpo(lsn_t lsn) noexcept +{ + if (!recv_sys.rpo || recv_sys.rpo >= lsn) + return false; + sql_print_error("InnoDB: cannot fulfill innodb_log_recovery_target=%" + PRIu64 "<%" PRIu64, recv_sys.rpo, lsn); + log_sys.set_recovered_lsn(lsn); + return true; +} + +inline void log_t::stash_archive_file() noexcept +{ + ut_ad(log.is_opened()); + if (resize_log.is_opened()) + { + ut_ad(!is_mmap() == !resize_buf); + if (resize_buf) + my_munmap(resize_buf, size_t(resize_target)); + resize_log.close(); + } + if (is_mmap()) + { + resize_buf= buf; + buf= nullptr; + } + std::swap(log, resize_log); + resize_target= file_size; + writer= nullptr; +} + dberr_t recv_sys_t::find_checkpoint() { - bool wrong_size= false; byte *buf; + lsn_t first_lsn= 0; + bool read_only{srv_read_only_mode || srv_operation >= SRV_OPERATION_BACKUP}; ut_ad(pages.empty()); pages_it= pages.end(); @@ -1699,14 +1735,195 @@ dberr_t recv_sys_t::find_checkpoint() if (files.empty()) { file_checkpoint= 0; - std::string path{get_log_file_path()}; + int archive= log_sys.archive; + retry: + std::string 
path{log_sys.get_circular_path()}; bool success; - os_file_t file{os_file_create_func(path.c_str(), - OS_FILE_OPEN, - OS_LOG_FILE, - srv_read_only_mode, &success)}; - if (file == OS_FILE_CLOSED) + os_file_t file{os_file_create_func(path.c_str(), archive < 0 + ? OS_FILE_OPEN : OS_FILE_OPEN_SILENT, + OS_LOG_FILE, read_only, &success)}; + if (file != OS_FILE_CLOSED) + { + if (archive > 0) + { + sql_print_error("InnoDB: innodb_log_archive=ON but %s exists", + path.c_str()); + return DB_ERROR; + } + } + else if (archive < 0 || srv_operation != SRV_OPERATION_NORMAL) return DB_ERROR; + else + { + path.reserve(strlen(srv_log_group_home_dir) + + sizeof "/ib_0000000000000000.log"); + if (!tmp_buf) + { + tmp_buf= static_cast + (ut_malloc_dontdump(tmp_buf_size, PSI_INSTRUMENT_ME)); + if (!tmp_buf) + return DB_OUT_OF_MEMORY; + } +#ifdef _WIN32 + WIN32_FIND_DATAA entry; + path.assign(srv_log_group_home_dir); + switch (path.back()) { + case '\\': case '/': + break; + default: + path.push_back('/'); + } + path.append("ib_????????????????.log"); + HANDLE d= FindFirstFileA(path.c_str(), &entry); + if (d != INVALID_HANDLE_VALUE) + goto readdir; +#else + DIR *d= opendir(srv_log_group_home_dir); + if (d) + goto readdir; +#endif + no_archive_found: + if (archive) + sql_print_error("InnoDB: innodb_log_archive files not found in '%s'", + srv_log_group_home_dir); + if (archive) + return DB_ERROR; + archive= -1; + goto retry; + + readdir: + struct log { lsn_t end; bool read_only; }; + std::map logs; +#ifdef _WIN32 + do + { + if (entry.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) + continue; + lsn_t lsn; + int n{0}; + const char *fn{entry.cFileName}; + if (1 != sscanf(fn, LOG_ARCHIVE_NAME "%n", &lsn, &n) || fn[n] || + lsn < log_t::FIRST_LSN) + continue; + LARGE_INTEGER filesize; + filesize.LowPart= entry.nFileSizeLow; + filesize.HighPart= entry.nFileSizeHigh; + if ((filesize.LowPart & 4095) || + lsn_t(filesize.QuadPart) > log_t::ARCHIVE_FILE_SIZE_MAX || + lsn_t(filesize.QuadPart) < 
log_t::FILE_SIZE_MIN) + { + sql_print_warning("InnoDB: ignoring %s", fn); + continue; + } + logs.emplace(lsn, + log{lsn - log_t::START_OFFSET + filesize.QuadPart, + bool(entry.dwFileAttributes & + FILE_ATTRIBUTE_READONLY)}); + } + while (FindNextFile(d, &entry)); + FindClose(d); +#else + while (dirent *e= readdir(d)) + { + lsn_t lsn; + int n{0}; + const char *fn{e->d_name}; + if (1 != sscanf(fn, LOG_ARCHIVE_NAME "%n", &lsn, &n) || fn[n] || + lsn < log_t::FIRST_LSN) + continue; + path.assign(srv_log_group_home_dir); + path.push_back('/'); + struct stat st; + if (stat(log_sys.append_archive_name(path, lsn).c_str(), &st) || + (st.st_size & 4095) || + lsn_t(st.st_size) > log_t::ARCHIVE_FILE_SIZE_MAX || + lsn_t(st.st_size) < log_t::FILE_SIZE_MIN) + { + sql_print_warning("InnoDB: ignoring %s", path.c_str()); + continue; + } + logs.emplace(lsn, + log{lsn - log_t::START_OFFSET + st.st_size, + !(st.st_mode & 0400)}); + } + closedir(d); +#endif + + const auto end= logs.cend(); + auto i= logs.cbegin(), start= i, found_recovery_start= end; + int subsequent= 0; + if (i == end) + goto no_archive_found; + log_sys.format= srv_encrypt_log + ? 
log_t::FORMAT_ENC_11 : log_t::FORMAT_10_8; + log_sys.archive= true; + for (;;) + { + const lsn_t first{i->first}, last{i->second.end}; + if (log_sys.archived_lsn > first && log_sys.archived_lsn < last) + { + sql_print_error("InnoDB: Invalid innodb_log_archive_start=" LSN_PF); + return DB_ERROR; + } + if (recovery_start >= first && recovery_start < last) + found_recovery_start= i; + const auto prev= i; + if (++i == end) + { + if (!recovery_start) + subsequent= 0, start= prev; + break; + } + if (last == i->first) + subsequent++; + else + subsequent=0, start= i; + } + + if (recovery_start && found_recovery_start == end) + { + sql_print_error("InnoDB: No matching file found for" + " innodb_log_recovery_start=" LSN_PF, recovery_start); + return DB_ERROR; + } + + for (i= logs.cbegin();; start--, subsequent++) + { + if (recovery_start && start != found_recovery_start) + continue; + path.assign(srv_log_group_home_dir); + switch (path.back()) { +#ifdef _WIN32 + case '\\': +#endif + case '/': + break; + default: + path.push_back('/'); + } + read_only= subsequent || start->second.read_only; + const bool open_read_only{read_only || rpo || srv_read_only_mode}; + file= + os_file_create_func(log_sys.append_archive_name(path, start->first). 
+ c_str(), OS_FILE_OPEN, OS_LOG_FILE, + open_read_only, &success); + if (file == OS_FILE_CLOSED) + return DB_ERROR; + if (!log_sys.attach(file, start->second.end - start->first + + log_t::START_OFFSET, open_read_only)) + { + os_file_close(file); + return DB_ERROR; + } + const dberr_t err= + find_checkpoint_archived(start->first, !read_only && i != start); + if (err == DB_SUCCESS || read_only || recovery_start || i == start) + return err; + log_sys.stash_archive_file(); + } + } + + ut_ad(!log_sys.archive); const os_offset_t size{os_file_get_size(file)}; if (!size) { @@ -1716,21 +1933,21 @@ dberr_t recv_sys_t::find_checkpoint() else if (size < log_t::START_OFFSET + SIZE_OF_FILE_CHECKPOINT) { too_small: - sql_print_error("InnoDB: File %.*s is too small", - int(path.size()), path.data()); + sql_print_error("InnoDB: File %s is too small", path.c_str()); err_exit: os_file_close(file); return DB_ERROR; } - else if (!log_sys.attach(file, size)) + else if (!log_sys.attach(file, size, read_only)) goto err_exit; else file= OS_FILE_CLOSED; - recv_sys.files.emplace_back(file); + files.emplace_back(file); + for (int i= 1; i < 101; i++) { - path= get_log_file_path(LOG_FILE_NAME_PREFIX).append(std::to_string(i)); + path= log_sys.get_circular_path(i); file= os_file_create_func(path.c_str(), OS_FILE_OPEN_SILENT, OS_LOG_FILE, true, &success); @@ -1742,14 +1959,14 @@ dberr_t recv_sys_t::find_checkpoint() sql_print_error("InnoDB: Log file %.*s is of different size " UINT64PF " bytes than other log files " UINT64PF " bytes!", int(path.size()), path.data(), sz, size); - wrong_size= true; + first_lsn= LSN_MAX; } - recv_sys.files.emplace_back(file); + files.emplace_back(file); } if (!size) { - if (wrong_size) + if (first_lsn == LSN_MAX) return DB_CORRUPTION; lsn= log_sys.next_checkpoint_lsn; log_sys.format= log_t::FORMAT_3_23; @@ -1758,18 +1975,21 @@ dberr_t recv_sys_t::find_checkpoint() } else ut_ad(srv_operation == SRV_OPERATION_BACKUP); + + ut_ad(!log_sys.archive); 
log_sys.next_checkpoint_lsn= 0; lsn= 0; buf= my_assume_aligned<4096>(log_sys.buf); if (!log_sys.is_mmap()) if (dberr_t err= log_sys.log.read(0, {buf, log_sys.START_OFFSET})) return err; + /* Check the header page checksum. There was no checksum in the first redo log format (version 0). */ log_sys.format= mach_read_from_4(buf + LOG_HEADER_FORMAT); if (log_sys.format == log_t::FORMAT_3_23) { - if (wrong_size) + if (first_lsn == LSN_MAX) return DB_CORRUPTION; if (dberr_t err= recv_log_recover_pre_10_2()) return err; @@ -1779,6 +1999,12 @@ dberr_t recv_sys_t::find_checkpoint() log_sys.last_checkpoint_lsn= log_sys.next_checkpoint_lsn; log_sys.set_recovered_lsn(log_sys.next_checkpoint_lsn); lsn= file_checkpoint= log_sys.next_checkpoint_lsn; + if (rpo && rpo != lsn) + { + sql_print_error("InnoDB: cannot fulfill innodb_log_recovery_target=%" + PRIu64 "!=%" PRIu64, rpo, lsn); + return DB_CORRUPTION; + } if (UNIV_LIKELY(lsn != 0)) scanned_lsn= lsn; log_sys.next_checkpoint_no= 0; @@ -1791,7 +2017,7 @@ dberr_t recv_sys_t::find_checkpoint() return DB_CORRUPTION; } - const lsn_t first_lsn{mach_read_from_8(buf + LOG_HEADER_START_LSN)}; + first_lsn= mach_read_from_8(buf + LOG_HEADER_START_LSN); log_sys.set_first_lsn(first_lsn); char creator[LOG_HEADER_CREATOR_END - LOG_HEADER_CREATOR + 1]; memcpy(creator, buf + LOG_HEADER_CREATOR, sizeof creator); @@ -1847,14 +2073,15 @@ dberr_t recv_sys_t::find_checkpoint() } if (checkpoint_lsn >= log_sys.next_checkpoint_lsn) - { - log_sys.next_checkpoint_lsn= checkpoint_lsn; - log_sys.next_checkpoint_no= field == log_t::CHECKPOINT_1; - lsn= end_lsn; - } + log_sys.set_recovered_checkpoint(checkpoint_lsn, lsn= end_lsn, + field == log_t::CHECKPOINT_1); } if (!log_sys.next_checkpoint_lsn) goto got_no_checkpoint; + else if (!log_sys.archived_lsn) + log_sys.archived_lsn= lsn; + if (recv_sys_invalid_rpo(lsn)) + return DB_READ_ONLY; if (!memcmp(creator, "Backup ", 7)) srv_start_after_restore= true; @@ -1921,7 +2148,7 @@ dberr_t 
recv_sys_t::find_checkpoint() return DB_ERROR; } - if (wrong_size) + if (first_lsn == LSN_MAX) return DB_CORRUPTION; if (dberr_t err= recv_log_recover_10_5(lsn_offset)) @@ -2217,10 +2444,16 @@ struct recv_buf constexpr bool operator==(const recv_buf other) const { return ptr == other.ptr; } - static const byte *end() { return &log_sys.buf[recv_sys.len]; } + static const byte *end() noexcept { return &log_sys.buf[recv_sys.len]; } + + size_t get_offset() const noexcept + { + size_t offset= size_t(ptr - log_sys.buf); + ut_ad(offset < recv_sys.len); + return offset; + } - static constexpr bool may_wrap() { return false; } - static constexpr bool is_wrapped(const recv_buf&) { return false; } + static constexpr bool is_split(const recv_buf&) noexcept { return false; } bool is_eof(size_t len= 0) const noexcept { return ptr + len >= end(); } byte operator*() const noexcept { return *ptr; } @@ -2241,6 +2474,9 @@ struct recv_buf { return my_crc32c(crc, start.ptr, ptr - start.ptr); } + + void *memcpy(void *buf, size_t size) const noexcept + { return ::memcpy(buf, ptr, size); } }; /** Ring buffer wrapper for log_sys.buf[]; recv_sys.len == log_sys.file_size */ @@ -2248,8 +2484,20 @@ struct recv_ring : public recv_buf { constexpr recv_ring(const byte *ptr) : recv_buf(ptr) {} - static constexpr bool may_wrap() { return true; } - bool is_wrapped(const recv_ring &end) const { return end.ptr < ptr; } + size_t get_offset() const noexcept + { + size_t offset= size_t(ptr - log_sys.buf); + if (offset == log_sys.file_size) + { + ut_ad(log_sys.is_mmap()); + offset= log_sys.START_OFFSET; + } + else + ut_ad(offset < log_sys.file_size); + return offset; + } + + bool is_split(const recv_ring &end) const noexcept { return end.ptr < ptr; } constexpr static bool is_eof() { return false; } constexpr static bool is_eof(size_t) { return false; } @@ -2319,6 +2567,131 @@ struct recv_ring : public recv_buf &log_sys.buf[log_sys.START_OFFSET], ptr - &log_sys.buf[log_sys.START_OFFSET]); } + + void 
*memcpy(void *buf, size_t size) const noexcept + { + size_t wrap_size(end() - ptr); + ut_ad(wrap_size < size); + ::memcpy(buf, ptr, wrap_size); + ::memcpy(static_cast(buf) + wrap_size, + &log_sys.buf[log_sys.START_OFFSET], size - wrap_size); + return buf; + } +}; + +/** Buffer wrapper for memory-mapped log_sys.archive, +with the capability to warp from log_sys.buf to log_sys.resize_buf */ +struct recv_warp : public recv_buf +{ + constexpr recv_warp(const byte *ptr) : recv_buf(ptr) {} + + size_t get_offset() const noexcept + { + size_t offset= size_t(ptr - log_sys.buf); + if (offset < recv_sys.len) + return offset; + offset= size_t(ptr - &log_sys.resize_buf[log_sys.START_OFFSET]); + ut_ad(offset < log_sys.resize_target); + return recv_sys.len + offset; + } + + bool is_split(const recv_warp &end) const noexcept + { + const size_t len{recv_sys.len}; + const byte *buf{log_sys.buf}; + int db= (size_t(end.ptr - buf) < len) - (size_t(ptr - buf) < len); + ut_ad(db <= 0); + return db != 0; + } + + constexpr static bool is_eof() { return false; } + constexpr static bool is_eof(size_t) { return false; } + + byte operator*() const noexcept + { + ut_ad((ptr >= &log_sys.buf[log_sys.START_OFFSET] && ptr < end()) || + (ptr >= &log_sys.resize_buf[log_sys.START_OFFSET] && + ptr < &log_sys.resize_buf[log_sys.resize_target])); + return *ptr; + } + recv_warp operator+(size_t len) const noexcept + { recv_warp r{*this}; return r+= len; } + recv_warp &operator++() noexcept { return *this+= 1; } + recv_warp &operator+=(size_t len) noexcept + { + ut_ad(len < recv_sys.MTR_SIZE_MAX * 2); + const byte *const e{end()}; + const bool first{ptr < e && ptr >= log_sys.buf}; + ut_ad(!first || ptr >= &log_sys.buf[log_sys.START_OFFSET]); + ptr+= len; + if (first) + { + if (ptr < e) + return *this; + ptr= &log_sys.resize_buf[log_sys.START_OFFSET + (ptr - e)]; + } + ut_ad(ptr >= &log_sys.resize_buf[log_sys.START_OFFSET]); + ut_ad(ptr < &log_sys.resize_buf[log_sys.resize_target]); + return *this; + } + 
size_t operator-(const recv_warp &start) const noexcept + { + return start.is_split(*this) + ? size_t((ptr - &log_sys.resize_buf[log_sys.START_OFFSET]) + + (end() - start.ptr)) + : size_t(ptr - start.ptr); + } + + uint32_t decode_varint() const noexcept + { + recv_warp log{*this}; + uint32_t i{*log}; + if (i < MIN_2BYTE) + return i; + uint32_t j{*++log}; + if (i < 0xc0) + return MIN_2BYTE + ((i & ~0xc0) << 8 | j); + j<<= 8; + j|= *++log; + if (i < 0xe0) + return MIN_3BYTE + ((i & ~0xe0) << 16 | j); + j<<= 8; + j|= *++log; + if (i < 0xf0) + return MIN_4BYTE + ((i & ~0xf0) << 24 | j); + if (i == 0xf0) + { + j<<= 8; + j|= *++log; + if (j <= ~MIN_5BYTE) + return MIN_5BYTE + j; + } + return MLOG_DECODE_ERROR; + } + + uint32_t crc32c(uint32_t crc, const recv_warp &start) const noexcept + { + return start.is_split(*this) + ? my_crc32c(my_crc32c(crc, start.ptr, end() - start.ptr), + &log_sys.resize_buf[log_sys.START_OFFSET], + ptr - &log_sys.resize_buf[log_sys.START_OFFSET]) + : my_crc32c(crc, start.ptr, ptr - start.ptr); + } + + void *memcpy(void *buf, size_t size) const noexcept + { + if (is_split(*this + size)) + { + size_t wrap_size(end() - ptr); + ut_ad(wrap_size < size); + ::memcpy(buf, ptr, wrap_size); + ::memcpy(static_cast(buf) + wrap_size, + &log_sys.resize_buf[log_sys.START_OFFSET], size - wrap_size); + return buf; + } + else + return ::memcpy(buf, ptr, size); + } }; ATTRIBUTE_COLD @@ -2427,9 +2800,17 @@ recv_sys_t::parse_mtr_result log_parse_start(source &l, unsigned nonce) return recv_sys_t::PREMATURE_EOF; eom_found: - if (*l != log_sys.get_sequence_bit((l - begin) + recv_sys.lsn)) + const lsn_t end_lsn{(l - begin) + recv_sys.lsn}; + + if (*l != log_sys.get_sequence_bit(end_lsn) && !log_sys.archive) return recv_sys_t::GOT_EOF; + if (recv_sys.rpo && recv_sys.rpo < end_lsn) + { + recv_sys_rpo_exceeded= end_lsn; + return recv_sys_t::GOT_EOF; + } + if (l.is_eof(5 + nonce)) return recv_sys_t::PREMATURE_EOF; @@ -2440,16 +2821,12 @@ recv_sys_t::parse_mtr_result 
log_parse_start(source &l, unsigned nonce) crc32c= crc.crc32c(crc32c, l + 1); ut_ad(!crc.is_eof(3)); - if (!crc.is_wrapped(crc + 3)) + if (!crc.is_split(crc + 3)) stored_crc32c= mach_read_from_4(crc.ptr); else { byte b[4]; - size_t wrap_size(crc.end() - crc.ptr); - ut_ad(wrap_size < 4); - memcpy(b, crc.ptr, wrap_size); - memcpy(b + wrap_size, &log_sys.buf[log_sys.START_OFFSET], 4 - wrap_size); - stored_crc32c= mach_read_from_4(b); + stored_crc32c= mach_read_from_4(static_cast(crc.memcpy(b, 4))); } return crc32c == stored_crc32c ? recv_sys_t::OK : recv_sys_t::GOT_EOF; @@ -2474,7 +2851,7 @@ recv_sys_t::parse_mtr_result recv_sys_t::parse(source l, bool if_exists) (srv_operation == SRV_OPERATION_BACKUP || srv_operation == SRV_OPERATION_BACKUP_NO_DEFER)); mysql_mutex_assert_owner(&mutex); - ut_ad(log_sys.next_checkpoint_lsn); + ut_ad(log_sys.next_checkpoint_lsn || log_sys.archive); ut_ad(log_sys.is_recoverable()); ut_ad(log_sys.format == format); @@ -2497,33 +2874,21 @@ recv_sys_t::parse_mtr_result recv_sys_t::parse(source l, bool if_exists) const size_t s{l - begin}; ut_ad(s + 9 <= tmp_buf_size); constexpr size_t tail_size{format == log_t::FORMAT_10_8 ? 
1 + 4 : 1 + 8 + 4}; - const bool is_wrapped{begin.is_wrapped(l + (tail_size - 5))}; + const bool is_split{begin.is_split(l + (tail_size - 5))}; l+= tail_size; start_offset= begin.ptr - log_sys.buf; - offset= l.ptr - log_sys.buf; - if (l.may_wrap() && offset == log_sys.file_size) - { - ut_ad(log_sys.is_mmap()); - offset= log_sys.START_OFFSET; - } - ut_ad(offset < log_sys.file_size); + offset= l.get_offset(); const byte *ptr; - if (format == log_t::FORMAT_ENC_11 || is_wrapped) + if (format == log_t::FORMAT_ENC_11 || is_split) { byte *tmp= tmp_buf; ptr= tmp; - if (format == log_t::FORMAT_ENC_11 && !is_wrapped) + if (format == log_t::FORMAT_ENC_11 && !is_split) memcpy(tmp, begin.ptr, s + 9); else - { - size_t wrap_size(begin.end() - begin.ptr); - ut_ad(wrap_size < s + 9); - memcpy(tmp, begin.ptr, wrap_size); - memcpy(tmp + wrap_size, &log_sys.buf[log_sys.START_OFFSET], - s + 9 - wrap_size); - } + begin.memcpy(tmp, s + 9); if (format == log_t::FORMAT_ENC_11) log_decrypt_mtr(tmp, ptr + s); } @@ -2737,14 +3102,21 @@ log_parse_file(const page_id_t id, bool if_exists, ? "ignored" : recv_sys.file_checkpoint ? "reread" : "read", recv_sys.lsn)); - if (c == log_sys.next_checkpoint_lsn) + if (c == log_sys.next_checkpoint_lsn || !log_sys.next_checkpoint_lsn) { /* There can be multiple FILE_CHECKPOINT for the same LSN. */ if (!recv_sys.file_checkpoint) { + ut_ad(log_sys.next_checkpoint_lsn || log_sys.archive); + ut_ad(log_sys.last_checkpoint_lsn || log_sys.archive); + log_sys.next_checkpoint_lsn= c; + if (!log_sys.last_checkpoint_lsn) + log_sys.last_checkpoint_lsn= c; recv_sys.file_checkpoint= recv_sys.lsn; return recv_sys_t::GOT_EOF; } + else + ut_ad(log_sys.next_checkpoint_lsn); } } break; @@ -2788,6 +3160,18 @@ log_parse_file(const page_id_t id, bool if_exists, break; } + if (!log_sys.next_checkpoint_lsn) + { + /* We are currently validating checkpoints in + recv_log_t::find_checkpoint_archived(). 
We must not open and + validate data files until we actually start recovery from a + checkpoint, because there could be lots of FILE_MODIFY and + FILE_CHECKPOINT log records to be parsed. */ + ut_ad(!recv_sys.file_checkpoint); + ut_ad(log_sys.archive); + return recv_sys_t::OK; + } + fil_name_process(reinterpret_cast(l), fnend - l, space_id, fn2 ? FILE_MODIFY : mfile_type_t(b & 0xf0), recv_sys.start_lsn, if_exists); @@ -3211,6 +3595,90 @@ recv_sys_t::parse_mtr_result recv_sys_t::parse_mtr(bool if_exists) return recv_sys.parse(s, if_exists); } +inline void log_t::archived_switch_recovery_rewind() noexcept +{ + ut_ad(latch_have_wr()); + ut_ad(archive); + ut_ad(resize_log.is_opened()); + ut_ad(log.is_opened()); + + if (is_mmap()) + std::swap(buf, resize_buf); + + std::swap(log, resize_log); + std::swap(file_size, resize_target); + + first_lsn-= capacity(); +} + +inline bool log_t::archived_switch_recovery() noexcept +{ + ut_ad(latch_have_wr()); + ut_ad(archive); + ut_ad(!is_mmap()); + + if (!resize_log.is_opened()) + { + std::string path_name{get_archive_path(first_lsn + capacity())}; + bool success; + resize_log.m_file= os_file_create_func(path_name.c_str(), + OS_FILE_OPEN, OS_LOG_FILE, true, + &success); + ut_ad(success == (resize_log.m_file != OS_FILE_CLOSED)); + if (resize_log.m_file == OS_FILE_CLOSED) + { + sql_print_error("InnoDB: Failed to open %s", path_name.c_str()); + return false; + } + resize_target= os_file_get_size(resize_log.m_file); + if ((resize_target & 4095) || + resize_target > ARCHIVE_FILE_SIZE_MAX || + resize_target < FILE_SIZE_MIN) + { + resize_log.close(); + sql_print_error("InnoDB: %s is of invalid size " LSN_PF, + path_name.c_str(), resize_target); + return false; + } + } + + if (recv_sys.file_checkpoint) + log.close(); + std::swap(log, resize_log); + + first_lsn+= capacity(); + std::swap(file_size, resize_target); + + return true; +} + +ATTRIBUTE_COLD void log_t::archived_mmap_switch_recovery_complete() noexcept +{ + 
ut_ad(archived_mmap_switch()); + ut_ad(buf); + ut_ad(!checkpoint_buf); + ut_ad(recv_sys.offset > capacity()); + + if (recv_sys.file_checkpoint) + { + my_munmap(buf, size_t(file_size)); + resize_log.close(); + buf= resize_buf; + resize_buf= nullptr; + } + else + { + std::swap(buf, resize_buf); + std::swap(log, resize_log); + } + + const size_t jump= size_t(capacity()); + recv_sys.offset-= jump; + first_lsn+= jump; + std::swap(file_size, resize_target); + recv_sys.len= size_t(file_size); +} + template recv_sys_t::parse_mtr_result recv_sys_t::parse_mmap(bool if_exists) { @@ -3220,6 +3688,14 @@ recv_sys_t::parse_mtr_result recv_sys_t::parse_mmap(bool if_exists) ut_ad(recv_sys.len == log_sys.file_size); ut_ad(recv_sys.offset >= log_sys.START_OFFSET); ut_ad(recv_sys.offset <= recv_sys.len); + if (log_sys.archive) + { + ut_ad(log_sys.archived_mmap_switch()); + recv_warp s{&log_sys.buf[recv_sys.offset]}; + auto r= recv_sys.parse(s,if_exists); + log_sys.archived_mmap_switch_recovery_complete(); + return r; + } recv_ring s {recv_sys.offset == recv_sys.len ? 
&log_sys.buf[log_sys.START_OFFSET] @@ -4234,19 +4710,25 @@ static bool recv_scan_log(bool last_phase, const recv_sys_t::parser *parser) #endif if (size_t size= buf_size - recv_sys.len) { + ut_ad(!log_sys.is_mmap()); + const lsn_t end_lsn{recv_sys.lsn + recv_sys.len - recv_sys.offset}; #ifndef UNIV_DEBUG lsn_t #endif - source_offset= - log_sys.calc_lsn_offset(recv_sys.lsn + recv_sys.len - recv_sys.offset); + source_offset= log_sys.calc_lsn_offset(end_lsn); ut_ad(!wrap || source_offset == log_t::START_OFFSET); source_offset&= ~block_size_1; if (source_offset + size > log_sys.file_size) size= static_cast(log_sys.file_size - source_offset); - if (dberr_t err= log_sys.log.read(source_offset, - {log_sys.buf + recv_sys.len, size})) + if (log_sys.archive && + end_lsn >= log_sys.get_first_lsn() + log_sys.capacity() && + !log_sys.archived_switch_recovery()) + recv_sys.set_corrupt_log(); + else if (dberr_t err= + log_sys.log.read(source_offset, + {log_sys.buf + recv_sys.len, size})) { sql_print_error("InnoDB: Failed to read log at %" PRIu64 ": %s", source_offset, ut_strerr(err)); @@ -4277,7 +4759,7 @@ static bool recv_scan_log(bool last_phase, const recv_sys_t::parser *parser) ut_ad(!recv_sys.file_checkpoint); for (;;) { - const byte& b{log_sys.buf[recv_sys.offset]}; + const byte b{log_sys.buf[recv_sys.offset]}; r= parser[false](false); switch (r) { case recv_sys_t::PREMATURE_EOF: @@ -4294,7 +4776,7 @@ static bool recv_scan_log(bool last_phase, const recv_sys_t::parser *parser) ut_ad(!end || end == recv_sys.lsn); bool corrupt_fs= recv_sys.is_corrupt_fs(); - if (!end && !corrupt_fs) + if (!end && !corrupt_fs && !log_sys.archive) { recv_sys.set_corrupt_log(); sql_print_error("InnoDB: Missing FILE_CHECKPOINT(" LSN_PF @@ -4701,7 +5183,8 @@ static dberr_t recv_rename_files() inline void log_t::set_recovered() noexcept { ut_ad(get_flushed_lsn() == get_lsn()); - ut_ad(recv_sys.lsn == get_flushed_lsn()); + ut_ad(recv_sys.lsn == get_flushed_lsn() || + (recv_sys.rpo && recv_sys.rpo < 
get_flushed_lsn())); if (!is_mmap()) { const size_t bs{log_sys.write_size}, bs_1{bs - 1}; @@ -4747,6 +5230,155 @@ static recv_sys_t::parser get_parse_mmap() noexcept ut_error; } +dberr_t recv_sys_t::find_checkpoint_archived(lsn_t first_lsn, bool silent) +{ + ut_ad(log_sys.archive); + ut_ad(!log_sys.checkpoint_buf == log_sys.is_mmap()); + const byte *buf; + if (byte *c= log_sys.checkpoint_buf) + { + buf= c; + if (dberr_t err= log_sys.log.read(0, {c, log_sys.write_size})) + return err; + } + else + buf= log_sys.buf; + uint16_t n_checkpoint; + { + const uint32_t format{mach_read_from_4(buf)}; + if (format != uint32_t(srv_encrypt_log)) + { + /* TODO: correct the file header later, if we can recover + from the previous file */ + if (!silent) + sql_print_error(format < 2 + ? "InnoDB: " LOG_ARCHIVE_NAME + " does not match innodb_encrypt_log" + : "InnoDB: " LOG_ARCHIVE_NAME + " is in unrecognized format", + first_lsn); + return DB_ERROR; + } + + if (!format) + buf+= 4, n_checkpoint= 1; + else if (!log_crypt_read_header(buf)) + return DB_ERROR; + else + { + buf+= 32/*log_crypt_read_header()*/, n_checkpoint= 32 / 4; + if (!tmp_buf) + { + tmp_buf= static_cast + (ut_malloc_dontdump(tmp_buf_size, PSI_INSTRUMENT_ME)); + if (!tmp_buf) + return DB_OUT_OF_MEMORY; + } + } + } + + log_sys.next_checkpoint_lsn= 0; + log_sys.set_first_lsn(first_lsn); + lsn= 0; + /* Validate the checkpoints */ + lsn_t end_lsn{0}, checkpoint{0}, recovery_start_checkpoint{0}; + const recv_sys_t::parser parser[2] { + get_parse_mmap(), get_parse_mmap() + }; + ut_ad(recv_spaces.empty()); + while (n_checkpoint < log_sys.START_OFFSET / 4) + { + const uint32_t d{mach_read_from_4(my_assume_aligned<4>(buf))}; + if (d < log_sys.START_OFFSET || d >= log_sys.file_size) + break; + const lsn_t parse_start{first_lsn + d - log_sys.START_OFFSET}; + if (parse_start < end_lsn) + break; + lsn= parse_start; + file_checkpoint= 0; + log_sys.next_checkpoint_lsn= 0; + ut_d(const bool rescan=) recv_scan_log(false, parser); + 
ut_ad(rescan); + ut_ad(recv_spaces.empty()); + if (!file_checkpoint) + { + found_corrupt_log= false; + break; + } + ut_ad(file_checkpoint == lsn); + checkpoint= log_sys.next_checkpoint_lsn; + ut_ad(checkpoint); + ut_ad(checkpoint < lsn); + if (!log_sys.archived_lsn) + log_sys.archived_lsn= parse_start; + end_lsn= parse_start; + if (end_lsn == recovery_start) + recovery_start_checkpoint= checkpoint; + n_checkpoint++; + + if (first_lsn != log_sys.get_first_lsn()) + { + /* This checkpoint spanned two files, and it therefore should be + the last valid checkpoint in the file. Either + log_t::archived_switch_recovery() switched log_sys.log to point + to the second file, or log_t::archived_mmap_switch_recovery_complete() + switched both log_sys.log and log_sys.buf. */ + log_sys.archived_switch_recovery_rewind(); + break; + } + + buf+= 4; + if (byte *c= log_sys.checkpoint_buf) + { + uint offset(n_checkpoint * 4); + if (offset & (log_sys.write_size - 1)) + continue; + buf= c; + if (dberr_t err= log_sys.log.read(offset, {c, log_sys.write_size})) + return err; + } + } + + if (recv_sys_invalid_rpo(recv_sys_rpo_exceeded)) + return DB_READ_ONLY; + + if (!checkpoint) + { + if (!silent) + sql_print_error("InnoDB: Did not find any checkpoint after LSN=" LSN_PF, + first_lsn); + return DB_CORRUPTION; + } + + if (!recovery_start); + else if (recovery_start_checkpoint) + checkpoint= recovery_start_checkpoint, end_lsn= recovery_start; + else + { + end_lsn= lsn= recovery_start; + file_checkpoint= 0; + log_sys.next_checkpoint_lsn= 0; + ut_d(const bool rescan=) recv_scan_log(false, parser); + ut_ad(rescan); + ut_ad(recv_spaces.empty()); + checkpoint= log_sys.next_checkpoint_lsn; + if (file_checkpoint) + checkpoint= log_sys.next_checkpoint_lsn; + else + { + ut_ad(!silent); + sql_print_error("InnoDB: Did not find innodb_log_recovery_start=" LSN_PF + " in " LOG_ARCHIVE_NAME, + recovery_start, first_lsn); + return DB_CORRUPTION; + } + } + + file_checkpoint= 0; + 
log_sys.set_recovered_checkpoint(checkpoint, lsn= end_lsn, n_checkpoint); + return DB_SUCCESS; +} + /** Start recovering from a redo log checkpoint. of first system tablespace page @return error code or DB_SUCCESS */ @@ -4766,10 +5398,12 @@ dberr_t recv_recovery_from_checkpoint_start() if (srv_force_recovery >= SRV_FORCE_NO_LOG_REDO) { sql_print_information("InnoDB: innodb_force_recovery=6" " skips redo log apply"); + recv_sys.rpo = LSN_MAX; return err; } recv_sys.recovery_on = true; + recv_sys_rpo_exceeded = 0; log_sys.latch.wr_lock(SRW_LOCK_CALL); log_sys.set_capacity(); @@ -4787,17 +5421,33 @@ dberr_t recv_recovery_from_checkpoint_start() recv_sys_t::parser parser[2]; if (log_sys.is_recoverable()) { + if (recv_sys.recovery_start > recv_sys.lsn) { + /* recv_sys_t::find_checkpoint_archived() + already checked this */ + ut_ad(!log_sys.archive); + sql_print_error("InnoDB: Did not find" + " innodb_log_recovery_start=%" PRIu64 + " between %" PRIu64 " and %" PRIu64, + recv_sys.recovery_start, + log_sys.next_checkpoint_lsn, + recv_sys.lsn); + err = DB_CORRUPTION; + goto func_exit; + } + log_sys.last_checkpoint_lsn = log_sys.next_checkpoint_lsn; const bool rewind = recv_sys.lsn != log_sys.next_checkpoint_lsn; - log_sys.last_checkpoint_lsn = log_sys.next_checkpoint_lsn; parser[false] = get_parse_mmap(); parser[true] = get_parse_mmap(); recv_scan_log(false, parser); if (recv_needed_recovery) { read_only_recovery: sql_print_warning("InnoDB: innodb_read_only" - " prevents crash recovery"); + " prevents crash recovery between " LSN_PF + " and " LSN_PF, + log_sys.next_checkpoint_lsn, recv_sys.lsn); err = DB_READ_ONLY; + recv_sys.rpo = recv_sys.scanned_lsn; goto func_exit; } if (recv_sys.is_corrupt_log()) { @@ -4817,8 +5467,11 @@ dberr_t recv_recovery_from_checkpoint_start() } rescan = recv_scan_log(false, parser); - if (srv_read_only_mode && recv_needed_recovery) { - goto read_only_recovery; + if (srv_read_only_mode) { + recv_sys.rpo = recv_sys.scanned_lsn; + if 
(recv_needed_recovery) { + goto read_only_recovery; + } } if ((recv_sys.is_corrupt_log() && !srv_force_recovery) @@ -4827,7 +5480,12 @@ dberr_t recv_recovery_from_checkpoint_start() } } - log_sys.set_recovered_lsn(recv_sys.scanned_lsn); + + if (recv_sys_invalid_rpo(recv_sys_rpo_exceeded)) { + high_level_read_only = true; + } else { + log_sys.set_recovered_lsn(recv_sys.scanned_lsn); + } if (recv_needed_recovery) { bool missing_tablespace = false; @@ -4904,15 +5562,16 @@ dberr_t recv_recovery_from_checkpoint_start() ut_ad(recv_sys.pages.empty()); } - if (!log_sys.is_recoverable()) { - } else if (recv_sys.validate_checkpoint()) { + if (log_sys.is_recoverable()) { + if (recv_sys_rpo_exceeded || recv_sys.validate_checkpoint()) { err_exit: - err = DB_ERROR; - goto func_exit; - } + err = DB_ERROR; + goto func_exit; + } - if (!srv_read_only_mode && log_sys.is_recoverable()) { - log_sys.set_recovered(); + if (!srv_read_only_mode) { + log_sys.set_recovered(); + } } DBUG_EXECUTE_IF("before_final_redo_apply", goto err_exit;); diff --git a/storage/innobase/mtr/mtr0mtr.cc b/storage/innobase/mtr/mtr0mtr.cc index 952850b3f499d..2059ca0eed461 100644 --- a/storage/innobase/mtr/mtr0mtr.cc +++ b/storage/innobase/mtr/mtr0mtr.cc @@ -52,12 +52,14 @@ void mtr_t::finisher_update() if (log_sys.is_mmap()) { commit_logger= mtr_t::commit_log; - finisher= mtr_t::finish_writer; + finisher= log_sys.archive + ? 
mtr_t::finish_writer + : mtr_t::finish_writer; return; } commit_logger= mtr_t::commit_log; #endif - finisher= mtr_t::finish_writer; + finisher= mtr_t::finish_writer; } void mtr_memo_slot_t::release() const @@ -336,7 +338,52 @@ void mtr_t::release() m_memo.clear(); } -ATTRIBUTE_NOINLINE void mtr_t::commit_log_release() noexcept +#ifdef HAVE_PMEM +ATTRIBUTE_COLD lsn_t log_t::archived_mmap_switch_complete() noexcept +{ + ut_ad(latch_have_wr()); + if (!archive || !resize_buf) + return 0; + const lsn_t lsn{get_lsn()}, last_lsn{first_lsn + capacity()}; + if (lsn < last_lsn) + return 0; + ut_a(!checkpoint_buf); + persist(lsn); + checkpoint_buf= buf; + buf= resize_buf; + resize_buf= nullptr; + first_lsn= last_lsn; + file_size= resize_target; + return lsn; +} + +template<> +ATTRIBUTE_NOINLINE void mtr_t::commit_log_release() noexcept +{ + if (m_latch_ex) + { + completed: + const lsn_t lsn{log_sys.archived_mmap_switch_complete()}; + log_sys.latch.wr_unlock(); + m_latch_ex= false; + if (lsn) + buf_flush_ahead(lsn, true); + } + else + { + const bool retry{log_sys.archived_mmap_switch()}; + log_sys.latch.rd_unlock(); + if (retry) + { + log_sys.latch.wr_lock(SRW_LOCK_CALL); + goto completed; + } + } +} +#endif + +template<> +ATTRIBUTE_NOINLINE void mtr_t::commit_log_release() noexcept { if (m_latch_ex) { @@ -395,12 +442,12 @@ void mtr_t::commit_log(mtr_t *mtr, std::pair lsns) noexcept buf_pool.page_cleaner_wakeup(); mysql_mutex_unlock(&buf_pool.flush_list_mutex); - mtr->commit_log_release(); + mtr->commit_log_release(); mtr->release(); } else { - mtr->commit_log_release(); + mtr->commit_log_release(); for (auto it= mtr->m_memo.rbegin(); it != mtr->m_memo.rend(); ) { @@ -488,6 +535,7 @@ void mtr_t::commit() } ut_ad(!srv_read_only_mode); + ut_ad(!recv_sys.rpo); std::pair lsns{do_write()}; process_freed_pages(); #ifdef HAVE_PMEM @@ -741,6 +789,7 @@ ATTRIBUTE_COLD lsn_t mtr_t::commit_files(lsn_t checkpoint_lsn) ut_ad(!m_made_dirty); ut_ad(m_memo.empty()); ut_ad(!srv_read_only_mode); 
+ ut_ad(!recv_sys.rpo); ut_ad(!m_freed_space); ut_ad(!m_freed_pages); ut_ad(!m_user_space); @@ -852,7 +901,9 @@ static time_t log_close_warn_time; making the server crash-unsafe. */ ATTRIBUTE_COLD static void log_overwrite_warning(lsn_t lsn) { - if (log_sys.overwrite_warned) + ut_ad(!log_sys.archive); /* we hope that this is unreachable */ + + if (log_sys.overwrite_warned || log_sys.archive) return; time_t t= time(nullptr); @@ -872,6 +923,39 @@ ATTRIBUTE_COLD static void log_overwrite_warning(lsn_t lsn) ? ". Shutdown is in progress" : ""); } + +#ifdef HAVE_PMEM +template<> +inline std::pair +log_t::append_prepare(size_t size, bool ex) noexcept +{ + ut_ad(ex ? latch_have_wr() : latch_have_rd()); + ut_ad(is_mmap()); + ut_ad(archive); + ut_ad(archived_lsn); + + uint64_t l, lsn; + static_assert(WRITE_TO_BUF == WRITE_BACKOFF << 1, ""); + while (UNIV_UNLIKELY((l= write_lsn_offset.fetch_add(size + WRITE_TO_BUF) & + (WRITE_TO_BUF - 1)) + size + + (lsn= base_lsn.load(std::memory_order_relaxed)) >= + first_lsn + capacity()) && + !resize_buf && !checkpoint_buf) + { + /* The following is inlined here instead of being part of + archive_mmap_switch_prepare() below, in order to increase the + locality of reference and to expedite setting the WRITE_BACKOFF flag. */ + bool late(write_lsn_offset.fetch_or(WRITE_BACKOFF) & WRITE_BACKOFF); + /* Subtract our LSN overshoot. 
*/ + write_lsn_offset.fetch_sub(size); + archived_mmap_switch_prepare(late, ex); + } + + lsn+= l; + return {lsn, buf + FIRST_LSN + (lsn - first_lsn)}; +} +#endif + ATTRIBUTE_COLD void log_t::append_prepare_wait(bool late, bool ex) noexcept { if (UNIV_LIKELY(!ex)) @@ -918,6 +1002,7 @@ ATTRIBUTE_COLD void log_t::append_prepare_wait(bool late, bool ex) noexcept const bool is_pmem{is_mmap()}; if (is_pmem) { + ut_ad(!archive); ut_ad(lsn - get_flushed_lsn(std::memory_order_relaxed) < capacity() || overwrite_warned); persist(lsn); @@ -942,17 +1027,20 @@ ATTRIBUTE_COLD void log_t::append_prepare_wait(bool late, bool ex) noexcept } /** Reserve space in the log buffer for appending data. -@tparam mmap log_sys.is_mmap() +@tparam mode how to write log @param size total length of the data to append(), in bytes @param ex whether log_sys.latch is exclusively locked @return the start LSN and the buffer position for append() */ -template +template inline std::pair log_t::append_prepare(size_t size, bool ex) noexcept { ut_ad(ex ? 
latch_have_wr() : latch_have_rd()); - ut_ad(mmap == is_mmap()); - ut_ad(!mmap || buf_size == std::min(capacity(), buf_size_max)); + static_assert(!bool(WRITE_NORMAL), ""); + static_assert(bool(CIRCULAR_MMAP), ""); + static_assert(mode == WRITE_NORMAL || mode == CIRCULAR_MMAP, ""); + ut_ad(bool(mode) == is_mmap()); + ut_ad(!mode || buf_size == std::min(capacity(), buf_size_max)); const size_t buf_size{this->buf_size - size}; uint64_t l; static_assert(WRITE_TO_BUF == WRITE_BACKOFF << 1, ""); @@ -968,14 +1056,13 @@ std::pair log_t::append_prepare(size_t size, bool ex) noexcept append_prepare_wait(late, ex); } - const lsn_t lsn{l + base_lsn.load(std::memory_order_relaxed)}, - end_lsn{lsn + size}; + const lsn_t lsn{l + base_lsn.load(std::memory_order_relaxed)}; - if (UNIV_UNLIKELY(end_lsn >= last_checkpoint_lsn + log_capacity)) + if (UNIV_UNLIKELY(lsn + size >= last_checkpoint_lsn + log_capacity)) set_check_for_checkpoint(true); return {lsn, - buf + size_t(mmap ? FIRST_LSN + (lsn - first_lsn) % capacity() : l)}; + buf + size_t(mode ? FIRST_LSN + (lsn - first_lsn) % capacity() : l)}; } /** Finish appending data to the log. @@ -1111,12 +1198,14 @@ std::pair mtr_t::do_write() noexcept return finish_write(len); } -inline void log_t::resize_write(lsn_t lsn, const byte *end, size_t len, - size_t seq) noexcept +template +ATTRIBUTE_COLD +void log_t::resize_write_low(lsn_t lsn, const byte *end, + size_t len, size_t seq) noexcept { ut_ad(latch_have_any()); + ut_ad(resize_buf); - if (UNIV_LIKELY_NULL(resize_buf)) { ut_ad(end >= buf); end-= len; @@ -1208,92 +1297,142 @@ inline void log_t::resize_write(lsn_t lsn, const byte *end, size_t len, inline void log_t::append(byte *&d, const void *s, size_t size) noexcept { ut_ad(log_sys.latch_have_any()); - ut_ad(d + size <= log_sys.buf + - (log_sys.is_mmap() ? log_sys.file_size : log_sys.buf_size)); + ut_ad(log_sys.is_mmap() + ? 
((d >= log_sys.buf && d + size <= log_sys.buf + log_sys.file_size) || + (log_sys.archive && + d >= log_sys.resize_buf && + d + size <= log_sys.resize_buf + log_sys.resize_target)) + : (d >= log_sys.buf && d + size <= log_sys.buf + log_sys.buf_size)); memcpy(d, s, size); d+= size; } -template -std::pair mtr_t::finish_writer(mtr_t *mtr, size_t len) +template +std::pair +mtr_t::finish_writer(mtr_t *mtr, size_t len) { ut_ad(log_sys.is_latest()); ut_ad(!recv_no_log_write); + ut_ad(!recv_sys.rpo); ut_ad(mtr->is_logged()); ut_ad(mtr->m_latch_ex ? log_sys.latch_have_wr() : log_sys.latch_have_rd()); ut_ad(len < recv_sys.MTR_SIZE_MAX); + ut_ad(mode == log_t::WRITE_NORMAL || + log_sys.archive == (mode == log_t::ARCHIVED_MMAP)); const size_t size{mtr->m_commit_lsn ? 5U + 8U : 5U}; std::pair start= - log_sys.append_prepare(len, mtr->m_latch_ex); + log_sys.append_prepare(len, mtr->m_latch_ex); - if (!mmap) - { + if (mode == log_t::WRITE_NORMAL) +#ifdef HAVE_PMEM + write_normal: +#endif for (const mtr_buf_t::block_t &b : mtr->m_log) log_sys.append(start.second, b.begin(), b.used()); - - write_trailer: - *start.second++= log_sys.get_sequence_bit(start.first + len - size); - if (mtr->m_commit_lsn) - { - mach_write_to_8(start.second, mtr->m_commit_lsn); - mtr->m_crc= my_crc32c(mtr->m_crc, start.second, 8); - start.second+= 8; - } - mach_write_to_4(start.second, mtr->m_crc); - start.second+= 4; - } +#ifdef HAVE_PMEM else { - if (UNIV_LIKELY(start.second + len <= &log_sys.buf[log_sys.file_size])) + const size_t file_size= log_sys.file_size; + byte *const buf{log_sys.buf}; + byte *end= &buf[file_size]; + if (UNIV_LIKELY(start.second + len <= end)) + goto write_normal; + byte *const begin= mode == log_t::ARCHIVED_MMAP + ? 
log_sys.get_archived_mmap_switch() + : buf + log_sys.START_OFFSET; + if (mode == log_t::ARCHIVED_MMAP && UNIV_UNLIKELY(start.second > end)) { - for (const mtr_buf_t::block_t &b : mtr->m_log) - log_sys.append(start.second, b.begin(), b.used()); - goto write_trailer; + /* Our mini-transaction will not span two log files. We are + somewhere between log_t::archived_mmap_switch_prepare() and + log_t::archived_mmap_switch_complete(), and our entire log must + be written to the new file. */ + start.second= begin + (start.second - end); + goto write_normal; } + for (const mtr_buf_t::block_t &b : mtr->m_log) { size_t size{b.used()}; - const size_t size_left(&log_sys.buf[log_sys.file_size] - start.second); + const size_t size_left(end - start.second); const byte *src= b.begin(); if (size > size_left) { ::memcpy(start.second, src, size_left); - start.second= &log_sys.buf[log_sys.START_OFFSET]; + start.second= begin; + if (mode == log_t::ARCHIVED_MMAP) + /* An approximation; the minimum innodb_log_file_size + always exceeds the maximum mtr->get_log_size() */ + end= begin + file_size; src+= size_left; size-= size_left; } ::memcpy(start.second, src, size); start.second+= size; } - const size_t size_left(&log_sys.buf[log_sys.file_size] - start.second); - if (size_left > size) - goto write_trailer; + const size_t size_left(end - start.second); + if (size_left <= size) + { + byte tail[5 + 8]; + tail[0]= + (mode == log_t::WRITE_NORMAL + ? 
log_sys.archive : mode == log_t::ARCHIVED_MMAP) || + log_sys.get_sequence_bit(start.first + len - size); - byte tail[5 + 8]; - tail[0]= log_sys.get_sequence_bit(start.first + len - size); + if (mtr->m_commit_lsn) + { + mach_write_to_8(tail + 1, mtr->m_commit_lsn); + mtr->m_crc= my_crc32c(mtr->m_crc, tail + 1, 8); + mach_write_to_4(tail + 9, mtr->m_crc); + } + else + mach_write_to_4(tail + 1, mtr->m_crc); - if (mtr->m_commit_lsn) - { - mach_write_to_8(tail + 1, mtr->m_commit_lsn); - mtr->m_crc= my_crc32c(mtr->m_crc, tail + 1, 8); - mach_write_to_4(tail + 9, mtr->m_crc); + ::memcpy(start.second, tail, size_left); + ::memcpy(begin, tail + size_left, size - size_left); + start.second= ((size >= size_left) ? begin : end) + (size - size_left); + goto wrote_trailer; } - else - mach_write_to_4(tail + 1, mtr->m_crc); - - ::memcpy(start.second, tail, size_left); - ::memcpy(log_sys.buf + log_sys.START_OFFSET, tail + size_left, - size - size_left); - start.second= log_sys.buf + - ((size >= size_left) ? log_sys.START_OFFSET : log_sys.file_size) + - (size - size_left); } +#endif - log_sys.resize_write(start.first, start.second, len, size); + *start.second++= + (mode == log_t::WRITE_NORMAL + ? log_sys.archive : mode == log_t::ARCHIVED_MMAP) || + log_sys.get_sequence_bit(start.first + len - size); + + if (mtr->m_commit_lsn) + { + mach_write_to_8(start.second, mtr->m_commit_lsn); + mtr->m_crc= my_crc32c(mtr->m_crc, start.second, 8); + start.second+= 8; + } + mach_write_to_4(start.second, mtr->m_crc); + start.second+= 4; + +#ifdef HAVE_PMEM +wrote_trailer: +#else + static_assert(mode == log_t::WRITE_NORMAL, ""); +#endif mtr->m_commit_lsn= start.first + len; - return {start.first, log_close(mtr->m_commit_lsn)}; + + switch (mode) { + case log_t::ARCHIVED_MMAP: + ut_ad(!log_sys.resize_in_progress()); + return {start.first, (log_sys.get_first_lsn() > log_sys.last_checkpoint_lsn + ? 
log_sys.get_first_lsn() : 0)}; + case log_t::CIRCULAR_MMAP: + log_sys.resize_write(start.first, start.second, len, size); + return {start.first, log_close(mtr->m_commit_lsn)}; + case log_t::WRITE_NORMAL: + log_sys.resize_write(start.first, start.second, len, size); + } + return {start.first, log_sys.archive + ? (log_sys.get_first_lsn() > log_sys.last_checkpoint_lsn + ? log_sys.get_first_lsn() : 0) + : log_close(mtr->m_commit_lsn)}; } bool mtr_t::have_x_latch(const buf_block_t &block) const diff --git a/storage/innobase/row/row0import.cc b/storage/innobase/row/row0import.cc index a582642a17ebf..035ca69ca2d0c 100644 --- a/storage/innobase/row/row0import.cc +++ b/storage/innobase/row/row0import.cc @@ -4034,6 +4034,7 @@ dberr_t FetchIndexRootPages::run(const fil_iterator_t& iter, const bool full_crc32 = fil_space_t::full_crc32(m_space_flags); bool skip_checksum_check = false; ut_ad(!srv_read_only_mode); + ut_ad(!recv_sys.rpo); if (!page_compress_buf) return DB_OUT_OF_MEMORY; @@ -4134,6 +4135,7 @@ static dberr_t fil_iterate( byte* page_compress_buf= static_cast(malloc(get_buf_size())); ut_ad(!srv_read_only_mode); + ut_ad(!recv_sys.rpo); if (!page_compress_buf) { return DB_OUT_OF_MEMORY; @@ -4462,6 +4464,7 @@ fil_tablespace_iterate( ut_a(n_io_buffers > 0); ut_ad(!srv_read_only_mode); + ut_ad(!recv_sys.rpo); DBUG_EXECUTE_IF("ib_import_trigger_corruption_1", return(DB_CORRUPTION);); @@ -4740,6 +4743,7 @@ row_import_for_mysql( /* The caller assured that this is not read_only_mode and that no temorary tablespace is being imported. 
*/ ut_ad(!srv_read_only_mode); + ut_ad(!recv_sys.rpo); ut_ad(!table->is_temporary()); ut_ad(table->space_id); diff --git a/storage/innobase/row/row0ins.cc b/storage/innobase/row/row0ins.cc index 3f373027e7ae7..3dcbff3e4505a 100644 --- a/storage/innobase/row/row0ins.cc +++ b/storage/innobase/row/row0ins.cc @@ -679,6 +679,7 @@ row_ins_set_detailed( dict_foreign_t* foreign) /*!< in: foreign key constraint */ { ut_ad(!srv_read_only_mode); + ut_ad(!recv_sys.rpo); mysql_mutex_lock(&srv_misc_tmpfile_mutex); rewind(srv_misc_tmpfile); @@ -713,6 +714,7 @@ row_ins_foreign_trx_print( ulint heap_size; ut_ad(!srv_read_only_mode); + ut_ad(!recv_sys.rpo); { TMLockMutexGuard g{SRW_LOCK_CALL}; @@ -751,10 +753,6 @@ row_ins_foreign_report_err( { std::string fk_str; - if (srv_read_only_mode) { - return; - } - FILE* ef = dict_foreign_err_file; trx_t* trx = thr_get_trx(thr); @@ -807,11 +805,6 @@ row_ins_foreign_report_add_err( child table */ { std::string fk_str; - - if (srv_read_only_mode) { - return; - } - FILE* ef = dict_foreign_err_file; row_ins_set_detailed(trx, foreign); diff --git a/storage/innobase/row/row0merge.cc b/storage/innobase/row/row0merge.cc index adf6d0dbef140..f2f76b594c880 100644 --- a/storage/innobase/row/row0merge.cc +++ b/storage/innobase/row/row0merge.cc @@ -3723,6 +3723,7 @@ row_merge_insert_index_tuples( DBUG_ENTER("row_merge_insert_index_tuples"); ut_ad(!srv_read_only_mode); + ut_ad(!recv_sys.rpo); ut_ad(!(index->type & DICT_FTS)); ut_ad(!dict_index_is_spatial(index)); @@ -3896,6 +3897,7 @@ row_merge_drop_index_dict( pars_info_t* info; ut_ad(!srv_read_only_mode); + ut_ad(!recv_sys.rpo); ut_ad(trx->dict_operation_lock_mode); ut_ad(trx->dict_operation); ut_ad(dict_sys.locked()); @@ -3959,6 +3961,7 @@ row_merge_drop_indexes_dict( pars_info_t* info; ut_ad(!srv_read_only_mode); + ut_ad(!recv_sys.rpo); ut_ad(trx->dict_operation_lock_mode); ut_ad(trx->dict_operation); ut_ad(dict_sys.locked()); @@ -4033,6 +4036,7 @@ row_merge_drop_indexes( dict_index_t* next_index; 
ut_ad(!srv_read_only_mode); + ut_ad(!recv_sys.rpo); ut_ad(trx->dict_operation_lock_mode); ut_ad(trx->dict_operation); ut_ad(dict_sys.locked()); @@ -4436,6 +4440,7 @@ row_merge_file_destroy( merge_file_t* merge_file) /*!< in/out: merge file structure */ { ut_ad(!srv_read_only_mode); + ut_ad(!recv_sys.rpo); if (merge_file->fd != OS_FILE_CLOSED) { row_merge_file_destroy_low(merge_file->fd); @@ -4513,6 +4518,7 @@ row_merge_create_index( DBUG_ENTER("row_merge_create_index"); ut_ad(!srv_read_only_mode); + ut_ad(!recv_sys.rpo); /* Create the index prototype, using the passed in def, this is not a persistent operation. We pass 0 as the space id, and determine at @@ -4649,6 +4655,7 @@ row_merge_build_indexes( DBUG_ENTER("row_merge_build_indexes"); ut_ad(!srv_read_only_mode); + ut_ad(!recv_sys.rpo); ut_ad((old_table == new_table) == !col_map); ut_ad(!defaults || col_map); diff --git a/storage/innobase/row/row0mysql.cc b/storage/innobase/row/row0mysql.cc index 01176ee8f7693..a8fc3d9b508ad 100644 --- a/storage/innobase/row/row0mysql.cc +++ b/storage/innobase/row/row0mysql.cc @@ -2250,8 +2250,10 @@ row_discard_tablespace_foreign_key_checks( const trx_t* trx, /*!< in: transaction handle */ const dict_table_t* table) /*!< in: table to be discarded */ { + ut_ad(!srv_read_only_mode); + ut_ad(!recv_sys.rpo); - if (srv_read_only_mode || !trx->check_foreigns) { + if (!trx->check_foreigns) { return(DB_SUCCESS); } diff --git a/storage/innobase/row/row0quiesce.cc b/storage/innobase/row/row0quiesce.cc index 36d7e5a9c450a..6b5228a03faa6 100644 --- a/storage/innobase/row/row0quiesce.cc +++ b/storage/innobase/row/row0quiesce.cc @@ -506,6 +506,7 @@ row_quiesce_table_start( ut_a(trx->mysql_thd != 0); ut_a(srv_n_purge_threads > 0); ut_ad(!srv_read_only_mode); + ut_ad(!recv_sys.rpo); ut_a(trx->mysql_thd != 0); @@ -604,8 +605,9 @@ row_quiesce_set_state( trx_t* trx) /*!< in/out: transaction */ { ut_a(srv_n_purge_threads > 0); + ut_ad(!srv_read_only_mode || recv_sys.rpo); - if (srv_read_only_mode) 
{ + if (recv_sys.rpo) { ib_senderrf(trx->mysql_thd, IB_LOG_LEVEL_WARN, ER_READ_ONLY_MODE); diff --git a/storage/innobase/srv/srv0srv.cc b/storage/innobase/srv/srv0srv.cc index 151d5df0cc9dd..671475c3f0078 100644 --- a/storage/innobase/srv/srv0srv.cc +++ b/storage/innobase/srv/srv0srv.cc @@ -691,7 +691,7 @@ srv_printf_innodb_monitor( mysql_mutex_lock(&dict_foreign_err_mutex); - if (!srv_read_only_mode && ftell(dict_foreign_err_file) != 0L) { + if (!recv_sys.rpo && ftell(dict_foreign_err_file) != 0L) { fputs("------------------------\n" "LATEST FOREIGN KEY ERROR\n" "------------------------\n", file); @@ -816,9 +816,11 @@ void srv_export_innodb_status(void) /*==========================*/ { + ut_ad(!srv_read_only_mode || recv_sys.rpo); + fil_crypt_stat_t crypt_stat; - if (!srv_read_only_mode) { + if (!recv_sys.rpo) { fil_crypt_total_stat(&crypt_stat); } @@ -932,7 +934,7 @@ srv_export_innodb_status(void) export_vars.innodb_onlineddl_rowlog_pct_used = onlineddl_rowlog_pct_used; export_vars.innodb_onlineddl_pct_progress = onlineddl_pct_progress; - if (!srv_read_only_mode) { + if (!recv_sys.rpo) { export_vars.innodb_encryption_rotation_pages_read_from_cache = crypt_stat.pages_read_from_cache; export_vars.innodb_encryption_rotation_pages_read_from_disk = @@ -1012,7 +1014,7 @@ static void srv_monitor() /* We don't create the temp files or associated mutexes in read-only-mode */ - if (!srv_read_only_mode && srv_innodb_status) { + if (!recv_sys.rpo && srv_innodb_status) { mysql_mutex_lock(&srv_monitor_file_mutex); rewind(srv_monitor_file); if (!srv_printf_innodb_monitor(srv_monitor_file, @@ -1090,6 +1092,7 @@ bool srv_any_background_activity() if (purge_sys.enabled() || srv_master_timer.get()) { ut_ad(!srv_read_only_mode); + ut_ad(!recv_sys.rpo); return true; } return false; @@ -1183,6 +1186,7 @@ void purge_sys_t::resume() return; } ut_ad(!srv_read_only_mode); + ut_ad(!recv_sys.rpo); ut_ad(srv_force_recovery < SRV_FORCE_NO_BACKGROUND); purge_coordinator_task.enable(); 
latch.wr_lock(SRW_LOCK_CALL); @@ -1287,11 +1291,15 @@ void srv_master_callback(void*) ut_a(srv_shutdown_state <= SRV_SHUTDOWN_INITIATED); MONITOR_INC(MONITOR_MASTER_THREAD_SLEEP); - purge_sys.wake_if_not_active(); + if (!recv_sys.rpo) + purge_sys.wake_if_not_active(); ulonglong counter_time= microsecond_interval_timer(); - srv_sync_log_buffer_in_background(); - MONITOR_INC_TIME_IN_MICRO_SECS(MONITOR_SRV_LOG_FLUSH_MICROSECOND, - counter_time); + if (!recv_sys.rpo) + { + srv_sync_log_buffer_in_background(); + MONITOR_INC_TIME_IN_MICRO_SECS(MONITOR_SRV_LOG_FLUSH_MICROSECOND, + counter_time); + } if (srv_check_activity(&old_activity_count)) srv_master_do_active_tasks(counter_time); @@ -1344,6 +1352,7 @@ Fetch and execute a task from the work queue. static bool srv_task_execute(THD *thd) { ut_ad(!srv_read_only_mode); + ut_ad(!recv_sys.rpo); ut_ad(srv_force_recovery < SRV_FORCE_NO_BACKGROUND); mysql_mutex_lock(&srv_sys.tasks_mutex); @@ -1378,6 +1387,7 @@ void srv_update_purge_thread_count(uint n) inline void purge_coordinator_state::do_purge(trx_t *trx) { ut_ad(!srv_read_only_mode); + ut_ad(!recv_sys.rpo); if (!purge_sys.enabled() || purge_sys.paused()) return; @@ -1493,6 +1503,7 @@ static void purge_worker_callback(void*) { ut_ad(!current_thd); ut_ad(!srv_read_only_mode); + ut_ad(!recv_sys.rpo); ut_ad(srv_force_recovery < SRV_FORCE_NO_BACKGROUND); void *ctx; THD *thd= acquire_thd(&ctx); @@ -1543,6 +1554,7 @@ srv_que_task_enqueue_low( que_thr_t* thr) /*!< in: query thread */ { ut_ad(!srv_read_only_mode); + ut_ad(!recv_sys.rpo); mysql_mutex_lock(&srv_sys.tasks_mutex); UT_LIST_ADD_LAST(srv_sys.tasks, thr); @@ -1557,6 +1569,7 @@ ulint srv_get_task_queue_length() ulint n_tasks; ut_ad(!srv_read_only_mode); + ut_ad(!recv_sys.rpo); mysql_mutex_lock(&srv_sys.tasks_mutex); diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc index e92e0ff54e078..dda74bff692f9 100644 --- a/storage/innobase/srv/srv0start.cc +++ b/storage/innobase/srv/srv0start.cc @@ 
-141,6 +141,9 @@ std::unique_ptr srv_master_timer; mysql_pfs_key_t thread_pool_thread_key; #endif /* UNIV_PFS_THREAD */ +/** Only created if !srv_read_only_mode. Protected by lock_sys.latch. */ +extern FILE *lock_latest_err_file; + #ifdef HAVE_PSI_STAGE_INTERFACE /** Array of all InnoDB stage events for monitoring activities via performance schema. */ @@ -160,17 +163,17 @@ static PSI_stage_info* srv_stages[] = static void delete_log_files() { for (size_t i= 1; i < 102; i++) - delete_log_file(std::to_string(i).c_str()); + os_file_delete_if_exists_func(log_sys.get_circular_path(i).c_str(), nullptr); } /** Creates log file. @param create_new_db whether the database is being initialized @param lsn log sequence number -@param logfile0 name of the log file @return DB_SUCCESS or error code */ static dberr_t create_log_file(bool create_new_db, lsn_t lsn) { ut_ad(!srv_read_only_mode); + ut_ad(!recv_sys.rpo); /* We will retain ib_logfile0 until we have written a new logically empty log as ib_logfile101 and atomically renamed it to @@ -192,7 +195,7 @@ static dberr_t create_log_file(bool create_new_db, lsn_t lsn) log_sys.latch.wr_lock(SRW_LOCK_CALL); log_sys.set_capacity(); - std::string logfile0{get_log_file_path("ib_logfile101")}; + const std::string logfile0{log_sys.get_circular_path(101)}; bool ret; os_file_t file{ os_file_create_func(logfile0.c_str(), @@ -218,7 +221,7 @@ static dberr_t create_log_file(bool create_new_db, lsn_t lsn) } log_sys.set_latest_format(srv_encrypt_log); - if (!log_sys.attach(file, srv_log_file_size)) { + if (!log_sys.attach(file, srv_log_file_size, false)) { goto close_and_exit; } @@ -265,8 +268,8 @@ static dberr_t create_log_file(bool create_new_db, lsn_t lsn) @return whether an error occurred */ bool log_t::resize_rename() noexcept { - std::string old_name{get_log_file_path("ib_logfile101")}; - std::string new_name{get_log_file_path()}; + const std::string old_name{get_circular_path(101)}; + const std::string new_name{log_sys.get_path()}; if 
(IF_WIN(MoveFileEx(old_name.c_str(), new_name.c_str(), MOVEFILE_REPLACE_EXISTING), @@ -803,7 +806,8 @@ srv_check_undo_redo_logs_exists() } /* Check if redo log file exists */ - auto logfilename = get_log_file_path(); + const std::string logfilename{log_sys.get_circular_path()}; + // FIXME: check for archived log as well fh = os_file_create_func(logfilename.c_str(), OS_FILE_OPEN_RETRY_SILENT, @@ -1032,6 +1036,7 @@ static void srv_shutdown_bg_undo_sources() if (srv_undo_sources) { ut_ad(!srv_read_only_mode); + ut_ad(!recv_sys.rpo); fts_optimize_shutdown(); dict_stats_shutdown(); srv_undo_sources= false; @@ -1204,12 +1209,16 @@ ATTRIBUTE_COLD static dberr_t srv_log_rebuild() /** Rebuild the redo log if needed. */ static dberr_t srv_log_rebuild_if_needed() { + ut_ad(!srv_read_only_mode || recv_sys.rpo); + if (srv_force_recovery == SRV_FORCE_NO_LOG_REDO) /* Completely ignore the redo log. */ return DB_SUCCESS; - if (srv_read_only_mode) + if (recv_sys.rpo) /* Leave the redo log alone. */ return DB_SUCCESS; + if (log_sys.archive) + return DB_SUCCESS; /* Never rebuild archived log files. */ if (log_sys.file_size == srv_log_file_size && log_sys.format == @@ -1370,61 +1379,12 @@ dberr_t srv_start(bool create_new_db) ib::info() << my_crc32c_implementation(); - if (!srv_read_only_mode) { - mysql_mutex_init(srv_monitor_file_mutex_key, - &srv_monitor_file_mutex, nullptr); - mysql_mutex_init(srv_misc_tmpfile_mutex_key, - &srv_misc_tmpfile_mutex, nullptr); - } - - if (!srv_read_only_mode) { - if (srv_innodb_status) { - - srv_monitor_file_name = static_cast( - ut_malloc_nokey( - strlen(fil_path_to_mysql_datadir) - + 20 + sizeof "/innodb_status.")); - - sprintf(srv_monitor_file_name, - "%s/innodb_status." 
ULINTPF, - fil_path_to_mysql_datadir, - static_cast - (IF_WIN(GetCurrentProcessId(), getpid()))); - - srv_monitor_file = my_fopen(srv_monitor_file_name, - O_RDWR|O_TRUNC|O_CREAT, - MYF(MY_WME)); - - if (!srv_monitor_file) { - ib::error() << "Unable to create " - << srv_monitor_file_name << ": " - << strerror(errno); - if (err == DB_SUCCESS) { - err = DB_ERROR; - } - } - } else { - - srv_monitor_file_name = NULL; - srv_monitor_file = os_file_create_tmpfile(); - - if (!srv_monitor_file && err == DB_SUCCESS) { - err = DB_ERROR; - } - } - - srv_misc_tmpfile = os_file_create_tmpfile(); - - if (!srv_misc_tmpfile && err == DB_SUCCESS) { - err = DB_ERROR; - } - } - - if (err != DB_SUCCESS) { - return(srv_init_abort(err)); + if (os_aio_init()) { + return(srv_init_abort(DB_ERROR)); } - if (os_aio_init()) { + if (!srv_read_only_mode + && !(lock_latest_err_file = os_file_create_tmpfile())) { return(srv_init_abort(DB_ERROR)); } @@ -1462,6 +1422,10 @@ dberr_t srv_start(bool create_new_db) || srv_operation == SRV_OPERATION_RESTORE || srv_operation == SRV_OPERATION_RESTORE_EXPORT); ut_ad(!recv_sys.recovery_on); + /* Suppress warnings in fil_space_t::create() for files + that are being read before dict_boot() has recovered + DICT_HDR_MAX_SPACE_ID. */ + fil_system.space_id_reuse_warned = true; if (srv_force_recovery >= SRV_FORCE_NO_LOG_REDO) { sql_print_information("InnoDB: innodb_force_recovery=6" @@ -1527,7 +1491,6 @@ dberr_t srv_start(bool create_new_db) ? 
DB_SUCCESS : DB_ERROR; mysql_mutex_unlock(&recv_sys.mutex); - if (err == DB_SUCCESS) { mtr.start(); err= srv_undo_tablespaces_init(create_new_db, &mtr); @@ -1546,12 +1509,6 @@ dberr_t srv_start(bool create_new_db) return(srv_init_abort(err)); } - /* Initialize objects used by dict stats gathering thread, which - can also be used by recovery if it tries to drop some table */ - if (!srv_read_only_mode) { - dict_stats_init(); - } - trx_sys.create(); if (create_new_db) { @@ -1611,12 +1568,9 @@ dberr_t srv_start(bool create_new_db) if (log_sys.resize_rename()) { return(srv_init_abort(DB_ERROR)); } - } else { - /* Suppress warnings in fil_space_t::create() for files - that are being read before dict_boot() has recovered - DICT_HDR_MAX_SPACE_ID. */ - fil_system.space_id_reuse_warned = true; + if (log_sys.archive) log_sys.archive_set_size(); + } else { /* We always try to do a recovery, even if the database had been shut down normally: this is the normal startup path */ @@ -1714,25 +1668,23 @@ dberr_t srv_start(bool create_new_db) fil_system.space_id_reuse_warned = false; - if (srv_operation > SRV_OPERATION_EXPORT_RESTORED) { - ut_ad(srv_operation == SRV_OPERATION_RESTORE_EXPORT - || srv_operation == SRV_OPERATION_RESTORE); - return(err); - } - /* Upgrade or resize or rebuild the redo logs before generating any dirty pages, so that the old redo log file will not be written to. 
*/ - err = srv_log_rebuild_if_needed(); + if (srv_operation <= SRV_OPERATION_EXPORT_RESTORED) { + err = srv_log_rebuild_if_needed(); - if (err != DB_SUCCESS) { - return srv_init_abort(err); + if (err != DB_SUCCESS) { + return srv_init_abort(err); + } } recv_sys.debug_free(); - if (!srv_read_only_mode) { + if (log_sys.archive) log_sys.archive_set_size(); + + if (!recv_sys.rpo) { const uint32_t flags = FSP_FLAGS_PAGE_SSIZE(); for (uint32_t id = srv_undo_space_id_start; id <= srv_undo_tablespaces; id++) { @@ -1821,6 +1773,56 @@ dberr_t srv_start(bool create_new_db) ut_ad(err == DB_SUCCESS); ut_a(sum_of_new_sizes != ULINT_UNDEFINED); + if (!recv_sys.rpo && srv_operation != SRV_OPERATION_RESTORE) { + if (srv_innodb_status) { + + srv_monitor_file_name = static_cast( + ut_malloc_nokey( + strlen(fil_path_to_mysql_datadir) + + 20 + sizeof "/innodb_status.")); + + sprintf(srv_monitor_file_name, + "%s/innodb_status." ULINTPF, + fil_path_to_mysql_datadir, + static_cast + (IF_WIN(GetCurrentProcessId(), getpid()))); + + srv_monitor_file = my_fopen(srv_monitor_file_name, + O_RDWR|O_TRUNC|O_CREAT, + MYF(MY_WME)); + + if (!srv_monitor_file) { + ib::error() << "Unable to create " + << srv_monitor_file_name << ": " + << strerror(errno); + } + } else { + srv_monitor_file_name = NULL; + srv_monitor_file = os_file_create_tmpfile(); + } + + if (!srv_monitor_file) { + return srv_init_abort(DB_ERROR); + } else { + mysql_mutex_init(srv_monitor_file_mutex_key, + &srv_monitor_file_mutex, nullptr); + } + + if (!(srv_misc_tmpfile = os_file_create_tmpfile())) { + return srv_init_abort(DB_ERROR); + } + + mysql_mutex_init(srv_misc_tmpfile_mutex_key, + &srv_misc_tmpfile_mutex, nullptr); + + } + + if (srv_operation > SRV_OPERATION_EXPORT_RESTORED) { + ut_ad(srv_operation == SRV_OPERATION_RESTORE_EXPORT + || srv_operation == SRV_OPERATION_RESTORE); + return(DB_SUCCESS); + } + /* Create the doublewrite buffer to a new tablespace */ if (!srv_read_only_mode && srv_force_recovery < SRV_FORCE_NO_TRX_UNDO 
&& !buf_dblwr.create()) { @@ -1935,7 +1937,7 @@ dberr_t srv_start(bool create_new_db) } if (srv_force_recovery < SRV_FORCE_NO_TRX_UNDO - && !srv_read_only_mode) { + && !recv_sys.rpo) { /* Drop partially created indexes. */ row_merge_drop_temp_indexes(); /* Rollback incomplete non-DDL transactions */ @@ -1958,7 +1960,8 @@ dberr_t srv_start(bool create_new_db) ut_ad(srv_force_recovery >= SRV_FORCE_NO_UNDO_LOG_SCAN || !purge_sys.enabled()); - if (srv_force_recovery < SRV_FORCE_NO_BACKGROUND) { + if (srv_force_recovery < SRV_FORCE_NO_BACKGROUND + && !recv_sys.rpo) { srv_undo_sources = true; /* Create the dict stats gathering task */ dict_stats_start(); @@ -2010,7 +2013,9 @@ dberr_t srv_start(bool create_new_db) trx_sys.get_max_trx_id()); } - if (!srv_read_only_mode) { + ut_ad(!srv_read_only_mode || recv_sys.rpo); + + if (!recv_sys.rpo) { if (create_new_db) { srv_buffer_pool_load_at_startup = FALSE; } @@ -2034,6 +2039,8 @@ dberr_t srv_start(bool create_new_db) } #endif /* WITH_WSREP */ + dict_stats_init(); + /* Create thread(s) that handles key rotation. This is needed already here as log_preflush_pool_modified_pages will flush dirty pages and that might need e.g. @@ -2054,12 +2061,13 @@ PRAGMA_REENABLE_CHECK_STACK_FRAME */ void innodb_preshutdown() { + ut_ad(!srv_read_only_mode || recv_sys.rpo); static bool first_time= true; if (!first_time) return; first_time= false; - if (srv_read_only_mode) + if (recv_sys.rpo) return; if (!srv_fast_shutdown && srv_operation <= SRV_OPERATION_EXPORT_RESTORED) if (srv_force_recovery < SRV_FORCE_NO_TRX_UNDO && srv_was_started) @@ -2076,6 +2084,7 @@ void innodb_preshutdown() /** Shut down InnoDB. 
*/ void innodb_shutdown() { + ut_ad(!srv_read_only_mode || recv_sys.rpo); innodb_preshutdown(); ut_ad(!srv_undo_sources); switch (srv_operation) { @@ -2106,35 +2115,21 @@ void innodb_shutdown() ut_ad(!buf_page_cleaner_is_active); srv_shutdown_threads(); - if (srv_monitor_file) { - my_fclose(srv_monitor_file, MYF(MY_WME)); - srv_monitor_file = 0; - if (srv_monitor_file_name) { - unlink(srv_monitor_file_name); - ut_free(srv_monitor_file_name); - } - } - - if (srv_misc_tmpfile) { - my_fclose(srv_misc_tmpfile, MYF(MY_WME)); - srv_misc_tmpfile = 0; - } - ut_ad(dict_sys.is_initialised() || !srv_was_started); ut_ad(trx_sys.is_initialised() || !srv_was_started); ut_ad(buf_dblwr.is_created() || !srv_was_started - || srv_read_only_mode + || recv_sys.rpo || srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO); ut_ad(lock_sys.is_initialised() || !srv_was_started); ut_ad(log_sys.is_initialised() || !srv_was_started); - dict_stats_deinit(); - if (srv_started_redo) { ut_ad(!srv_read_only_mode); + ut_ad(!recv_sys.rpo); /* srv_shutdown_bg_undo_sources() already invoked fts_optimize_shutdown(); dict_stats_shutdown(); */ + dict_stats_deinit(); fil_crypt_threads_cleanup(); } @@ -2153,11 +2148,34 @@ void innodb_shutdown() lock_sys.close(); trx_pool_close(); - if (!srv_read_only_mode) { - mysql_mutex_destroy(&srv_monitor_file_mutex); - mysql_mutex_destroy(&srv_misc_tmpfile_mutex); + if (lock_latest_err_file) { + my_fclose(lock_latest_err_file, MYF(MY_WME)); + lock_latest_err_file = nullptr; + } + + if (!recv_sys.rpo && srv_operation != SRV_OPERATION_RESTORE) { + if (srv_monitor_file) { + my_fclose(srv_monitor_file, MYF(MY_WME)); + srv_monitor_file = nullptr; + if (srv_monitor_file_name) { + unlink(srv_monitor_file_name); + ut_free(srv_monitor_file_name); + srv_monitor_file_name = nullptr; + } + mysql_mutex_destroy(&srv_monitor_file_mutex); + } + + if (srv_misc_tmpfile) { + my_fclose(srv_misc_tmpfile, MYF(MY_WME)); + srv_misc_tmpfile = nullptr; + mysql_mutex_destroy(&srv_misc_tmpfile_mutex); + } 
} + ut_ad(!srv_monitor_file_name); + ut_ad(!srv_monitor_file); + ut_ad(!srv_misc_tmpfile); + dict_sys.close(); btr_search_sys_free(); srv_free(); diff --git a/storage/innobase/trx/trx0rec.cc b/storage/innobase/trx/trx0rec.cc index 957480ac3676c..f668d55228f91 100644 --- a/storage/innobase/trx/trx0rec.cc +++ b/storage/innobase/trx/trx0rec.cc @@ -1834,6 +1834,7 @@ trx_undo_report_row_operation( ut_ad(!update || rec); ut_ad(!rec || rec_offs_validate(rec, index, offsets)); ut_ad(!srv_read_only_mode); + ut_ad(!recv_sys.rpo || index->table->is_temporary()); trx = thr_get_trx(thr); /* This function must not be invoked during rollback diff --git a/storage/innobase/trx/trx0roll.cc b/storage/innobase/trx/trx0roll.cc index 06368d4bd2fb6..204c79a98828d 100644 --- a/storage/innobase/trx/trx0roll.cc +++ b/storage/innobase/trx/trx0roll.cc @@ -486,6 +486,7 @@ Note: this is done in a background thread. */ void trx_rollback_all_recovered(void*) { ut_ad(!srv_read_only_mode); + ut_ad(!recv_sys.rpo); if (trx_sys.rw_trx_hash.size()) { ib::info() << "Starting in background the rollback of" diff --git a/storage/innobase/trx/trx0trx.cc b/storage/innobase/trx/trx0trx.cc index 106b9d94ddae3..0fbd7fa211ad8 100644 --- a/storage/innobase/trx/trx0trx.cc +++ b/storage/innobase/trx/trx0trx.cc @@ -513,13 +513,15 @@ inline void trx_t::release_locks() TRANSACTIONAL_TARGET void trx_free_at_shutdown(trx_t *trx) { ut_ad(trx->is_recovered); + ut_ad(!srv_read_only_mode || recv_sys.rpo); + ut_a(trx_state_eq(trx, TRX_STATE_PREPARED) || trx_state_eq(trx, TRX_STATE_PREPARED_RECOVERED) || (trx_state_eq(trx, TRX_STATE_ACTIVE) && (!srv_was_started || srv_operation == SRV_OPERATION_RESTORE || srv_operation == SRV_OPERATION_RESTORE_EXPORT - || srv_read_only_mode + || recv_sys.rpo || srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO || (!srv_is_being_started && !srv_undo_sources && srv_fast_shutdown)))); diff --git a/storage/innobase/trx/trx0undo.cc b/storage/innobase/trx/trx0undo.cc index fc519e5572a1a..26a4d33e7f1f2 
100644 --- a/storage/innobase/trx/trx0undo.cc +++ b/storage/innobase/trx/trx0undo.cc @@ -1494,6 +1494,8 @@ void trx_undo_set_state_at_prepare(trx_undo_t *undo, bool rollback, mtr_t *mtr) /** At shutdown, frees the undo logs of a transaction. */ void trx_undo_free_at_shutdown(trx_t *trx) { + ut_ad(!srv_read_only_mode || recv_sys.rpo); + if (trx_undo_t*& undo = trx->rsegs.m_redo.undo) { switch (undo->state) { case TRX_UNDO_PREPARED: @@ -1507,7 +1509,7 @@ void trx_undo_free_at_shutdown(trx_t *trx) /* trx_t::commit_state() assigns trx->state = TRX_STATE_COMMITTED_IN_MEMORY. */ ut_a(!srv_was_started - || srv_read_only_mode + || recv_sys.rpo || srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO || srv_fast_shutdown); break;