MySQL 5.7 MHA 最佳实践

MHA 集群架构

参考文档：
MHA原理：https://code.google.com/p/mysql-master-ha/wiki/HowMHAWorks
MHA原理PPT：http://www.slideshare.net/matsunobu/automated-master-failover
Linux配置代理方法：http://blog.csdn.net/bojie5744/article/details/42148719

软件下载：
Centos Base Yum Repository： http://mirrors.163.com/.help/CentOS6-Base-163.repo
epel(RHEL 6)Yum Repository：http://dl.fedoraproject.org/pub/epel/6/x86_64/epel-release-6-8.noarch.rpm
MySQL5.7 Yum Repository：https://dev.mysql.com/get/mysql57-community-release-el6-11.noarch.rpm
mysql-master-ha(mgr)：https://github.com/linyue515/mysql-master-ha/raw/master/mha4mysql-manager-0.57-0.el7.noarch.rpm
mysql-master-ha(node)：https://github.com/linyue515/mysql-master-ha/raw/master/mha4mysql-node-0.57-0.el7.noarch.rpm

系统版本
CentOS release 6.7 (Final) x86_64
MySQL版本
mysql-5.7.20-x86_64(RPM)
MHA版本
mha4mysql-manager-0.57
mha4mysql-node-0.57
--- 前期准备工作 ---
[root@node1-slave2 mysql]# cat /etc/hosts
127.0.0.1 localhost localhost.localdomain localhost4 localhost4.localdomain4
::1 localhost localhost.localdomain localhost6 localhost6.localdomain6
192.168.137.140 mha-manager
192.168.137.141 node1-master
192.168.137.142 node1-slave1
192.168.137.143 node1-slave2
192.168.137.144 lvs-master
192.168.137.145 lvs-backup

mha-manager 节点上传相关软件

[root@mha-manager opt]# ls -al
total 156
drwxr-xr-x. 3 root root 4096 Dec 25 21:24 .
dr-xr-xr-x. 25 root root 4096 Dec 25 20:50 ..
-rw-r--r-- 1 root root 81080 Dec 25 21:22 mha4mysql-manager-0.57-0.el7.noarch.rpm
-rw-r--r-- 1 root root 35360 Dec 25 21:22 mha4mysql-node-0.57-0.el7.noarch.rpm
-rw-r--r-- 1 root root 25664 Dec 25 21:22 mysql57-community-release-el6-11.noarch.rpm
drwxr-xr-x. 2 root root 4096 Mar 26 2015 rh

scp -rp mha4mysql-* mysql57-community-release-el6-11.noarch.rpm node1-slave1:/opt/
scp -rp mha4mysql-* mysql57-community-release-el6-11.noarch.rpm node1-slave2:/opt/
scp -rp mha4mysql-* mysql57-community-release-el6-11.noarch.rpm node1-master:/opt/

mha-manager/node-master/slave 安装 mysql57-community-release-el6-11.noarch.rpm

MHA manager 安装 MySQL客户端
yum -y install mysql-community-client.x86_64
mha-node-master/slave 安装MySQL服务端
yum -y install mysql-community-server.x86_64

(2) master/slave
mkdir /etc/mysql
mkdir -p /data1/db3389
mkdir -p /data1/tmp
chown -R mysql:mysql /data1/db3389
chown -R mysql:mysql /data1/tmp
cd /etc/mysql
vim my3389.cnf

[mysqld]
# GENERAL #
user = mysql
port = 3389
default_storage_engine = InnoDB
socket = /data1/db3389/my3389.sock
pid_file = /data1/db3389/mysql.pid
#read-only =0
tmpdir = /data1/tmp
#key_buffer_size = 128M
max_allowed_packet = 32M
max_connect_errors = 1000000
datadir = /data1/db3389/
log_bin = 1371413389-bin
relay-log= 1371413389-relay-bin
expire_logs_days = 7
#sync_binlog = 0
tmp_table_size = 32M
max_heap_table_size = 32M
max_connections = 5000
thread_cache_size = 512
table_definition_cache = 4096
table_open_cache = 4096
wait_timeout = 28800
interactive_timeout = 28800
transaction-isolation = READ-COMMITTED
binlog-format=row
character-set-server=utf8
skip-name-resolve
back_log=1024
explicit_defaults_for_timestamp=true
server_id=1371403389

# INNODB #
innodb_flush_method = O_DIRECT
#innodb_data_home_dir = /data1/db3389
innodb_data_file_path = ibdata1:100M:autoextend
#redo log
#innodb_log_group_home_dir=./
innodb_log_files_in_group = 3
innodb_log_file_size = 128M
#innodb performance
innodb_flush_log_at_trx_commit = 0
innodb_file_per_table = 1
innodb_buffer_pool_instances = 8
innodb_io_capacity = 2000
innodb_lock_wait_timeout = 30
binlog_error_action = ABORT_SERVER
innodb_buffer_pool_size = 128M
innodb_max_dirty_pages_pct=90
innodb_file_format=Barracuda
innodb_support_xa=0
innodb_buffer_pool_dump_at_shutdown = 1
innodb_buffer_pool_load_at_startup = 1
#innodb undo log
innodb_undo_tablespaces=4
innodb_undo_logs=2048
innodb_purge_rseg_truncate_frequency=512
innodb_max_undo_log_size=2G
innodb_undo_log_truncate=1

log_error = error.log
#log_queries_not_using_indexes = 1
slow_query_log = 1
slow_query_log_file = slow-queries.log
long_query_time=2
gtid_mode=ON
enforce-gtid-consistency
log-slave-updates
master-info-repository=TABLE
relay-log-info-repository=TABLE
sync_master_info = 10000
slave_sql_verify_checksum=1
skip-slave-start
init-connect='SET NAMES utf8'
character-set-server=utf8
skip-character-set-client-handshake
bind-address=0.0.0.0
skip-external-locking
slave-parallel-workers=6

[mysql5.6]
myisam_recover = FORCE,BACKUP

scp -rp my3389.cnf node1-slave1:/etc/mysql/

node1-slave1/slave2 只需修改以下参数（server_id 在每个节点上必须唯一；以下以 node1-slave1 为例，node1-slave2 依此类推）
log_bin = 1371423389-bin
relay-log= 1371423389-relay-bin
server_id=1371423389

mysqld --defaults-file=/etc/mysql/my3389.cnf --initialize --user=mysql   # 初始化数据库

mysqld_safe --defaults-file=/etc/mysql/my3389.cnf &   # 启动MySQL数据库

echo "mysqld_safe --defaults-file=/etc/mysql/my3389.cnf &" >> /etc/rc.local

cat /data1/db3389/error.log | grep temp   # 从错误日志中查找初始化时生成的 root 临时密码，用于下面的首次登录
mysql -S /data1/db3389/my3389.sock -p'srbe,bLde3sp'
mysql> set password=''; #重置密码为空
Query OK, 0 rows affected (0.01 sec)

master主库检查 GTID
mysql> show master status ;
+-----------------------+----------+--------------+------------------+----------------------------------------+
| File | Position | Binlog_Do_DB | Binlog_Ignore_DB | Executed_Gtid_Set |
+-----------------------+----------+--------------+------------------+----------------------------------------+
| 1371413389-bin.000002 | 357 | | | 3503c65e-e949-11e7-b2c3-000c295345bc:1 |
+-----------------------+----------+--------------+------------------+----------------------------------------+
1 row in set (0.00 sec)

mysql> show master status\G ;
*************************** 1. row ***************************
File: 1371413389-bin.000002
Position: 357
Binlog_Do_DB:
Binlog_Ignore_DB:
Executed_Gtid_Set: 3503c65e-e949-11e7-b2c3-000c295345bc:1
1 row in set (0.00 sec)

ERROR:
No query specified

master主库操作
grant replication slave, replication client on *.* to replica@'192.168.137.%' identified by 'mycatDBA';
grant all privileges on *.* to mha@'192.168.137.140' identified by 'mysqlDBA';
flush privileges;

mysqldump -S /data1/db3389/my3389.sock --single-transaction --master-data=2 --opt -A | gzip > /data1/tmp/full_3389.tar.gz

mysqldump -S /data1/db3389/my3389.sock --single-transaction --master-data=2 --opt -A > /tmp/full3389.sql
scp -rp /tmp/full3389.sql node1-slave1:/tmp/

node1-slave1端MySQL操作
mysql> reset master ;
Query OK, 0 rows affected (0.02 sec)
mysql -S /data1/db3389/my3389.sock < /tmp/full3389.sql

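change master to master_host='192.168.137.141',master_port=3389,master_user='replica',master_password='mycatDBA',master_auto_position=1;
start slave;   # 配置文件中设置了 skip-slave-start，需要手动启动复制
show slave status\G   # 确认 Slave_IO_Running 和 Slave_SQL_Running 均为 Yes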

manager/master/slave 都需要安装的依赖包
yum -y install perl-DBD-MySQL perl-Config-Tiny perl-Log-Dispatch perl-Parallel-ForkManager perl-Time-HiRes

#根据MHA角色安装对应的软件包即可
yum -y --nogpgcheck install mha4mysql-node-0.57-0.el7.noarch.rpm   # 所有节点安装
yum -y install --nogpgcheck mha4mysql-manager-0.57-0.el7.noarch.rpm   # manager节点安装

master添加VIP

/sbin/ifconfig eth0:1 192.168.137.200 broadcast 192.168.137.255 netmask 255.255.255.0
/sbin/arping -f -q -c 5 -w 5 -I eth0 -s 192.168.137.200 -U 192.168.137.1

配置SSH互信
mha-manager/node1-slave1/node1-slave2 操作
ssh-keygen -t rsa
rm -rf ~/.ssh/*

node1-master操作
ssh-keygen -t rsa
cd .ssh/
mv id_rsa.pub authorized_keys
scp -rp authorized_keys id_rsa mha-manager:~/.ssh/
scp -rp authorized_keys id_rsa node1-slave1:~/.ssh/
scp -rp authorized_keys id_rsa node1-slave2:~/.ssh/

然后相互验证
ssh node1-slave1 date
ssh node1-slave2 date
ssh mha-manager date

所有节点配置mysql用户sudo权限

cd /etc/sudoers.d/
vim mysql 添加如下内容
User_Alias MYSQL_USERS = mysql
Runas_Alias MYSQL_RUNAS = root
Cmnd_Alias MYSQL_CMNDS = ALL
MYSQL_USERS ALL = (MYSQL_RUNAS) NOPASSWD: MYSQL_CMNDS

manager 配置MHA 相关文件
mkdir /etc/mha

# cat app3389.cnf
[server default]
user=mha
password=mysqlDBA
manager_workdir=/data1/mha/masterha/app3389
manager_log=/data1/mha/masterha/app3389/app3389.log
remote_workdir=/data1/mha/masterha/app3389
ssh_user=mysql
repl_user=replica
repl_password=mycatDBA
ping_interval=3

secondary_check_script="masterha_secondary_check -s 192.168.137.140 -s 192.168.137.140"
master_ip_failover_script="/etc/mha/master_ip_failover.sh 192.168.137.200 1"
master_ip_online_change_script="/etc/mha/master_ip_online_change.sh 192.168.137.200 1"
shutdown_script="/etc/mha/power_manager"
#report_script="/etc/mha/end_report"

[server1]
hostname=192.168.137.141
port=3389
master_binlog_dir=/data1/db3389
candidate_master=1
master_pid_file=/data1/db3389/mysql.pid

[server2]
hostname=192.168.137.142
port=3389
master_binlog_dir=/data1/db3389
candidate_master=1
master_pid_file=/data1/db3389/mysql.pid

[server3]
hostname=192.168.137.143
port=3389
master_binlog_dir=/data1/db3389
candidate_master=1
master_pid_file=/data1/db3389/mysql.pid

[binlog1]
hostname=192.168.137.140
master_binlog_dir=/data1/mha/binlog/3389
no_master=1
ignore_fail=1

上传MHA 脚本到 /etc/mha 路径下

chmod 755 master_ip_*
chmod 755 power_manager

创建MHA、BINLOG工作目录
mkdir -p /data1/mha/masterha/app3389
mkdir -p /data1/mha/binlog/3389
chown -R mysql:mysql /data1/mha/binlog/3389

chown -R mysql:mysql /data1/mha/masterha/app3389

4.8 manager 端启动 BINLOG SERVER
查看 node1-master 的 binlog 位置

mysql> show master status
-> ;
+-----------------------+----------+--------------+------------------+------------------------------------------+
| File | Position | Binlog_Do_DB | Binlog_Ignore_DB | Executed_Gtid_Set |
+-----------------------+----------+--------------+------------------+------------------------------------------+
| 1371413389-bin.000002 | 968 | | | 3503c65e-e949-11e7-b2c3-000c295345bc:1-3 |
+-----------------------+----------+--------------+------------------+------------------------------------------+
1 row in set (0.00 sec)

mha-manager 端操作
su - mysql
cd /data1/mha/binlog/3389
nohup mysqlbinlog -R --host=192.168.137.141 -P3389 --user=mha --password=mysqlDBA --raw --stop-never 1371413389-bin.000002 &
ps -ef | grep mysqlbinlog | grep -v grep # 验证binlog server进程是否存在

[mysql@mha-manager 3389]$ masterha_check_ssh --conf=/etc/mha/app3389.cnf
Tue Dec 26 08:37:50 2017 - [warning] Global configuration file /etc/masterha_default.cnf not found. Skipping.
Tue Dec 26 08:37:50 2017 - [info] Reading application default configuration from /etc/mha/app3389.cnf..
Tue Dec 26 08:37:50 2017 - [info] Reading server configuration from /etc/mha/app3389.cnf..
Tue Dec 26 08:37:50 2017 - [info] Starting SSH connection tests..
Tue Dec 26 08:37:51 2017 - [debug]
Tue Dec 26 08:37:50 2017 - [debug] Connecting via SSH from mysql@192.168.137.141(192.168.137.141:22) to mysql@192.168.137.142(192.168.137.142:22)..
Tue Dec 26 08:37:51 2017 - [debug] ok.
Tue Dec 26 08:37:51 2017 - [debug] Connecting via SSH from mysql@192.168.137.141(192.168.137.141:22) to mysql@192.168.137.143(192.168.137.143:22)..
Tue Dec 26 08:37:51 2017 - [debug] ok.
Tue Dec 26 08:37:51 2017 - [debug]
Tue Dec 26 08:37:51 2017 - [debug] Connecting via SSH from mysql@192.168.137.142(192.168.137.142:22) to mysql@192.168.137.141(192.168.137.141:22)..
Tue Dec 26 08:37:51 2017 - [debug] ok.
Tue Dec 26 08:37:51 2017 - [debug] Connecting via SSH from mysql@192.168.137.142(192.168.137.142:22) to mysql@192.168.137.143(192.168.137.143:22)..
Tue Dec 26 08:37:51 2017 - [debug] ok.
Tue Dec 26 08:37:52 2017 - [debug]
Tue Dec 26 08:37:51 2017 - [debug] Connecting via SSH from mysql@192.168.137.143(192.168.137.143:22) to mysql@192.168.137.141(192.168.137.141:22)..
Tue Dec 26 08:37:51 2017 - [debug] ok.
Tue Dec 26 08:37:51 2017 - [debug] Connecting via SSH from mysql@192.168.137.143(192.168.137.143:22) to mysql@192.168.137.142(192.168.137.142:22)..
Tue Dec 26 08:37:51 2017 - [debug] ok.
Tue Dec 26 08:37:52 2017 - [info] All SSH connection tests passed successfully.

[mysql@mha-manager 3389]$ masterha_check_repl --conf=/etc/mha/app3389.cnf
Mon Dec 25 22:47:07 2017 - [warning] Global configuration file /etc/masterha_default.cnf not found. Skipping.
Mon Dec 25 22:47:07 2017 - [info] Reading application default configuration from /etc/mha/app3389.cnf..
Mon Dec 25 22:47:07 2017 - [info] Reading server configuration from /etc/mha/app3389.cnf..
Mon Dec 25 22:47:07 2017 - [info] MHA::MasterMonitor version 0.57.
Mon Dec 25 22:47:07 2017 - [info] GTID failover mode = 1
Mon Dec 25 22:47:07 2017 - [info] Dead Servers:
Mon Dec 25 22:47:07 2017 - [info] Alive Servers:
Mon Dec 25 22:47:07 2017 - [info] 192.168.137.141(192.168.137.141:3389)
Mon Dec 25 22:47:07 2017 - [info] 192.168.137.142(192.168.137.142:3389)
Mon Dec 25 22:47:07 2017 - [info] 192.168.137.143(192.168.137.143:3389)
Mon Dec 25 22:47:07 2017 - [info] Alive Slaves:
Mon Dec 25 22:47:07 2017 - [info] 192.168.137.142(192.168.137.142:3389) Version=5.7.21-log (oldest major version between slaves) log-bin:enabled
Mon Dec 25 22:47:07 2017 - [info] GTID ON
Mon Dec 25 22:47:07 2017 - [info] Replicating from 192.168.137.141(192.168.137.141:3389)
Mon Dec 25 22:47:07 2017 - [info] Primary candidate for the new Master (candidate_master is set)
Mon Dec 25 22:47:07 2017 - [info] 192.168.137.143(192.168.137.143:3389) Version=5.7.21-log (oldest major version between slaves) log-bin:enabled
Mon Dec 25 22:47:07 2017 - [info] GTID ON
Mon Dec 25 22:47:07 2017 - [info] Replicating from 192.168.137.141(192.168.137.141:3389)
Mon Dec 25 22:47:07 2017 - [info] Primary candidate for the new Master (candidate_master is set)
Mon Dec 25 22:47:07 2017 - [info] Current Alive Master: 192.168.137.141(192.168.137.141:3389)
Mon Dec 25 22:47:07 2017 - [info] Checking slave configurations..
Mon Dec 25 22:47:07 2017 - [info] read_only=1 is not set on slave 192.168.137.142(192.168.137.142:3389).
Mon Dec 25 22:47:07 2017 - [info] read_only=1 is not set on slave 192.168.137.143(192.168.137.143:3389).
Mon Dec 25 22:47:07 2017 - [info] Checking replication filtering settings..
Mon Dec 25 22:47:07 2017 - [info] binlog_do_db= , binlog_ignore_db=
Mon Dec 25 22:47:07 2017 - [info] Replication filtering check ok.
Mon Dec 25 22:47:07 2017 - [info] GTID (with auto-pos) is supported. Skipping all SSH and Node package checking.
Warning: Permanently added '192.168.137.140' (RSA) to the list of known hosts.
Mon Dec 25 22:47:07 2017 - [info] HealthCheck: SSH to 192.168.137.140 is reachable.
Mon Dec 25 22:47:07 2017 - [info] Binlog server 192.168.137.140 is reachable.
Mon Dec 25 22:47:07 2017 - [info] Checking recovery script configurations on 192.168.137.140(192.168.137.140:3306)..
Mon Dec 25 22:47:07 2017 - [info] Executing command: save_binary_logs --command=test --start_pos=4 --binlog_dir=/data1/mha/binlog/3389 --output_file=/data1/mha/masterha/app3389/save_binary_logs_test --manager_version=0.57 --start_file=1371413389-bin.000002
Mon Dec 25 22:47:07 2017 - [info] Connecting to mysql@192.168.137.140(192.168.137.140:22)..
Creating /data1/mha/masterha/app3389 if not exists.. ok.
Checking output directory is accessible or not..
ok.
Binlog found at /data1/mha/binlog/3389, up to 1371413389-bin.000002
Mon Dec 25 22:47:08 2017 - [info] Binlog setting check done.
Mon Dec 25 22:47:08 2017 - [info] Checking SSH publickey authentication settings on the current master..
Mon Dec 25 22:47:08 2017 - [info] HealthCheck: SSH to 192.168.137.141 is reachable.
Mon Dec 25 22:47:08 2017 - [info]
192.168.137.141(192.168.137.141:3389) (current master)
+--192.168.137.142(192.168.137.142:3389)
+--192.168.137.143(192.168.137.143:3389)

Mon Dec 25 22:47:08 2017 - [info] Checking replication health on 192.168.137.142..
Mon Dec 25 22:47:08 2017 - [info] ok.
Mon Dec 25 22:47:08 2017 - [info] Checking replication health on 192.168.137.143..
Mon Dec 25 22:47:08 2017 - [info] ok.
Mon Dec 25 22:47:08 2017 - [info] Checking master_ip_failover_script status:
Mon Dec 25 22:47:08 2017 - [info] /etc/mha/master_ip_failover.sh 192.168.137.200 1 --command=status --ssh_user=mysql --orig_master_host=192.168.137.141 --orig_master_ip=192.168.137.141 --orig_master_port=3389
Checking the Status of the script.. OK
Mon Dec 25 22:47:08 2017 - [info] OK.
Mon Dec 25 22:47:08 2017 - [info] Checking shutdown script status:
Mon Dec 25 22:47:08 2017 - [info] /etc/mha/power_manager --command=status --ssh_user=mysql --host=192.168.137.141 --ip=192.168.137.141
Mon Dec 25 22:47:08 2017 - [info] OK.
Mon Dec 25 22:47:08 2017 - [info] Got exit code 0 (Not master dead).

MySQL Replication Health is OK.

启动 MHA
[mysql@mha-manager 3389]$ nohup masterha_manager --conf=/etc/mha/app3389.cnf --ignore_last_failover &
[2] 3240
[mysql@mha-manager 3389]$ nohup: ignoring input and appending output to `nohup.out'

检查MHA 状态

[mysql@mha-manager 3389]$ masterha_check_status --conf=/etc/mha/app3389.cnf
app3389 (pid:3240) is running(0:PING_OK), master:192.168.137.141

====== MHA切换测试 ======
故障自动切换

主库down或者主机down，然后测试切换是否成功。

[root@node1-master db3389]# mysqladmin -S /data1/db3389/my3389.sock shutdown

[mysql@mha-manager app3389]$ pwd
/data1/mha/masterha/app3389
[mysql@mha-manager app3389]$ tail -f app3389.log
Tue Dec 26 15:58:42 2017 - [info] /etc/mha/master_ip_failover.sh 192.168.137.200 1 --command=status --ssh_user=mysql --orig_master_host=192.168.137.141 --orig_master_ip=192.168.137.141 --orig_master_port=3389
Checking the Status of the script.. OK
Tue Dec 26 15:58:42 2017 - [info] OK.
Tue Dec 26 15:58:42 2017 - [info] Checking shutdown script status:
Tue Dec 26 15:58:42 2017 - [info] /etc/mha/power_manager --command=status --ssh_user=mysql --host=192.168.137.141 --ip=192.168.137.141
Tue Dec 26 15:58:42 2017 - [info] OK.
Tue Dec 26 15:58:42 2017 - [info] Set master ping interval 3 seconds.
Tue Dec 26 15:58:42 2017 - [info] Set secondary check script: masterha_secondary_check -s 192.168.137.140 -s 192.168.137.140
Tue Dec 26 15:58:42 2017 - [info] Starting ping health check on 192.168.137.141(192.168.137.141:3389)..
Tue Dec 26 15:58:42 2017 - [info] Ping(SELECT) succeeded, waiting until MySQL doesn't respond..
Tue Dec 26 16:02:33 2017 - [warning] Got error on MySQL select ping: 2006 (MySQL server has gone away)
Tue Dec 26 16:02:33 2017 - [info] Executing SSH check script: exit 0
Tue Dec 26 16:02:33 2017 - [info] Executing secondary network check script: masterha_secondary_check -s 192.168.137.140 -s 192.168.137.140 --user=mysql --master_host=192.168.137.141 --master_ip=192.168.137.141 --master_port=3389 --master_user=mha --master_password=mysqlDBA --ping_type=SELECT
Tue Dec 26 16:02:33 2017 - [info] HealthCheck: SSH to 192.168.137.141 is reachable.
Monitoring server 192.168.137.140 is reachable, Master is not reachable from 192.168.137.140. OK.
Monitoring server 192.168.137.140 is reachable, Master is not reachable from 192.168.137.140. OK.
Tue Dec 26 16:02:33 2017 - [info] Master is not reachable from all other monitoring servers. Failover should start.
Tue Dec 26 16:02:36 2017 - [warning] Got error on MySQL connect: 2013 (Lost connection to MySQL server at 'reading initial communication packet', system error: 111)
Tue Dec 26 16:02:36 2017 - [warning] Connection failed 2 time(s)..
Tue Dec 26 16:02:39 2017 - [warning] Got error on MySQL connect: 2013 (Lost connection to MySQL server at 'reading initial communication packet', system error: 111)
Tue Dec 26 16:02:39 2017 - [warning] Connection failed 3 time(s)..
Tue Dec 26 16:02:42 2017 - [warning] Got error on MySQL connect: 2013 (Lost connection to MySQL server at 'reading initial communication packet', system error: 111)
Tue Dec 26 16:02:42 2017 - [warning] Connection failed 4 time(s)..
Tue Dec 26 16:02:42 2017 - [warning] Master is not reachable from health checker!
Tue Dec 26 16:02:42 2017 - [warning] Master 192.168.137.141(192.168.137.141:3389) is not reachable!
Tue Dec 26 16:02:42 2017 - [warning] SSH is reachable.
Tue Dec 26 16:02:42 2017 - [info] Connecting to a master server failed. Reading configuration file /etc/masterha_default.cnf and /etc/mha/app3389.cnf again, and trying to connect to all servers to check server status..
Tue Dec 26 16:02:42 2017 - [warning] Global configuration file /etc/masterha_default.cnf not found. Skipping.
Tue Dec 26 16:02:42 2017 - [info] Reading application default configuration from /etc/mha/app3389.cnf..
Tue Dec 26 16:02:42 2017 - [info] Reading server configuration from /etc/mha/app3389.cnf..
Tue Dec 26 16:02:42 2017 - [info] GTID failover mode = 1
Tue Dec 26 16:02:42 2017 - [info] Dead Servers:
Tue Dec 26 16:02:42 2017 - [info] 192.168.137.141(192.168.137.141:3389)
Tue Dec 26 16:02:42 2017 - [info] Alive Servers:
Tue Dec 26 16:02:42 2017 - [info] 192.168.137.142(192.168.137.142:3389)
Tue Dec 26 16:02:42 2017 - [info] 192.168.137.143(192.168.137.143:3389)
Tue Dec 26 16:02:42 2017 - [info] Alive Slaves:
Tue Dec 26 16:02:42 2017 - [info] 192.168.137.142(192.168.137.142:3389) Version=5.7.21-log (oldest major version between slaves) log-bin:enabled
Tue Dec 26 16:02:42 2017 - [info] GTID ON
Tue Dec 26 16:02:42 2017 - [info] Replicating from 192.168.137.141(192.168.137.141:3389)
Tue Dec 26 16:02:42 2017 - [info] Primary candidate for the new Master (candidate_master is set)
Tue Dec 26 16:02:42 2017 - [info] 192.168.137.143(192.168.137.143:3389) Version=5.7.21-log (oldest major version between slaves) log-bin:enabled
Tue Dec 26 16:02:42 2017 - [info] GTID ON
Tue Dec 26 16:02:42 2017 - [info] Replicating from 192.168.137.141(192.168.137.141:3389)
Tue Dec 26 16:02:42 2017 - [info] Primary candidate for the new Master (candidate_master is set)
Tue Dec 26 16:02:42 2017 - [info] Checking slave configurations..
Tue Dec 26 16:02:42 2017 - [info] read_only=1 is not set on slave 192.168.137.142(192.168.137.142:3389).
Tue Dec 26 16:02:42 2017 - [info] read_only=1 is not set on slave 192.168.137.143(192.168.137.143:3389).
Tue Dec 26 16:02:42 2017 - [info] Checking replication filtering settings..
Tue Dec 26 16:02:42 2017 - [info] Replication filtering check ok.
Tue Dec 26 16:02:42 2017 - [info] Master is down!
Tue Dec 26 16:02:42 2017 - [info] Terminating monitoring script.
Tue Dec 26 16:02:42 2017 - [info] Got exit code 20 (Master dead).
Tue Dec 26 16:02:42 2017 - [info] MHA::MasterFailover version 0.57.
Tue Dec 26 16:02:42 2017 - [info] Starting master failover.
Tue Dec 26 16:02:42 2017 - [info]
Tue Dec 26 16:02:42 2017 - [info] * Phase 1: Configuration Check Phase..
Tue Dec 26 16:02:42 2017 - [info]
Tue Dec 26 16:02:42 2017 - [info] HealthCheck: SSH to 192.168.137.140 is reachable.
Tue Dec 26 16:02:42 2017 - [info] Binlog server 192.168.137.140 is reachable.
Tue Dec 26 16:02:43 2017 - [info] GTID failover mode = 1
Tue Dec 26 16:02:43 2017 - [info] Dead Servers:
Tue Dec 26 16:02:43 2017 - [info] 192.168.137.141(192.168.137.141:3389)
Tue Dec 26 16:02:43 2017 - [info] Checking master reachability via MySQL(double check)...
Tue Dec 26 16:02:43 2017 - [info] ok.
Tue Dec 26 16:02:43 2017 - [info] Alive Servers:
Tue Dec 26 16:02:43 2017 - [info] 192.168.137.142(192.168.137.142:3389)
Tue Dec 26 16:02:43 2017 - [info] 192.168.137.143(192.168.137.143:3389)
Tue Dec 26 16:02:43 2017 - [info] Alive Slaves:
Tue Dec 26 16:02:43 2017 - [info] 192.168.137.142(192.168.137.142:3389) Version=5.7.21-log (oldest major version between slaves) log-bin:enabled
Tue Dec 26 16:02:43 2017 - [info] GTID ON
Tue Dec 26 16:02:43 2017 - [info] Replicating from 192.168.137.141(192.168.137.141:3389)
Tue Dec 26 16:02:43 2017 - [info] Primary candidate for the new Master (candidate_master is set)
Tue Dec 26 16:02:43 2017 - [info] 192.168.137.143(192.168.137.143:3389) Version=5.7.21-log (oldest major version between slaves) log-bin:enabled
Tue Dec 26 16:02:43 2017 - [info] GTID ON
Tue Dec 26 16:02:43 2017 - [info] Replicating from 192.168.137.141(192.168.137.141:3389)
Tue Dec 26 16:02:43 2017 - [info] Primary candidate for the new Master (candidate_master is set)
Tue Dec 26 16:02:43 2017 - [info] Starting GTID based failover.
Tue Dec 26 16:02:43 2017 - [info]
Tue Dec 26 16:02:43 2017 - [info] ** Phase 1: Configuration Check Phase completed.
Tue Dec 26 16:02:43 2017 - [info]
Tue Dec 26 16:02:43 2017 - [info] * Phase 2: Dead Master Shutdown Phase..
Tue Dec 26 16:02:43 2017 - [info]
Tue Dec 26 16:02:43 2017 - [info] Forcing shutdown so that applications never connect to the current master..
Tue Dec 26 16:02:43 2017 - [info] Executing master IP deactivation script:
Tue Dec 26 16:02:43 2017 - [info] /etc/mha/master_ip_failover.sh 192.168.137.200 1 --orig_master_host=192.168.137.141 --orig_master_ip=192.168.137.141 --orig_master_port=3389 --command=stopssh --ssh_user=mysql
Disabling the VIP on old master: 192.168.137.141
=======sudo /sbin/ifconfig eth0:1 down==================
Tue Dec 26 16:02:43 2017 - [info] done.
Tue Dec 26 16:02:43 2017 - [info] Executing SHUTDOWN script:
Tue Dec 26 16:02:43 2017 - [info] /etc/mha/power_manager --command=stopssh --ssh_user=mysql --host=192.168.137.141 --ip=192.168.137.141 --port=3389 --pid_file=/data1/db3389/mysql.pid
command-line line 0: invalid time value.
Killing mysqld instance based on /data1/db3389/mysql.pid failed.
Killing all mysqld instances on 192.168.137.141..
command-line line 0: invalid time value.
ssh NOT reachable. Power off needed (rc1=255, rc2=0).
Tue Dec 26 16:02:43 2017 - [info] Power off done.
Tue Dec 26 16:02:43 2017 - [info] * Phase 2: Dead Master Shutdown Phase completed.
Tue Dec 26 16:02:43 2017 - [info]
Tue Dec 26 16:02:43 2017 - [info] * Phase 3: Master Recovery Phase..
Tue Dec 26 16:02:43 2017 - [info]
Tue Dec 26 16:02:43 2017 - [info] * Phase 3.1: Getting Latest Slaves Phase..
Tue Dec 26 16:02:43 2017 - [info]
Tue Dec 26 16:02:43 2017 - [info] The latest binary log file/position on all slaves is 1371413389-bin.000004:194
Tue Dec 26 16:02:43 2017 - [info] Latest slaves (Slaves that received relay log files to the latest):
Tue Dec 26 16:02:43 2017 - [info] 192.168.137.142(192.168.137.142:3389) Version=5.7.21-log (oldest major version between slaves) log-bin:enabled
Tue Dec 26 16:02:43 2017 - [info] GTID ON
Tue Dec 26 16:02:43 2017 - [info] Replicating from 192.168.137.141(192.168.137.141:3389)
Tue Dec 26 16:02:43 2017 - [info] Primary candidate for the new Master (candidate_master is set)
Tue Dec 26 16:02:43 2017 - [info] 192.168.137.143(192.168.137.143:3389) Version=5.7.21-log (oldest major version between slaves) log-bin:enabled
Tue Dec 26 16:02:43 2017 - [info] GTID ON
Tue Dec 26 16:02:43 2017 - [info] Replicating from 192.168.137.141(192.168.137.141:3389)
Tue Dec 26 16:02:43 2017 - [info] Primary candidate for the new Master (candidate_master is set)
Tue Dec 26 16:02:43 2017 - [info] The oldest binary log file/position on all slaves is 1371413389-bin.000004:194
Tue Dec 26 16:02:43 2017 - [info] Oldest slaves:
Tue Dec 26 16:02:43 2017 - [info] 192.168.137.142(192.168.137.142:3389) Version=5.7.21-log (oldest major version between slaves) log-bin:enabled
Tue Dec 26 16:02:43 2017 - [info] GTID ON
Tue Dec 26 16:02:43 2017 - [info] Replicating from 192.168.137.141(192.168.137.141:3389)
Tue Dec 26 16:02:43 2017 - [info] Primary candidate for the new Master (candidate_master is set)
Tue Dec 26 16:02:43 2017 - [info] 192.168.137.143(192.168.137.143:3389) Version=5.7.21-log (oldest major version between slaves) log-bin:enabled
Tue Dec 26 16:02:43 2017 - [info] GTID ON
Tue Dec 26 16:02:43 2017 - [info] Replicating from 192.168.137.141(192.168.137.141:3389)
Tue Dec 26 16:02:43 2017 - [info] Primary candidate for the new Master (candidate_master is set)
Tue Dec 26 16:02:43 2017 - [info]
Tue Dec 26 16:02:43 2017 - [info] * Phase 3.3: Determining New Master Phase..
Tue Dec 26 16:02:43 2017 - [info]
Tue Dec 26 16:02:43 2017 - [info] Searching new master from slaves..
Tue Dec 26 16:02:43 2017 - [info] Candidate masters from the configuration file:
Tue Dec 26 16:02:43 2017 - [info] 192.168.137.142(192.168.137.142:3389) Version=5.7.21-log (oldest major version between slaves) log-bin:enabled
Tue Dec 26 16:02:43 2017 - [info] GTID ON
Tue Dec 26 16:02:43 2017 - [info] Replicating from 192.168.137.141(192.168.137.141:3389)
Tue Dec 26 16:02:43 2017 - [info] Primary candidate for the new Master (candidate_master is set)
Tue Dec 26 16:02:43 2017 - [info] 192.168.137.143(192.168.137.143:3389) Version=5.7.21-log (oldest major version between slaves) log-bin:enabled
Tue Dec 26 16:02:43 2017 - [info] GTID ON
Tue Dec 26 16:02:43 2017 - [info] Replicating from 192.168.137.141(192.168.137.141:3389)
Tue Dec 26 16:02:43 2017 - [info] Primary candidate for the new Master (candidate_master is set)
Tue Dec 26 16:02:43 2017 - [info] Non-candidate masters:
Tue Dec 26 16:02:43 2017 - [info] Searching from candidate_master slaves which have received the latest relay log events..
Tue Dec 26 16:02:43 2017 - [info] New master is 192.168.137.142(192.168.137.142:3389)
Tue Dec 26 16:02:43 2017 - [info] Starting master failover..
Tue Dec 26 16:02:43 2017 - [info]
From:
192.168.137.141(192.168.137.141:3389) (current master)
+--192.168.137.142(192.168.137.142:3389)
+--192.168.137.143(192.168.137.143:3389)

To:
192.168.137.142(192.168.137.142:3389) (new master)
+--192.168.137.143(192.168.137.143:3389)
Tue Dec 26 16:02:43 2017 - [info]
Tue Dec 26 16:02:43 2017 - [info] * Phase 3.3: New Master Recovery Phase..
Tue Dec 26 16:02:43 2017 - [info]
Tue Dec 26 16:02:43 2017 - [info] Waiting all logs to be applied..
Tue Dec 26 16:02:43 2017 - [info] done.
Tue Dec 26 16:02:43 2017 - [info] -- Saving binlog from host 192.168.137.140 started, pid: 27235
Tue Dec 26 16:02:43 2017 - [info]
Tue Dec 26 16:02:43 2017 - [info] Log messages from 192.168.137.140 ...
Tue Dec 26 16:02:43 2017 - [info]
Tue Dec 26 16:02:43 2017 - [info] Fetching binary logs from binlog server 192.168.137.140..
Tue Dec 26 16:02:43 2017 - [info] Executing binlog save command: save_binary_logs --command=save --start_file=1371413389-bin.000004 --start_pos=194 --output_file=/data1/mha/masterha/app3389/saved_binlog_binlog1_20171226160242.binlog --handle_raw_binlog=0 --skip_filter=1 --disable_log_bin=0 --manager_version=0.57 --oldest_version=5.7.21-log --binlog_dir=/data1/mha/binlog/3389
Creating /data1/mha/masterha/app3389 if not exists.. ok.
Concat binary/relay logs from 1371413389-bin.000004 pos 194 to 1371413389-bin.000004 EOF into /data1/mha/masterha/app3389/saved_binlog_binlog1_20171226160242.binlog ..
No additional binlog events found.
Event not exists.
Tue Dec 26 16:02:43 2017 - [info] Additional events were not found from the binlog server. No need to save.
Tue Dec 26 16:02:43 2017 - [info] End of log messages from 192.168.137.140.
Tue Dec 26 16:02:43 2017 - [info] No binlog events found from 192.168.137.140. Skipping
Tue Dec 26 16:02:43 2017 - [info] Getting new master's binlog name and position..
Tue Dec 26 16:02:43 2017 - [info] 1371423389-bin.000001:154
Tue Dec 26 16:02:43 2017 - [info] All other slaves should start replication from here. Statement should be: CHANGE MASTER TO MASTER_HOST='192.168.137.142', MASTER_PORT=3389, MASTER_AUTO_POSITION=1, MASTER_USER='replica', MASTER_PASSWORD='xxx';
Tue Dec 26 16:02:43 2017 - [info] Master Recovery succeeded. File:Pos:Exec_Gtid_Set: 1371423389-bin.000001, 154, ca6abe2a-e932-11e7-b2cd-000c295345bc:1-7
Tue Dec 26 16:02:43 2017 - [info] Executing master IP activate script:
Tue Dec 26 16:02:43 2017 - [info] /etc/mha/master_ip_failover.sh 192.168.137.200 1 --command=start --ssh_user=mysql --orig_master_host=192.168.137.141 --orig_master_ip=192.168.137.141 --orig_master_port=3389 --new_master_host=192.168.137.142 --new_master_ip=192.168.137.142 --new_master_port=3389 --new_master_user='mha' --new_master_password=xxx
Unknown option: new_master_user
Unknown option: new_master_password
Enabling the VIP - 192.168.137.200 on the new master - 192.168.137.142
=======sudo /sbin/ifconfig eth0:1 192.168.137.200 broadcast 192.168.137.255 netmask 255.255.255.0 && sudo /sbin/arping -f -q -c 5 -w 5 -I eth0 -s 192.168.137.200 -U 192.168.137.2=================
Tue Dec 26 16:02:44 2017 - [info] OK.
Tue Dec 26 16:02:44 2017 - [info] ** Finished master recovery successfully.
Tue Dec 26 16:02:44 2017 - [info] * Phase 3: Master Recovery Phase completed.
Tue Dec 26 16:02:44 2017 - [info]
Tue Dec 26 16:02:44 2017 - [info] * Phase 4: Slaves Recovery Phase..
Tue Dec 26 16:02:44 2017 - [info]
Tue Dec 26 16:02:44 2017 - [info]
Tue Dec 26 16:02:44 2017 - [info] * Phase 4.1: Starting Slaves in parallel..
Tue Dec 26 16:02:44 2017 - [info]
Tue Dec 26 16:02:44 2017 - [info] -- Slave recovery on host 192.168.137.143(192.168.137.143:3389) started, pid: 27273. Check tmp log /data1/mha/masterha/app3389/192.168.137.143_3389_20171226160242.log if it takes time..
Tue Dec 26 16:02:44 2017 - [info]
Tue Dec 26 16:02:44 2017 - [info] Log messages from 192.168.137.143 ...
Tue Dec 26 16:02:44 2017 - [info]
Tue Dec 26 16:02:44 2017 - [info] Resetting slave 192.168.137.143(192.168.137.143:3389) and starting replication from the new master 192.168.137.142(192.168.137.142:3389)..
Tue Dec 26 16:02:44 2017 - [info] Executed CHANGE MASTER.
Tue Dec 26 16:02:44 2017 - [info] Slave started.
Tue Dec 26 16:02:44 2017 - [info] gtid_wait(ca6abe2a-e932-11e7-b2cd-000c295345bc:1-7) completed on 192.168.137.143(192.168.137.143:3389). Executed 0 events.
Tue Dec 26 16:02:44 2017 - [info] End of log messages from 192.168.137.143.
Tue Dec 26 16:02:44 2017 - [info] -- Slave on host 192.168.137.143(192.168.137.143:3389) started.
Tue Dec 26 16:02:44 2017 - [info] All new slave servers recovered successfully.
Tue Dec 26 16:02:44 2017 - [info]
Tue Dec 26 16:02:44 2017 - [info] * Phase 5: New master cleanup phase..
Tue Dec 26 16:02:44 2017 - [info]
Tue Dec 26 16:02:44 2017 - [info] Resetting slave info on the new master..
Tue Dec 26 16:02:44 2017 - [info] 192.168.137.142: Resetting slave info succeeded.
Tue Dec 26 16:02:44 2017 - [info] Master failover to 192.168.137.142(192.168.137.142:3389) completed successfully.
Tue Dec 26 16:02:44 2017 - [info]

----- Failover Report -----

app3389: MySQL Master failover 192.168.137.141(192.168.137.141:3389) to 192.168.137.142(192.168.137.142:3389) succeeded

Master 192.168.137.141(192.168.137.141:3389) is down!

Check MHA Manager logs at mha-manager:/data1/mha/masterha/app3389/app3389.log for details.

Started automated(non-interactive) failover.
Invalidated master IP address on 192.168.137.141(192.168.137.141:3389)
Power off 192.168.137.141.
Selected 192.168.137.142(192.168.137.142:3389) as a new master.
192.168.137.142(192.168.137.142:3389): OK: Applying all logs succeeded.
192.168.137.142(192.168.137.142:3389): OK: Activated master IP address.
192.168.137.143(192.168.137.143:3389): OK: Slave started, replicating from 192.168.137.142(192.168.137.142:3389)
192.168.137.142(192.168.137.142:3389): Resetting slave info succeeded.
Master failover to 192.168.137.142(192.168.137.142:3389) completed successfully.

[root@mha-manager app3389]# su - mysql
[mysql@mha-manager ~]$ masterha_check_status --conf=/etc/mha/app3389.cnf
app3389 is stopped(2:NOT_RUNNING).

重新把MHA 启动

cd /data1/mha/binlog/3389

MHA 在线切换
[mysql@mha-manager 3389]$ masterha_stop --conf=/etc/mha/app3389.cnf
Stopped app3389 successfully.
[2]+ Exit 1 nohup masterha_manager --conf=/etc/mha/app3389.cnf --ignore_last_failover
[mysql@mha-manager 3389]$ masterha_check_status --conf=/etc/mha/app3389.cnf
app3389 is stopped(2:NOT_RUNNING).

在线切换(前提:Mha manager进程处于关闭状态(binlog server进程可选),且Mha复制结构正常;适用于生产系统硬件、软件升级维护等场景)
--orig_master_is_new_slave
切换时加上此参数是将原master变成slave节点;不加该参数,原master将不启动
--running_updates_limit=10000
切换时如果候选master存在复制延迟,mha切换不会成功;加上此参数表示延迟在此时间范围内(单位为 s)都可以切换,但是切换的时间长短是由recover时relay日志大小决定
注意：在备库先执行DDL,一般先stop slave,且一般不记录binlog(可以通过set session sql_log_bin=0实现),然后进行一次主备切换操作,再在原来的主库上执行DDL.这种方法适用于增减索引.

[mysql@mha-manager 3389]$ masterha_master_switch --master_state=alive --conf=/etc/mha/app3389.cnf --orig_master_is_new_slave --running_updates_limit=10000
Tue Dec 26 18:47:39 2017 - [info] MHA::MasterRotate version 0.57.
Tue Dec 26 18:47:39 2017 - [info] Starting online master switch..
Tue Dec 26 18:47:39 2017 - [info]
Tue Dec 26 18:47:39 2017 - [info] * Phase 1: Configuration Check Phase..
Tue Dec 26 18:47:39 2017 - [info]
Tue Dec 26 18:47:39 2017 - [warning] Global configuration file /etc/masterha_default.cnf not found. Skipping.
Tue Dec 26 18:47:39 2017 - [info] Reading application default configuration from /etc/mha/app3389.cnf..
Tue Dec 26 18:47:39 2017 - [info] Reading server configuration from /etc/mha/app3389.cnf..
Tue Dec 26 18:47:40 2017 - [info] GTID failover mode = 1
Tue Dec 26 18:47:40 2017 - [info] Current Alive Master: 192.168.137.141(192.168.137.141:3389)
Tue Dec 26 18:47:40 2017 - [info] Alive Slaves:
Tue Dec 26 18:47:40 2017 - [info] 192.168.137.142(192.168.137.142:3389) Version=5.7.21-log (oldest major version between slaves) log-bin:enabled
Tue Dec 26 18:47:40 2017 - [info] GTID ON
Tue Dec 26 18:47:40 2017 - [info] Replicating from 192.168.137.141(192.168.137.141:3389)
Tue Dec 26 18:47:40 2017 - [info] Primary candidate for the new Master (candidate_master is set)
Tue Dec 26 18:47:40 2017 - [info] 192.168.137.143(192.168.137.143:3389) Version=5.7.21-log (oldest major version between slaves) log-bin:enabled
Tue Dec 26 18:47:40 2017 - [info] GTID ON
Tue Dec 26 18:47:40 2017 - [info] Replicating from 192.168.137.141(192.168.137.141:3389)
Tue Dec 26 18:47:40 2017 - [info] Primary candidate for the new Master (candidate_master is set)

It is better to execute FLUSH NO_WRITE_TO_BINLOG TABLES on the master before switching. Is it ok to execute on 192.168.137.141(192.168.137.141:3389)? (YES/no): yes
Tue Dec 26 18:47:43 2017 - [info] Executing FLUSH NO_WRITE_TO_BINLOG TABLES. This may take long time..
Tue Dec 26 18:47:43 2017 - [info] ok.
Tue Dec 26 18:47:43 2017 - [info] Checking MHA is not monitoring or doing failover..
Tue Dec 26 18:47:43 2017 - [info] Checking replication health on 192.168.137.142..
Tue Dec 26 18:47:43 2017 - [info] ok.
Tue Dec 26 18:47:43 2017 - [info] Checking replication health on 192.168.137.143..
Tue Dec 26 18:47:43 2017 - [info] ok.
Tue Dec 26 18:47:43 2017 - [info] Searching new master from slaves..
Tue Dec 26 18:47:43 2017 - [info] Candidate masters from the configuration file:
Tue Dec 26 18:47:43 2017 - [info] 192.168.137.141(192.168.137.141:3389) Version=5.7.21-log log-bin:enabled
Tue Dec 26 18:47:43 2017 - [info] GTID ON
Tue Dec 26 18:47:43 2017 - [info] 192.168.137.142(192.168.137.142:3389) Version=5.7.21-log (oldest major version between slaves) log-bin:enabled
Tue Dec 26 18:47:43 2017 - [info] GTID ON
Tue Dec 26 18:47:43 2017 - [info] Replicating from 192.168.137.141(192.168.137.141:3389)
Tue Dec 26 18:47:43 2017 - [info] Primary candidate for the new Master (candidate_master is set)
Tue Dec 26 18:47:43 2017 - [info] 192.168.137.143(192.168.137.143:3389) Version=5.7.21-log (oldest major version between slaves) log-bin:enabled
Tue Dec 26 18:47:43 2017 - [info] GTID ON
Tue Dec 26 18:47:43 2017 - [info] Replicating from 192.168.137.141(192.168.137.141:3389)
Tue Dec 26 18:47:43 2017 - [info] Primary candidate for the new Master (candidate_master is set)
Tue Dec 26 18:47:43 2017 - [info] Non-candidate masters:
Tue Dec 26 18:47:43 2017 - [info] Searching from candidate_master slaves which have received the latest relay log events..
Tue Dec 26 18:47:43 2017 - [info]
From:
192.168.137.141(192.168.137.141:3389) (current master)
+--192.168.137.142(192.168.137.142:3389)
+--192.168.137.143(192.168.137.143:3389)

To:
192.168.137.142(192.168.137.142:3389) (new master)
+--192.168.137.143(192.168.137.143:3389)
+--192.168.137.141(192.168.137.141:3389)

Starting master switch from 192.168.137.141(192.168.137.141:3389) to 192.168.137.142(192.168.137.142:3389)? (yes/NO): yes
Tue Dec 26 18:47:45 2017 - [info] Checking whether 192.168.137.142(192.168.137.142:3389) is ok for the new master..
Tue Dec 26 18:47:45 2017 - [info] ok.
Tue Dec 26 18:47:45 2017 - [info] 192.168.137.141(192.168.137.141:3389): SHOW SLAVE STATUS returned empty result. To check replication filtering rules, temporarily executing CHANGE MASTER to a dummy host.
Tue Dec 26 18:47:45 2017 - [info] 192.168.137.141(192.168.137.141:3389): Resetting slave pointing to the dummy host.
Tue Dec 26 18:47:45 2017 - [info] ** Phase 1: Configuration Check Phase completed.
Tue Dec 26 18:47:45 2017 - [info]
Tue Dec 26 18:47:45 2017 - [info] * Phase 2: Rejecting updates Phase..
Tue Dec 26 18:47:45 2017 - [info]
Tue Dec 26 18:47:45 2017 - [info] Executing master ip online change script to disable write on the current master:
Tue Dec 26 18:47:45 2017 - [info] /etc/mha/master_ip_online_change.sh 192.168.137.200 1 --command=stop --orig_master_host=192.168.137.141 --orig_master_ip=192.168.137.141 --orig_master_port=3389 --orig_master_user='mha' --new_master_host=192.168.137.142 --new_master_ip=192.168.137.142 --new_master_port=3389 --new_master_user='mha' --orig_master_ssh_user=mysql --new_master_ssh_user=mysql --orig_master_is_new_slave --orig_master_password=xxx --new_master_password=xxx
Unknown option: orig_master_ssh_user
Unknown option: new_master_ssh_user
Unknown option: orig_master_is_new_slave
Tue Dec 26 18:47:45 2017 879681 Set read_only on the new master.. ok.
Tue Dec 26 18:47:45 2017 888325 Waiting all running 2 threads are disconnected.. (max 1500 milliseconds)
{'Time' => '8407','Command' => 'Binlog Dump GTID','db' => undef,'Id' => '5','Info' => undef,'User' => 'replica','State' => 'Master has sent all binlog to slave; waiting for more updates','Host' => '192.168.137.142:34542'}
{'Time' => '8386','Command' => 'Binlog Dump GTID','db' => undef,'Id' => '6','Info' => undef,'User' => 'replica','State' => 'Master has sent all binlog to slave; waiting for more updates','Host' => '192.168.137.143:59488'}
Tue Dec 26 18:47:46 2017 391501 Waiting all running 2 threads are disconnected.. (max 1000 milliseconds)
{'Time' => '8408','Command' => 'Binlog Dump GTID','db' => undef,'Id' => '5','Info' => undef,'User' => 'replica','State' => 'Master has sent all binlog to slave; waiting for more updates','Host' => '192.168.137.142:34542'}
{'Time' => '8387','Command' => 'Binlog Dump GTID','db' => undef,'Id' => '6','Info' => undef,'User' => 'replica','State' => 'Master has sent all binlog to slave; waiting for more updates','Host' => '192.168.137.143:59488'}
Tue Dec 26 18:47:46 2017 894128 Waiting all running 2 threads are disconnected.. (max 500 milliseconds)
{'Time' => '8408','Command' => 'Binlog Dump GTID','db' => undef,'Id' => '5','Info' => undef,'User' => 'replica','State' => 'Master has sent all binlog to slave; waiting for more updates','Host' => '192.168.137.142:34542'}
{'Time' => '8387','Command' => 'Binlog Dump GTID','db' => undef,'Id' => '6','Info' => undef,'User' => 'replica','State' => 'Master has sent all binlog to slave; waiting for more updates','Host' => '192.168.137.143:59488'}
Tue Dec 26 18:47:47 2017 398219 Set read_only=1 on the orig master.. ok.
Tue Dec 26 18:47:47 2017 401894 Waiting all running 2 queries are disconnected.. (max 500 milliseconds)
{'Time' => '8409','Command' => 'Binlog Dump GTID','db' => undef,'Id' => '5','Info' => undef,'User' => 'replica','State' => 'Master has sent all binlog to slave; waiting for more updates','Host' => '192.168.137.142:34542'}
{'Time' => '8388','Command' => 'Binlog Dump GTID','db' => undef,'Id' => '6','Info' => undef,'User' => 'replica','State' => 'Master has sent all binlog to slave; waiting for more updates','Host' => '192.168.137.143:59488'}
Disabling the VIP on old master: 192.168.137.141
===========sudo /sbin/ifconfig eth0:1 down===========================
Tue Dec 26 18:47:48 2017 266087 Killing all application threads..
Tue Dec 26 18:47:48 2017 269697 done.
Tue Dec 26 18:47:48 2017 - [info] ok.
Tue Dec 26 18:47:48 2017 - [info] Locking all tables on the orig master to reject updates from everybody (including root):
Tue Dec 26 18:47:48 2017 - [info] Executing FLUSH TABLES WITH READ LOCK..
Tue Dec 26 18:47:48 2017 - [info] ok.
Tue Dec 26 18:47:48 2017 - [info] Orig master binlog:pos is 1371413389-bin.000002:1272.
Tue Dec 26 18:47:48 2017 - [info] Waiting to execute all relay logs on 192.168.137.142(192.168.137.142:3389)..
Tue Dec 26 18:47:48 2017 - [info] master_pos_wait(1371413389-bin.000002:1272) completed on 192.168.137.142(192.168.137.142:3389). Executed 0 events.
Tue Dec 26 18:47:48 2017 - [info] done.
Tue Dec 26 18:47:48 2017 - [info] Getting new master's binlog name and position..
Tue Dec 26 18:47:48 2017 - [info] 1371423389-bin.000001:458
Tue Dec 26 18:47:48 2017 - [info] All other slaves should start replication from here. Statement should be: CHANGE MASTER TO MASTER_HOST='192.168.137.142', MASTER_PORT=3389, MASTER_AUTO_POSITION=1, MASTER_USER='replica', MASTER_PASSWORD='xxx';
Tue Dec 26 18:47:48 2017 - [info] Executing master ip online change script to allow write on the new master:
Tue Dec 26 18:47:48 2017 - [info] /etc/mha/master_ip_online_change.sh 192.168.137.200 1 --command=start --orig_master_host=192.168.137.141 --orig_master_ip=192.168.137.141 --orig_master_port=3389 --orig_master_user='mha' --new_master_host=192.168.137.142 --new_master_ip=192.168.137.142 --new_master_port=3389 --new_master_user='mha' --orig_master_ssh_user=mysql --new_master_ssh_user=mysql --orig_master_is_new_slave --orig_master_password=xxx --new_master_password=xxx
Unknown option: orig_master_ssh_user
Unknown option: new_master_ssh_user
Unknown option: orig_master_is_new_slave
Tue Dec 26 18:47:48 2017 408616 Set read_only=0 on the new master.
Enabling the VIP - 192.168.137.200 on the new master - 192.168.137.142
===========sudo /sbin/ifconfig eth0:1 192.168.137.200 broadcast 192.168.137.255 netmask 255.255.255.0 && sudo /sbin/arping -f -q -c 5 -w 5 -I eth0 -s 192.168.137.200 -U 192.168.137.2===========================
Tue Dec 26 18:47:48 2017 - [info] ok.
Tue Dec 26 18:47:48 2017 - [info]
Tue Dec 26 18:47:48 2017 - [info] * Switching slaves in parallel..
Tue Dec 26 18:47:48 2017 - [info]
Tue Dec 26 18:47:48 2017 - [info] -- Slave switch on host 192.168.137.143(192.168.137.143:3389) started, pid: 30499
Tue Dec 26 18:47:48 2017 - [info]
Tue Dec 26 18:47:48 2017 - [info] Log messages from 192.168.137.143 ...
Tue Dec 26 18:47:48 2017 - [info]
Tue Dec 26 18:47:48 2017 - [info] Waiting to execute all relay logs on 192.168.137.143(192.168.137.143:3389)..
Tue Dec 26 18:47:48 2017 - [info] master_pos_wait(1371413389-bin.000002:1272) completed on 192.168.137.143(192.168.137.143:3389). Executed 0 events.
Tue Dec 26 18:47:48 2017 - [info] done.
Tue Dec 26 18:47:48 2017 - [info] Resetting slave 192.168.137.143(192.168.137.143:3389) and starting replication from the new master 192.168.137.142(192.168.137.142:3389)..
Tue Dec 26 18:47:48 2017 - [info] Executed CHANGE MASTER.
Tue Dec 26 18:47:48 2017 - [info] Slave started.
Tue Dec 26 18:47:48 2017 - [info] End of log messages from 192.168.137.143 ...
Tue Dec 26 18:47:48 2017 - [info]
Tue Dec 26 18:47:48 2017 - [info] -- Slave switch on host 192.168.137.143(192.168.137.143:3389) succeeded.
Tue Dec 26 18:47:48 2017 - [info] Unlocking all tables on the orig master:
Tue Dec 26 18:47:48 2017 - [info] Executing UNLOCK TABLES..
Tue Dec 26 18:47:49 2017 - [info] ok.
Tue Dec 26 18:47:49 2017 - [info] Starting orig master as a new slave..
Tue Dec 26 18:47:49 2017 - [info] Resetting slave 192.168.137.141(192.168.137.141:3389) and starting replication from the new master 192.168.137.142(192.168.137.142:3389)..
Tue Dec 26 18:47:49 2017 - [info] Executed CHANGE MASTER.
Tue Dec 26 18:47:49 2017 - [info] Slave started.
Tue Dec 26 18:47:49 2017 - [info] All new slave servers switched successfully.
Tue Dec 26 18:47:49 2017 - [info]
Tue Dec 26 18:47:49 2017 - [info] * Phase 5: New master cleanup phase..
Tue Dec 26 18:47:49 2017 - [info]
Tue Dec 26 18:47:49 2017 - [info] 192.168.137.142: Resetting slave info succeeded.
Tue Dec 26 18:47:49 2017 - [info] Switching master to 192.168.137.142(192.168.137.142:3389) completed successfully.

原来的主库查询数据同步状态
mysql> show slave status \G
*************************** 1. row ***************************
Slave_IO_State: Waiting for master to send event
Master_Host: 192.168.137.142
Master_User: replica
Master_Port: 3389
Connect_Retry: 60
Master_Log_File: 1371423389-bin.000001
Read_Master_Log_Pos: 458
Relay_Log_File: 1371413389-relay-bin.000002
Relay_Log_Pos: 581
Relay_Master_Log_File: 1371423389-bin.000001
Slave_IO_Running: Yes
Slave_SQL_Running: Yes
Replicate_Do_DB:
Replicate_Ignore_DB:
Replicate_Do_Table:
Replicate_Ignore_Table:
Replicate_Wild_Do_Table:
Replicate_Wild_Ignore_Table:
Last_Errno: 0
Last_Error:
Skip_Counter: 0
Exec_Master_Log_Pos: 458
Relay_Log_Space: 793
Until_Condition: None
Until_Log_File:
Until_Log_Pos: 0
Master_SSL_Allowed: No
Master_SSL_CA_File:
Master_SSL_CA_Path:
Master_SSL_Cert:
Master_SSL_Cipher:
Master_SSL_Key:
Seconds_Behind_Master: 0
Master_SSL_Verify_Server_Cert: No
Last_IO_Errno: 0
Last_IO_Error:
Last_SQL_Errno: 0
Last_SQL_Error:
Replicate_Ignore_Server_Ids:
Master_Server_Id: 1371423389
Master_UUID: 13f17e7a-06ba-11e8-bb85-000c290905ad
Master_Info_File: mysql.slave_master_info
SQL_Delay: 0
SQL_Remaining_Delay: NULL
Slave_SQL_Running_State: Slave has read all relay log; waiting for more updates
Master_Retry_Count: 86400
Master_Bind:
Last_IO_Error_Timestamp:
Last_SQL_Error_Timestamp:
Master_SSL_Crl:
Master_SSL_Crlpath:
Retrieved_Gtid_Set: 13f17e7a-06ba-11e8-bb85-000c290905ad:1
Executed_Gtid_Set: 13f17e7a-06ba-11e8-bb85-000c290905ad:1,
c72c9819-e9cb-11e7-8797-000c295345bc:1-5
Auto_Position: 1
Replicate_Rewrite_DB:
Channel_Name:
Master_TLS_Version:
1 row in set (0.00 sec)

[mysql@mha-manager 3389]$ ps -ef | grep mysql
root 23495 2327 0 13:04 pts/0 00:00:00 su - mysql
mysql 23496 23495 0 13:04 pts/0 00:00:00 -bash
root 27308 23048 0 16:05 pts/1 00:00:00 su - mysql
mysql 27309 27308 0 16:05 pts/1 00:00:00 -bash
mysql 27416 23496 0 16:29 pts/0 00:00:00 mysqlbinlog -R --host=192.168.137.141 -P3389 --user=mha --password=x xxxxxx --raw --stop-never 1371413389-bin.000002
mysql 30557 23496 0 19:02 pts/0 00:00:00 ps -ef
mysql 30558 23496 0 19:02 pts/0 00:00:00 grep mysql
[mysql@mha-manager 3389]$ kill -9 27416

重新把MHA启动

cd /data1/mha/binlog/3389
nohup mysqlbinlog -R --host=192.168.137.142 -P3389 --user=mha --password=mysqlDBA --raw --stop-never 1371423389-bin.000001 &
1371423389-bin.000001

[mysql@mha-manager 3389]$ nohup masterha_manager --conf=/etc/mha/app3389.cnf --ignore_last_failover &

[mysql@mha-manager 3389]$ masterha_check_status --conf=/etc/mha/app3389.cnf
app3389 (pid:30636) is running(0:PING_OK), master:192.168.137.142

mysqladmin -S /data1/db3389/my3389.sock shutdown   # 关闭原来的备库,并观察日志变化

[root@node1-slave1 db3389]# mysqld_safe --defaults-file=/etc/mysql/my3389.cnf &

mysql> change master to master_host='192.168.137.141',master_port=3389,master_user='replica',master_password='mycatDBA',master_auto_position=1;
Query OK, 0 rows affected, 2 warnings (0.13 sec)

mysql> start slave;
Query OK, 0 rows affected (0.01 sec)

mysql> show slave status \G

nohup mysqlbinlog -R --host=192.168.137.141 -P3389 --user=mha --password=mysqlDBA --raw --stop-never 1371413389-bin.000002 &

[mysql@mha-manager 3389]$ nohup masterha_manager --conf=/etc/mha/app3389.cnf --ignore_last_failover &

更多相关文章

随机推荐