1. How the MHA High-Availability Architecture Works
(1) MHA supports a one-master, multi-slave topology. To build MHA, a replication cluster needs at least three database servers (one master and two slaves): one acting as the master, one as a standby master, and one as a slave;
(2) MHA Node runs on every MySQL server;
(3) MHA Manager periodically probes the master node of the cluster;
(4) When the master fails, MHA automatically promotes the slave with the most recent data to be the new master, repoints all remaining slaves to the new master, and floats the VIP over to it; the entire failover is completely transparent to the application.
2. Implementing the MHA High-Availability Architecture
2.1 Environment
| Hostname | IP Address | OS / Version | Role |
| --- | --- | --- | --- |
| mha-manager | 192.168.31.220 | rhel7.9 | manager controller |
| mysql-master | 192.168.31.221 | rhel7.9 / MySQL 8.0.40 | database master |
| mysql-rep1 | 192.168.31.222 | rhel7.9 / MySQL 8.0.40 | database slave |
| mysql-rep2 | 192.168.31.223 | rhel7.9 / MySQL 8.0.40 | database slave |
2.2 Preparation
2.2.1 Initialize the master node configuration
[root@mysql-master ~]# vim /etc/my.cnf    # edit the configuration file
[mysqld]
datadir=/data/mysql
socket=/data/mysql/mysql.sock
server_id=221                  # every node in the cluster must have a unique id
gtid-mode=on                   # enable GTID-based replication
enforce-gtid-consistency=true  # enforce GTID consistency
log-bin=binlog                 # enable the binary log
relay-log=relay-log            # enable the relay log
relay_log_purge=0              # do not automatically purge relay logs that are no longer needed
log-slave-updates=true         # write updates received through replication to the binary log
[root@mysql-master ~]# /etc/init.d/mysqld start    # start mysql
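To confirm the new settings actually took effect, a minimal sanity check (not part of the original procedure, assuming the root password set during installation) can be run:
[root@mysql-master ~]# mysql -uroot -p -e "SELECT @@server_id, @@gtid_mode, @@log_bin;"    # expect 221 / ON / 1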
2.2.2 Initialize the slave nodes rep1 and rep2
# On rep1
[root@mysql-rep1]# vim /etc/my.cnf
[mysqld]
datadir=/data/mysql
socket=/data/mysql/mysql.sock
server_id=222
gtid-mode=on
enforce-gtid-consistency=true
log-bin=binlog
relay-log=relay-log
relay_log_purge=0
log-slave-updates=true
[root@mysql-rep1 ~]# /etc/init.d/mysqld start
# On rep2
[root@mysql-rep2 ~]# cat /etc/my.cnf
[mysqld]
datadir=/data/mysql
socket=/data/mysql/mysql.sock
server_id=223
gtid-mode=on
enforce-gtid-consistency=true
log-bin=binlog
relay-log=relay-log
relay_log_purge=0
log-slave-updates=true
[root@mysql-rep2 ~]# /etc/init.d/mysqld start
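Duplicate server_id values silently break replication, so it is worth verifying that each node reports its own id. A minimal check, run on each replica:
[root@mysql-rep1 ~]# mysql -uroot -p -e "SELECT @@server_id;"    # expect 222
[root@mysql-rep2 ~]# mysql -uroot -p -e "SELECT @@server_id;"    # expect 223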
2.2.3 Configure the one-master, multi-slave replication topology
# On master
[root@mysql-master ~]# mysql -uroot -p    # log in to mysql
mysql> create user 'rep'@'%' identified WITH mysql_native_password by '123';    # create the rep account used for replication; MHA only supports the mysql_native_password authentication plugin
mysql> grant replication slave on *.* to 'rep'@'%';    # grant the replication privilege to the rep account
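Because MHA only works with mysql_native_password, it can save debugging time to confirm which plugin the account actually got (a quick check, assuming no other rep user exists):
mysql> SELECT user, host, plugin FROM mysql.user WHERE user='rep';    # plugin should read mysql_native_password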
# On rep1
[root@mysql-rep1 ~]# mysql -uroot -p
mysql> CHANGE REPLICATION SOURCE TO    # point the replica at the master
SOURCE_HOST='192.168.31.221',
SOURCE_USER='rep',
SOURCE_PASSWORD='123',
SOURCE_AUTO_POSITION=1,
SOURCE_SSL=1;
mysql> start replica;    # start replication
mysql> show replica status \G    # check replication status
Replica_IO_Running: Yes
Replica_SQL_Running: Yes
# On rep2
[root@mysql-rep2 ~]# mysql -uroot -p
mysql> CHANGE REPLICATION SOURCE TO
SOURCE_HOST='192.168.31.221',
SOURCE_USER='rep',
SOURCE_PASSWORD='123',
SOURCE_AUTO_POSITION=1,
SOURCE_SSL=1;
mysql> start replica;
mysql> show replica status \G
Replica_IO_Running: Yes
Replica_SQL_Running: Yes
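At this point an end-to-end test can confirm that writes on the master reach both replicas; the database name mha_test below is only an illustration:
[root@mysql-master ~]# mysql -uroot -p -e "CREATE DATABASE mha_test;"
[root@mysql-rep1 ~]# mysql -uroot -p -e "SHOW DATABASES LIKE 'mha_test';"    # should list mha_test on both replicas
[root@mysql-master ~]# mysql -uroot -p -e "DROP DATABASE mha_test;"    # clean up; the drop replicates too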
2.3 Configure MHA
# On mha-manager
[root@mha-manager ~]# wget https://github.com/yoshinorim/mha4mysql-node/releases/download/v0.58/mha4mysql-node-0.58-0.el7.centos.noarch.rpm    # install the packages on mha-manager; if this command cannot download them, transfer the packages to /root with xftp instead
[root@mha-manager ~]# ls MHA-7.zip
MHA-7.zip
[root@mha-manager ~]# yum install unzip -y
[root@mha-manager ~]# unzip MHA-7.zip
[root@mha-manager ~]# cd MHA-7/
[root@mha-manager MHA-7]# ls
mha4mysql-manager-0.58-0.el7.centos.noarch.rpm
mha4mysql-node-0.58-0.el7.centos.noarch.rpm
perl-Config-Tiny-2.14-7.el7.noarch.rpm
perl-Email-Date-Format-1.002-15.el7.noarch.rpm
perl-Log-Dispatch-2.41-1.el7.1.noarch.rpm
perl-Mail-Sender-0.8.23-1.el7.noarch.rpm
perl-Mail-Sendmail-0.79-21.el7.noarch.rpm
perl-MIME-Lite-3.030-1.el7.noarch.rpm
perl-MIME-Types-1.38-2.el7.noarch.rpm
perl-Net-Telnet-3.03-19.el7.noarch.rpm
perl-Parallel-ForkManager-1.18-2.el7.noarch.rpm
[root@mha-manager MHA-7]# yum localinstall *.rpm
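A quick query of the rpm database verifies that both MHA packages landed:
[root@mha-manager MHA-7]# rpm -q mha4mysql-manager mha4mysql-node
mha4mysql-manager-0.58-0.el7.centos.noarch
mha4mysql-node-0.58-0.el7.centos.noarch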
# Configure name resolution on all hosts
# On mha-manager
[root@mha-manager MHA-7]# vim /etc/hosts
192.168.31.220 mha-manager
192.168.31.221 master
192.168.31.222 rep1
192.168.31.223 rep2
# On mha-manager
# Install mha4mysql-node on the other three MySQL servers; just copy the mha4mysql-node rpm to every database server
[root@mha-manager MHA-7]# for i in {master,rep1,rep2};do scp mha4mysql-node-0.58-0.el7.centos.noarch.rpm root@$i:/root;done
# Install mha4mysql-node on all database servers
[root@mysql-master ~]# yum localinstall mha4mysql-node-0.58-0.el7.centos.noarch.rpm
# On master: create a user that can access MySQL remotely and grant it administrative privileges.
[root@mysql-master]# mysql -uroot -p
mysql> create user 'mhaadm'@'%' identified WITH mysql_native_password by '123';
mysql> grant all on *.* to 'mhaadm'@'%';
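Since the manager will log in to the databases over the network as mhaadm, a remote login test from mha-manager catches privilege or plugin problems early (assuming a mysql client is available on the manager):
[root@mha-manager ~]# mysql -umhaadm -p123 -h 192.168.31.221 -e "SELECT 1;"    # repeat for .222 and .223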
All nodes in the MHA cluster need passwordless SSH trust with one another so that MHA can control them remotely and manage data.
# Generate a key pair on every node
ssh-keygen -f /root/.ssh/id_rsa -P '' -q
# Upload every node's public key to the manager node, then distribute the manager node's authorized_keys to the other nodes
[root@mysql-master ~]# ssh-copy-id root@mha-manager
[root@mysql-rep1 ~]# ssh-copy-id root@mha-manager
[root@mysql-rep2 ~]# ssh-copy-id root@mha-manager
[root@mha-manager ~]# ssh-copy-id root@mha-manager    # also add the manager's own key, so the distributed file lets the manager reach every node
[root@mha-manager ~]# for i in {master,rep1,rep2};do scp /root/.ssh/authorized_keys root@$i:/root/.ssh;done
# Test
[root@mha-manager ~]# for i in {master,rep1,rep2};do ssh $i hostname ;done
master
rep1
rep2
# Create the configuration directory and the working directory on the manager
[root@mha-manager ~]# mkdir -p /etc/mha /var/log/mha/app1
[root@mha-manager ~]# vim /etc/mha/app1.cnf
[server default]    # settings shared by all three server sections
user=mhaadm    # MHA management user
password=123    # MHA management user's password
manager_workdir=/var/log/mha/app1    # MHA working directory
manager_log=/var/log/mha/app1/manager.log    # MHA log file
ssh_user=root    # user for SSH key-based authentication
repl_user=rep    # replication account
repl_password=123
ping_interval=1    # interval in seconds between pings of the master
[server1]    # MySQL host information
hostname=192.168.31.221
ssh_port=22
candidate_master=1    # this host is a candidate to become the new master
[server2]
hostname=192.168.31.222
ssh_port=22
candidate_master=1
[server3]
hostname=192.168.31.223
ssh_port=22
no_master=1    # this host will never be promoted to master
# Test
[root@mha-manager ~]# masterha_check_ssh --conf=/etc/mha/app1.cnf    # verify SSH trust between all nodes
[root@mha-manager ~]# masterha_check_repl --conf=/etc/mha/app1.cnf    # verify the MySQL replication cluster configuration
2.4 MHA Failover
# Simulate a master failure
[root@mysql-master ~]# /etc/init.d/mysqld stop
# Perform the failover on mha-manager
[root@mha-manager ~]# masterha_master_switch --master_state=dead --conf=/etc/mha/app1.cnf --dead_master_host=192.168.31.221 --dead_master_port=3306 --new_master_host=192.168.31.222 --new_master_port=3306 --ignore_last_failover    # --ignore_last_failover ignores the lock file that a previous failover left in /var/log/mha/app1/
# Recover the failed MySQL node
[root@mysql-master ~]# /etc/init.d/mysqld start
[root@mysql-master ~]# mysql -uroot -p
mysql> CHANGE REPLICATION SOURCE TO    # rejoin this host to the cluster as a replica of the new master
SOURCE_HOST='192.168.31.222',
SOURCE_USER='rep',
SOURCE_PASSWORD='123',
SOURCE_AUTO_POSITION=1,
SOURCE_SSL=1;
Query OK, 0 rows affected, 3 warnings (0.00 sec)
mysql> start replica;
Query OK, 0 rows affected (0.01 sec)
mysql> show replica status\G
*************************** 1. row ***************************
Replica_IO_State: Waiting for source to send event
Source_Host: 192.168.31.222
Source_User: rep
Source_Port: 3306
Connect_Retry: 60
Source_Log_File: binlog.000003
Read_Source_Log_Pos: 1249
Relay_Log_File: relay-log.000003
Relay_Log_Pos: 451
Relay_Source_Log_File: binlog.000003
Replica_IO_Running: Yes
Replica_SQL_Running: Yes
# Start MHA so that failover happens automatically when MySQL fails
[root@mha-manager ~]# rm -rf /var/log/mha/app1/app1.failover.complete    # remove the lock file; if it is left in place, MHA will refuse to fail over again
[root@mha-manager ~]# masterha_manager --conf=/etc/mha/app1.cnf    # start MHA; alternatively, pass --ignore_last_failover to ignore the lock file
[root@mha-manager ~]# masterha_check_status --conf=/etc/mha/app1.cnf    # check MHA status
app1 (pid:2510) is running(0:PING_OK), master:192.168.31.222
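Note that masterha_manager runs in the foreground and occupies the terminal; in practice it is commonly started in the background so the session stays usable, for example:
[root@mha-manager ~]# nohup masterha_manager --conf=/etc/mha/app1.cnf --ignore_last_failover < /dev/null > /var/log/mha/app1/manager.log 2>&1 &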
# Simulate a crash of the current master (now rep1)
[root@mysql-rep1 ~]# /etc/init.d/mysqld stop    # once the failover completes, mha-manager stops automatically
[root@mha-manager ~]# cat /var/log/mha/app1/manager.log    # review the failover log
# Remove the lock file
[root@mha-manager ~]# rm -rf /var/log/mha/app1/app1.failover.complete
# Recover the failed node and rejoin it as a replica of the new master (192.168.31.221)
[root@mysql-rep1 ~]# /etc/init.d/mysqld start
[root@mysql-rep1 ~]# mysql -uroot -p
mysql> CHANGE REPLICATION SOURCE TO
SOURCE_HOST='192.168.31.221',
SOURCE_USER='rep',
SOURCE_PASSWORD='123',
SOURCE_AUTO_POSITION=1,
SOURCE_SSL=1;
mysql> start replica;
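After the node rejoins, rerunning the replication check on the manager confirms the cluster is healthy again before restarting masterha_manager:
[root@mha-manager ~]# masterha_check_repl --conf=/etc/mha/app1.cnf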
2.5 Configure the VIP
The VIP can be managed in two ways: with keepalived managing the floating virtual IP, or with a script that moves the virtual IP (no keepalived- or heartbeat-like software required). To reduce the risk of split-brain, the script-based approach is recommended for production rather than keepalived.
# Write the failover script on the manager
[root@mha-manager ~]# vim /usr/local/bin/master_ip_failover
#!/usr/bin/env perl
use strict;
use warnings FATAL => 'all';
use Getopt::Long;

my (
    $command, $ssh_user, $orig_master_host, $orig_master_ip,
    $orig_master_port, $new_master_host, $new_master_ip, $new_master_port
);

# NOTE: adjust the VIP address and NIC name below to match your environment
my $vip = '192.168.31.88/24';
my $ssh_start_vip = "/sbin/ip a add $vip dev ens32";
my $ssh_stop_vip  = "/sbin/ip a del $vip dev ens32";

GetOptions(
    'command=s'          => \$command,
    'ssh_user=s'         => \$ssh_user,
    'orig_master_host=s' => \$orig_master_host,
    'orig_master_ip=s'   => \$orig_master_ip,
    'orig_master_port=i' => \$orig_master_port,
    'new_master_host=s'  => \$new_master_host,
    'new_master_ip=s'    => \$new_master_ip,
    'new_master_port=i'  => \$new_master_port,
);

exit &main();

sub main {
    print "\n\nIN SCRIPT TEST====$ssh_stop_vip==$ssh_start_vip===\n\n";
    if ( $command eq "stop" || $command eq "stopssh" ) {
        my $exit_code = 1;
        eval {
            print "Disabling the VIP on old master: $orig_master_host \n";
            &stop_vip();
            $exit_code = 0;
        };
        if ($@) {
            warn "Got Error: $@\n";
            exit $exit_code;
        }
        exit $exit_code;
    }
    elsif ( $command eq "start" ) {
        my $exit_code = 10;
        eval {
            print "Enabling the VIP - $vip on the new master - $new_master_host \n";
            &start_vip();
            $exit_code = 0;
        };
        if ($@) {
            warn $@;
            exit $exit_code;
        }
        exit $exit_code;
    }
    elsif ( $command eq "status" ) {
        print "Checking the Status of the script.. OK \n";
        exit 0;
    }
    else {
        &usage();
        exit 1;
    }
}

sub start_vip() {
    `ssh $ssh_user\@$new_master_host \" $ssh_start_vip \"`;
}

sub stop_vip() {
    return 0 unless ($ssh_user);
    `ssh $ssh_user\@$orig_master_host \" $ssh_stop_vip \"`;
}

sub usage {
    print "Usage: master_ip_failover --command=start|stop|stopssh|status --orig_master_host=host --orig_master_ip=ip --orig_master_port=port --new_master_host=host --new_master_ip=ip --new_master_port=port\n";
}
[root@mha-manager ~]# chmod +x /usr/local/bin/master_ip_failover
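Before wiring it into MHA, it is worth exercising the script by hand; the status branch only prints a message and exits, so this is a safe smoke test (the arguments below are this lab's values):
[root@mha-manager ~]# /usr/local/bin/master_ip_failover --command=status --ssh_user=root --orig_master_host=192.168.31.221 --orig_master_ip=192.168.31.221 --orig_master_port=3306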
# Update the manager configuration
[root@mha-manager ~]# vim /etc/mha/app1.cnf
[server default]
master_ip_failover_script=/usr/local/bin/master_ip_failover    # add this line
# Manually bring up the VIP on the master for the first time
[root@mysql-master]# ip a a 192.168.31.88/24 dev ens32
[root@mysql-master]# ip a show ens32
2: ens32: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP group default qlen 1000
link/ether 00:0c:29:d6:ad:c7 brd ff:ff:ff:ff:ff:ff
inet 192.168.31.221/24 brd 192.168.31.255 scope global noprefixroute ens32
valid_lft forever preferred_lft forever
inet 192.168.31.88/24 scope global secondary ens32
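A quick reachability check from another host (here the manager) confirms the VIP answers before MHA is started:
[root@mha-manager ~]# ping -c 2 192.168.31.88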
# Start MHA on mha-manager
[root@mha-manager ~]# masterha_manager --conf=/etc/mha/app1.cnf --ignore_last_failover
# Stop the MySQL service on the master to simulate a crash of the master node
[root@mysql-master ~]# /etc/init.d/mysqld stop
# Check the VIP on rep1
[root@mysql-rep1 ~]# ip a show ens32
2: ens32: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP group default qlen 1000
link/ether 00:0c:29:f3:43:54 brd ff:ff:ff:ff:ff:ff
inet 192.168.31.222/24 brd 192.168.31.255 scope global noprefixroute ens32
valid_lft forever preferred_lft forever
inet 192.168.31.88/24 scope global secondary ens32
valid_lft forever preferred_lft forever
inet6 fe80::104a:bcda:d57c:4ec5/64 scope link tentative noprefixroute dadfailed
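As a final check, connecting through the VIP should now land on the promoted master; a minimal probe using the mhaadm account (assuming a mysql client on the manager):
[root@mha-manager ~]# mysql -umhaadm -p123 -h 192.168.31.88 -e "SELECT @@server_id;"    # expect 222, the promoted rep1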