#!/bin/env python
import datetime
import subprocess
import time
def __do_archiver(file_name, where_condition):
file = f'/tmp/{file_name}.txt'
subprocess.call(["/usr/bin/touch", file])
command = [
"/root/percona-toolkit-3.7.0/bin/pt-archiver",
"--source",
"t=api_third_receive_record,i=idx_create_time",
"--database=db0",
"--password=9527",
"--user=root",
"--port=3306",
"--host=192.168.10.11",
f'--file={file}',
f"--where={where_condition}",
"--charset=utf8",
"--no-version-check",
"--no-check-charset",
"--progress=500",
"--limit=500",
"--txn-size=500",
"--statistics",
"--no-safe-auto-increment",
"--bulk-delete",
"--purge"
]
print('Command', command)
subprocess.call(command)
def do_archiver(start_date, end_date):
date_format = '%Y-%m-%d'
file_format = '%Y_%m_%d'
start_date_time = datetime.datetime.strptime(start_date, date_format)
end_date_time = datetime.datetime.strptime(end_date, date_format)
while start_date_time <= end_date_time:
file_name = start_date_time.strftime(file_format)
prev_date_time = start_date_time + datetime.timedelta(days=-1)
next_date_time = start_date_time + datetime.timedelta(days=1)
print('Executing', start_date_time.strftime(date_format))
where_condition = f'create_time>="{start_date_time.strftime(date_format)} 00:00:00" AND create_time<="{start_date_time.strftime(date_format)} 23:59:59"'
__do_archiver(file_name, where_condition)
time.sleep(5)
start_date_time = next_date_time
if __name__ == '__main__':
do_archiver("2022-11-21", "2022-11-21")
使用 pt-archiver 命令, 按天将 api_third_receive_record 表数据归档到文件里, 并删除表数据. WHERE 条件是按照创建时间筛选, 并使用 idx_create_time 索引.
版本 percona-toolkit-3.7.0
在实际执行时, 会出现表数据已经归档到文件里了, 但是表里还有少许数据没有被删除掉.
通过查询通用日志, 查看到执行的部分SQL语句
SELECT * FROM `db0`.`api_third_receive_record` FORCE INDEX(`idx_create_time`) WHERE (create_time>="2022-11-21 00:00:00" AND create_time<="2022-11-21 23:59:59") ORDER BY `create_time` LIMIT 500
DELETE FROM `db0`.`api_third_receive_record` WHERE (((`create_time` >= '2022-11-21 00:03:19.000'))) AND (((`create_time` <= '2022-11-21 15:27:53.000'))) AND (create_time>="2022-11-21 00:00:00" AND create_time<="2022-11-21 23:59:59") LIMIT 500
SELECT * FROM `db0`.`api_third_receive_record` FORCE INDEX(`idx_create_time`) WHERE (create_time>="2022-11-21 00:00:00" AND create_time<="2022-11-21 23:59:59") AND ((`create_time` >= '2022-11-21 15:27:53.000')) ORDER BY `create_time` LIMIT 500
DELETE FROM `db0`.`api_third_receive_record` WHERE (((`create_time` >= '2022-11-21 15:28:20.000'))) AND (((`create_time` <= '2022-11-21 23:56:28.000'))) AND (create_time>="2022-11-21 00:00:00" AND create_time<="2022-11-21 23:59:59") LIMIT 500
SELECT 查询时是使用索引并排序, 但DELETE 时没有使用索引
修改 pt-archiver 脚本文件, 在删除的逻辑里新增 ORDER BY $index_cols 内容
再次执行归档动作, DELETE 语句使用ORDER BY 排序, 数据也和期望一样.
DELETE FROM `db0`.`api_third_receive_record` WHERE (((`create_time` >= '2022-11-21 00:03:19.000'))) AND (((`create_time` <= '2022-11-21 15:27:53.000'))) AND (create_time>="2022-11-21 00:00:00" AND create_time<="2022-11-21 23:59:59") ORDER BY `create_time` LIMIT 500
执行计划, 也使用了 idx_create_time 索引