12.8 使用连接补全或识别列表的缺口
mysql> select * from mail;
+---------------------+---------+---------+---------+---------+---------+
| t | srcuser | srchost | dstuser | dsthost | size |
+---------------------+---------+---------+---------+---------+---------+
| 2006-05-11 10:15:08 | barb | saturn | tricia | mars | 58274 |
| 2006-05-12 12:48:13 | tricia | mars | gene | venus | 194925 |
| 2006-05-12 15:02:49 | phil | mars | phil | saturn | 1048 |
| 2006-05-13 13:59:18 | barb | saturn | tricia | venus | 271 |
| 2006-05-14 09:31:37 | gene | venus | barb | mars | 2291 |
| 2006-05-14 11:52:17 | phil | mars | tricia | saturn | 5781 |
| 2006-05-14 14:42:21 | barb | venus | barb | venus | 98151 |
| 2006-05-14 17:03:01 | tricia | saturn | phil | venus | 2394482 |
| 2006-05-15 07:17:48 | gene | mars | gene | saturn | 3824 |
| 2006-05-15 08:50:57 | phil | venus | phil | venus | 978 |
| 2006-05-15 10:25:52 | gene | mars | tricia | saturn | 998532 |
| 2006-05-15 17:35:31 | gene | saturn | gene | mars | 3856 |
| 2006-05-16 09:00:28 | gene | venus | barb | mars | 613 |
| 2006-05-16 23:04:19 | phil | venus | barb | venus | 10294 |
| 2006-05-17 12:49:23 | phil | mars | tricia | saturn | 873 |
| 2006-05-19 22:21:51 | gene | saturn | gene | venus | 23992 |
+---------------------+---------+---------+---------+---------+---------+
16 rows in set (0.01 sec)
mysql> select hour(t) as hour, count(hour(t)) as count
-> from mail group by hour;
+------+-------+
| hour | count |
+------+-------+
| 10 | 2 |
| 12 | 2 |
| 15 | 1 |
| 13 | 1 |
| 9 | 2 |
| 11 | 1 |
| 14 | 1 |
| 17 | 2 |
| 7 | 1 |
| 8 | 1 |
| 23 | 1 |
| 22 | 1 |
+------+-------+
12 rows in set (0.02 sec)
mysql> create table ref (h int);
Query OK, 0 rows affected (0.17 sec)
mysql> insert into ref (h)
-> values (0), (1), (2), (3), (4), (5), (6), (7), (8), (9), (10)
-> , (11), (12), (13), (14), (15), (16), (17), (18), (19), (20), (21), (22), (23);
Query OK, 24 rows affected (0.03 sec)
Records: 24 Duplicates: 0 Warnings: 0
mysql> select ref.h as hour, count(mail.t) as count\
-> from ref left join mail on ref.h = hour(mail.t)
-> group by hour;
+------+-------+
| hour | count |
+------+-------+
| 0 | 0 |
| 1 | 0 |
| 2 | 0 |
| 3 | 0 |
| 4 | 0 |
| 5 | 0 |
| 6 | 0 |
| 7 | 1 |
| 8 | 1 |
| 9 | 2 |
| 10 | 2 |
| 11 | 1 |
| 12 | 2 |
| 13 | 1 |
| 14 | 1 |
| 15 | 1 |
| 16 | 0 |
| 17 | 2 |
| 18 | 0 |
| 19 | 0 |
| 20 | 0 |
| 21 | 0 |
| 22 | 1 |
| 23 | 1 |
+------+-------+
24 rows in set (0.01 sec)
mysql> select ref.h as hour
-> from ref left join mail on ref.h = hour(mail.t)
-> where mail.t is null;
+------+
| hour |
+------+
| 0 |
| 1 |
| 2 |
| 3 |
| 4 |
| 5 |
| 6 |
| 16 |
| 18 |
| 19 |
| 20 |
| 21 |
+------+
12 rows in set (0.01 sec)
mysql> select * from driver_log order by rec_id;
+--------+-------+------------+-------+
| rec_id | name | trav_date | miles |
+--------+-------+------------+-------+
| 1 | Ben | 2006-08-30 | 152 |
| 2 | Suzi | 2006-08-29 | 391 |
| 3 | Henry | 2006-08-29 | 300 |
| 4 | Henry | 2006-08-27 | 96 |
| 5 | Ben | 2006-08-29 | 131 |
| 6 | Henry | 2006-08-26 | 115 |
| 7 | Suzi | 2006-09-02 | 502 |
| 8 | Henry | 2006-09-01 | 197 |
| 9 | Ben | 2006-09-02 | 79 |
| 10 | Henry | 2006-08-30 | 203 |
+--------+-------+------------+-------+
10 rows in set (0.01 sec)
mysql> select * from driver_log order by rec_id;
+--------+-------+------------+-------+
| rec_id | name | trav_date | miles |
+--------+-------+------------+-------+
| 1 | Ben | 2006-08-30 | 152 |
| 2 | Suzi | 2006-08-29 | 391 |
| 3 | Henry | 2006-08-29 | 300 |
| 4 | Henry | 2006-08-27 | 96 |
| 5 | Ben | 2006-08-29 | 131 |
| 6 | Henry | 2006-08-26 | 115 |
| 7 | Suzi | 2006-09-02 | 502 |
| 8 | Henry | 2006-09-01 | 197 |
| 9 | Ben | 2006-09-02 | 79 |
| 10 | Henry | 2006-08-30 | 203 |
+--------+-------+------------+-------+
10 rows in set (0.01 sec)
mysql> select trav_date, count(trav_date) as drivers
-> from driver_log group by trav_date;
+------------+---------+
| trav_date | drivers |
+------------+---------+
| 2006-08-30 | 2 |
| 2006-08-29 | 3 |
| 2006-08-27 | 1 |
| 2006-08-26 | 1 |
| 2006-09-02 | 2 |
| 2006-09-01 | 1 |
+------------+---------+
6 rows in set (0.00 sec)
mysql> WITH date_range AS (
-> SELECT DISTINCT trav_date AS d FROM driver_log
-> UNION SELECT '2006-08-26' UNION SELECT '2006-08-27'
-> UNION SELECT '2006-08-29' UNION SELECT '2006-08-30'
-> UNION SELECT '2006-09-01' UNION SELECT '2006-09-02'
-> )
-> SELECT d, COUNT(driver_log.trav_date) AS drivers
-> FROM date_range LEFT JOIN driver_log ON d = driver_log.trav_date
-> GROUP BY d;
+------------+---------+
| d | drivers |
+------------+---------+
| 2006-08-30 | 2 |
| 2006-08-29 | 3 |
| 2006-08-27 | 1 |
| 2006-08-26 | 1 |
| 2006-09-02 | 2 |
| 2006-09-01 | 1 |
+------------+---------+
6 rows in set (0.01 sec)
mysql> -- 1. 删除现有表(如果不需要数据)
mysql> DROP TABLE IF EXISTS ref;
Query OK, 0 rows affected (0.05 sec)
mysql>
mysql> -- 2. 重新创建表
mysql> CREATE TABLE ref (d DATE);
Query OK, 0 rows affected (0.05 sec)
mysql>
mysql> -- 3. 插入数据
mysql> INSERT INTO ref (d) VALUES
-> ('2006-08-26'), ('2006-08-27'), ('2006-08-29'),
-> ('2006-08-30'), ('2006-09-01'), ('2006-09-02');
Query OK, 6 rows affected (0.01 sec)
Records: 6 Duplicates: 0 Warnings: 0
mysql>
mysql> -- 4. 运行查询
mysql> SELECT ref.d
-> FROM ref
-> LEFT JOIN driver_log ON ref.d = driver_log.trav_date
-> WHERE driver_log.trav_date IS NULL
-> ORDER BY d;
Empty set (0.01 sec)
mysql> SELECT
-> ref.d AS ref_date,
-> driver_log.trav_date AS log_date,
-> CASE WHEN driver_log.trav_date IS NULL THEN 'Missing' ELSE 'Present' END AS status
-> FROM ref
-> LEFT JOIN driver_log ON ref.d = driver_log.trav_date
-> ORDER BY ref.d;
+------------+------------+---------+
| ref_date | log_date | status |
+------------+------------+---------+
| 2006-08-26 | 2006-08-26 | Present |
| 2006-08-27 | 2006-08-27 | Present |
| 2006-08-29 | 2006-08-29 | Present |
| 2006-08-29 | 2006-08-29 | Present |
| 2006-08-29 | 2006-08-29 | Present |
| 2006-08-30 | 2006-08-30 | Present |
| 2006-08-30 | 2006-08-30 | Present |
| 2006-09-01 | 2006-09-01 | Present |
| 2006-09-02 | 2006-09-02 | Present |
| 2006-09-02 | 2006-09-02 | Present |
+------------+------------+---------+
10 rows in set (0.00 sec)
mysql> -- 添加更多日期到ref表
mysql> INSERT INTO ref (d) VALUES
-> ('2006-08-28'), ('2006-08-31'), ('2006-09-03');
Query OK, 3 rows affected (0.01 sec)
Records: 3 Duplicates: 0 Warnings: 0
mysql>
mysql> -- 再次运行查询
mysql> SELECT ref.d
-> FROM ref
-> LEFT JOIN driver_log ON ref.d = driver_log.trav_date
-> WHERE driver_log.trav_date IS NULL
-> ORDER BY d;
+------------+
| d |
+------------+
| 2006-08-28 |
| 2006-08-31 |
| 2006-09-03 |
+------------+
3 rows in set (0.00 sec)
use strict;
use warnings;
use DBI;
# 检查参数
@ARGV == 5 or die "Usage: perl make_date_list.pl <db_name> <table_name> <column_name> <min_date> <max_date>\n";
my ($db_name, $tb1_name, $col_name, $min_date, $max_date) = @ARGV;
# 检查日期格式(简单示例)
unless ($min_date =~ /^\d{4}-\d{2}-\d{2}$/ && $max_date =~ /^\d{4}-\d{2}-\d{2}$/) {
die "Error: Dates must be in YYYY-MM-DD format.\n";
}
# 先连接到MySQL服务器(不指定数据库)
my $dbh = DBI->connect("DBI:mysql:host=localhost;mysql_client_found_rows=1;mysql_enable_utf8=1;mysql_auth=mysql_native_password", "root", "root")
or die "Could not connect to MySQL server: $DBI::errstr";
# 检查数据库是否存在,如果不存在则创建
my $db_exists = $dbh->selectrow_array("SELECT SCHEMA_NAME FROM INFORMATION_SCHEMA.SCHEMATA WHERE SCHEMA_NAME = ?", undef, $db_name);
if (!$db_exists) {
print "Creating database '$db_name'...\n";
$dbh->do("CREATE DATABASE $db_name") or die "Failed to create database: $DBI::errstr";
}
# 连接到指定数据库
$dbh->disconnect;
$dbh = DBI->connect("DBI:mysql:database=$db_name;host=localhost;mysql_client_found_rows=1;mysql_enable_utf8=1;mysql_auth=mysql_native_password", "root", "root")
or die "Could not connect to database: $DBI::errstr";
# 计算天数
my $days = $dbh->selectrow_array(qq{ SELECT DATEDIFF(?, ?) + 1 }, undef, $max_date, $min_date);
print "Date range: $min_date to $max_date ($days days)\n";
die "Error: Date range is invalid (days < 1)\n" if $days < 1;
# 创建表
$dbh->do("DROP TABLE IF EXISTS $tb1_name"); # 注意这里不再需要数据库名前缀
$dbh->do(qq{
CREATE TABLE $tb1_name (
$col_name DATE NOT NULL,
PRIMARY KEY ($col_name)
)
});
# 插入每一天
my $sth = $dbh->prepare(qq{
INSERT INTO $tb1_name ($col_name) VALUES(? + INTERVAL ? DAY)
});
foreach my $i (0 .. $days-1) {
$sth->execute($min_date, $i);
}
print "Success: Table '$tb1_name' created with $days dates.\n";
$dbh->disconnect;
C:\Users\lenovo>perl make_date_list.pl test_db my_dates day_date “2024-01-01” “2024-01-03”
C:\Users\lenovo>mysql -u root -p
Enter password: ****
Welcome to the MySQL monitor. Commands end with ; or \g.
Your MySQL connection id is 20
Server version: 8.0.40 MySQL Community Server - GPL
Copyright (c) 2000, 2024, Oracle and/or its affiliates.
Oracle is a registered trademark of Oracle Corporation and/or its
affiliates. Other names may be trademarks of their respective
owners.
Type 'help;' or '\h' for help. Type '\c' to clear the current input statement.
mysql> USE test_db;
Database changed
mysql> SHOW TABLES;
+-------------------+
| Tables_in_test_db |
+-------------------+
| my_dates |
+-------------------+
1 row in set (0.02 sec)
mysql> DESCRIBE my_dates;
+----------+------+------+-----+---------+-------+
| Field | Type | Null | Key | Default | Extra |
+----------+------+------+-----+---------+-------+
| day_date | date | NO | PRI | NULL | |
+----------+------+------+-----+---------+-------+
1 row in set (0.01 sec)
mysql> SELECT * FROM my_dates;
+------------+
| day_date |
+------------+
| 2024-01-01 |
| 2024-01-02 |
| 2024-01-03 |
+------------+
3 rows in set (0.00 sec)
12.9 计算连续行的差值
mysql> select seq, city, miles from trip_log order by seq;
+-----+------------------+-------+
| seq | city | miles |
+-----+------------------+-------+
| 1 | San Antonio, TX | 0 |
| 2 | Dallas, TX | 263 |
| 3 | Benton, AR | 566 |
| 4 | Memphis, TN | 745 |
| 5 | Portageville, MO | 878 |
| 6 | Champaign, IL | 1164 |
| 7 | Madison, WI | 1412 |
+-----+------------------+-------+
7 rows in set (0.02 sec)
mysql> select t1.seq as seq1, t2.seq as seq2,
-> t1.city as city1, t2.city as city2,
-> t1.miles as miles1, t2.miles as miles2,
-> t2.miles - t1.miles as dist
-> from trip_log as t1 inner join trip_log as t2
-> on t1.seq+1 = t2.seq
-> order by t1.seq;
+------+------+------------------+------------------+--------+--------+------+
| seq1 | seq2 | city1 | city2 | miles1 | miles2 | dist |
+------+------+------------------+------------------+--------+--------+------+
| 1 | 2 | San Antonio, TX | Dallas, TX | 0 | 263 | 263 |
| 2 | 3 | Dallas, TX | Benton, AR | 263 | 566 | 303 |
| 3 | 4 | Benton, AR | Memphis, TN | 566 | 745 | 179 |
| 4 | 5 | Memphis, TN | Portageville, MO | 745 | 878 | 133 |
| 5 | 6 | Portageville, MO | Champaign, IL | 878 | 1164 | 286 |
| 6 | 7 | Champaign, IL | Madison, WI | 1164 | 1412 | 248 |
+------+------+------------------+------------------+--------+--------+------+
6 rows in set (0.00 sec)
mysql> select id,date,ab, h, truncate(ifnull(h/ab, 0), 3) as ba
-> from player_stats order by id;
+----+------------+-----+----+-------+
| id | date | ab | h | ba |
+----+------------+-----+----+-------+
| 1 | 2013-04-30 | 0 | 0 | 0.000 |
| 2 | 2013-05-31 | 38 | 13 | 0.342 |
| 3 | 2013-06-30 | 109 | 31 | 0.284 |
| 4 | 2013-07-31 | 196 | 49 | 0.250 |
| 5 | 2013-08-31 | 304 | 98 | 0.322 |
+----+------------+-----+----+-------+
5 rows in set, 1 warning (0.01 sec)
mysql> select
-> t1.id as id1, t2.id as id2
-> , t2.date,
-> t1.ab as ab1, t2.ab as ab2,
-> t1.h as h1, t2.h as h2,
-> t2.ab - t1.ab as abdiff,
-> t2.h - t1.h as hdiff,
-> truncate(ifnull((t2.h - t1.h)/ (t2.ab - t1.ab), 0), 3) as ba
-> from player_stats as t1 inner join player_stats as t2
-> on t1.id+1 = t2.id
-> order by t1.id;
+-----+-----+------------+-----+-----+----+----+--------+-------+-------+
| id1 | id2 | date | ab1 | ab2 | h1 | h2 | abdiff | hdiff | ba |
+-----+-----+------------+-----+-----+----+----+--------+-------+-------+
| 1 | 2 | 2013-05-31 | 0 | 38 | 0 | 13 | 38 | 13 | 0.342 |
| 2 | 3 | 2013-06-30 | 38 | 109 | 13 | 31 | 71 | 18 | 0.253 |
| 3 | 4 | 2013-07-31 | 109 | 196 | 31 | 49 | 87 | 18 | 0.206 |
| 4 | 5 | 2013-08-31 | 196 | 304 | 49 | 98 | 108 | 49 | 0.453 |
+-----+-----+------------+-----+-----+----+----+--------+-------+-------+
4 rows in set (0.00 sec)
12.10 发现累积和与动态均值
mysql> select date, precip from rainfall order by date;
+------------+--------+
| date | precip |
+------------+--------+
| 2014-06-01 | 1.50 |
| 2014-06-02 | 0.00 |
| 2014-06-03 | 0.50 |
| 2014-06-04 | 0.00 |
| 2014-06-05 | 1.00 |
+------------+--------+
5 rows in set (0.01 sec)
mysql> select sum(precip) from rainfall where date <= '2014-06-01';
+-------------+
| sum(precip) |
+-------------+
| 1.50 |
+-------------+
1 row in set (0.01 sec)
mysql> select t1.date, t1.precip as 'daily precip',
-> sum(t2.precip) as 'cum.precip'
-> from rainfall as t1 inner join rainfall as t2
-> on t1.date >= t2.date
-> group by t1.date;
+------------+--------------+------------+
| date | daily precip | cum.precip |
+------------+--------------+------------+
| 2014-06-01 | 1.50 | 1.50 |
| 2014-06-02 | 0.00 | 1.50 |
| 2014-06-03 | 0.50 | 2.00 |
| 2014-06-04 | 0.00 | 2.00 |
| 2014-06-05 | 1.00 | 3.00 |
+------------+--------------+------------+
5 rows in set (0.00 sec)
mysql> select t1.date, t1.precip as 'daily precip',
-> sum(t2.precip) as 'cum.precip',
-> count(t2.precip) as 'days elapsed',
-> avg(t2.precip) as 'avg.precip'
-> from rainfall as t1 inner join rainfall as t2
-> on t1.date >= t2.date
-> group by t1.date;
+------------+--------------+------------+--------------+------------+
| date | daily precip | cum.precip | days elapsed | avg.precip |
+------------+--------------+------------+--------------+------------+
| 2014-06-01 | 1.50 | 1.50 | 1 | 1.500000 |
| 2014-06-02 | 0.00 | 1.50 | 2 | 0.750000 |
| 2014-06-03 | 0.50 | 2.00 | 3 | 0.666667 |
| 2014-06-04 | 0.00 | 2.00 | 4 | 0.500000 |
| 2014-06-05 | 1.00 | 3.00 | 5 | 0.600000 |
+------------+--------------+------------+--------------+------------+
5 rows in set (0.00 sec)
mysql> delete from rainfall where precip = 0;
Query OK, 2 rows affected (0.01 sec)
mysql> select date, precip from rainfall order by date;
+------------+--------+
| date | precip |
+------------+--------+
| 2014-06-01 | 1.50 |
| 2014-06-03 | 0.50 |
| 2014-06-05 | 1.00 |
+------------+--------+
3 rows in set (0.00 sec)
mysql> select t1.date, t1.precip as 'daily precip',
-> sum(t2.precip) as 'cum.precip',
-> count(t2.precip) as 'days elapsed',
-> avg(t2.precip) as 'avg.precip'
-> from rainfall as t1 inner join rainfall as t2
-> on t1.date >= t2.date
-> group by t1.date;
+------------+--------------+------------+--------------+------------+
| date | daily precip | cum.precip | days elapsed | avg.precip |
+------------+--------------+------------+--------------+------------+
| 2014-06-01 | 1.50 | 1.50 | 1 | 1.500000 |
| 2014-06-03 | 0.50 | 2.00 | 2 | 1.000000 |
| 2014-06-05 | 1.00 | 3.00 | 3 | 1.000000 |
+------------+--------------+------------+--------------+------------+
3 rows in set (0.00 sec)
mysql> select t1.date, t1.precip as 'daily precip',
-> sum(t2.precip) as 'cum.precip',
-> datediff(max(t2.date), min(t2.date))+ 1 as 'days elapsed',
-> sum(t2.precip) / (datediff(max(t2.date), min(t2.date))+ 1)
-> as 'avg.precip'
-> from rainfall as t1 inner join rainfall as t2
-> on t1.date >= t2.date
-> group by t1.date;
+------------+--------------+------------+--------------+------------+
| date | daily precip | cum.precip | days elapsed | avg.precip |
+------------+--------------+------------+--------------+------------+
| 2014-06-01 | 1.50 | 1.50 | 1 | 1.500000 |
| 2014-06-03 | 0.50 | 2.00 | 3 | 0.666667 |
| 2014-06-05 | 1.00 | 3.00 | 5 | 0.600000 |
+------------+--------------+------------+--------------+------------+
3 rows in set (0.00 sec)
mysql> select stage, km, t from marathon order by stage;
+-------+----+----------+
| stage | km | t |
+-------+----+----------+
| 1 | 5 | 00:15:00 |
| 2 | 7 | 00:19:30 |
| 3 | 9 | 00:29:20 |
| 4 | 5 | 00:17:50 |
+-------+----+----------+
4 rows in set (0.01 sec)
mysql> select t1.stage, t1.km, sum(t2.km) as 'cum.km'
-> from marathon as t1 inner join marathon as t2
-> on t1.stage >= t2.stage
-> group by t1.stage;
+-------+----+--------+
| stage | km | cum.km |
+-------+----+--------+
| 1 | 5 | 5 |
| 2 | 7 | 12 |
| 3 | 9 | 21 |
| 4 | 5 | 26 |
+-------+----+--------+
4 rows in set (0.00 sec)
mysql> select t1.stage, t1.km, t1.t
-> , sum(t2.km) as 'cum.km',
-> sec_to_time(sum(time_to_sec(t2.t))) as 'cum.t',
-> sum(t2.km) / (sum(time_to_sec(t2.t))/(60*60)) as 'avg.km/hour'
-> from marathon as t1 inner join marathon as t2
-> on t1.stage >= t2.stage
-> group by t1.stage;
+-------+----+----------+--------+----------+-------------+
| stage | km | t | cum.km | cum.t | avg.km/hour |
+-------+----+----------+--------+----------+-------------+
| 1 | 5 | 00:15:00 | 5 | 00:15:00 | 20.0000 |
| 2 | 7 | 00:19:30 | 12 | 00:34:30 | 20.8696 |
| 3 | 9 | 00:29:20 | 21 | 01:03:50 | 19.7389 |
| 4 | 5 | 00:17:50 | 26 | 01:21:40 | 19.1020 |
+-------+----+----------+--------+----------+-------------+
4 rows in set (0.00 sec)