8.9 使用确定的特性选择组群
mysql> select count(*), name
-> from driver_log
-> where count(*) > 3
-> group by name;
ERROR 1111 (HY000): Invalid use of group function
mysql> select count(*), name
-> from driver_log
-> group by name;
+----------+-------+
| count(*) | name |
+----------+-------+
| 3 | Ben |
| 2 | Suzi |
| 5 | Henry |
+----------+-------+
3 rows in set (0.00 sec)
mysql> select count(*), name
-> from driver_log
-> group by name
-> having count(*) > 3;
+----------+-------+
| count(*) | name |
+----------+-------+
| 5 | Henry |
+----------+-------+
1 row in set (0.00 sec)
mysql> select count(*) as count, name
-> from driver_log
-> group by name
-> having count > 3;
+-------+-------+
| count | name |
+-------+-------+
| 5 | Henry |
+-------+-------+
1 row in set (0.00 sec)
8.10 使用计数确定数值是否唯一
mysql> select trav_date, count(trav_date)
-> from driver_log
-> group by trav_date
-> having count(trav_date) = 1;
+------------+------------------+
| trav_date | count(trav_date) |
+------------+------------------+
| 2014-07-27 | 1 |
| 2014-07-26 | 1 |
| 2014-08-01 | 1 |
+------------+------------------+
3 rows in set (0.00 sec)
mysql> select trav_date, count(trav_date)
-> from driver_log
-> group by trav_date
-> having count(trav_date) > 1;
+------------+------------------+
| trav_date | count(trav_date) |
+------------+------------------+
| 2014-07-30 | 2 |
| 2014-07-29 | 3 |
| 2014-08-02 | 2 |
+------------+------------------+
3 rows in set (0.00 sec)
mysql> select srcuser, dstuser
-> from mail
-> group by srcuser, dstuser
-> having count(*) = 1;
+---------+---------+
| srcuser | dstuser |
+---------+---------+
| tricia | gene |
| barb | barb |
| tricia | phil |
| gene | tricia |
| phil | barb |
+---------+---------+
5 rows in set (0.01 sec)
8.11 使用表达式结果分组
mysql> select char_length(name), count(*)
-> from states group by char_length(name);
+-------------------+----------+
| char_length(name) | count(*) |
+-------------------+----------+
| 6 | 5 |
| 7 | 8 |
| 8 | 12 |
| 10 | 4 |
| 11 | 2 |
| 4 | 3 |
| 5 | 3 |
| 9 | 4 |
| 13 | 3 |
| 14 | 2 |
| 12 | 4 |
+-------------------+----------+
11 rows in set (0.01 sec)
mysql> select
-> monthname(statehood) as month,
-> dayofmonth(statehood) as day,
-> count(*) as count
-> from states group by month, day having count > 1;
+----------+------+-------+
| month | day | count |
+----------+------+-------+
| February | 14 | 2 |
| June | 1 | 2 |
| November | 2 | 2 |
| March | 1 | 2 |
| May | 29 | 2 |
+----------+------+-------+
5 rows in set (0.00 sec)
8.12 分类无类别数据
mysql> select count(pop), count(distinct pop) from states;
+------------+---------------------+
| count(pop) | count(distinct pop) |
+------------+---------------------+
| 50 | 50 |
+------------+---------------------+
1 row in set (0.00 sec)
mysql> select min(pop), max(pop) from states;
+----------+----------+
| min(pop) | max(pop) |
+----------+----------+
| 578803 | 39237836 |
+----------+----------+
1 row in set (0.00 sec)
mysql> select floor(pop/5000000)as 'max population (millions)',
-> count(*) as 'number of states'
-> from states group by 'max population (millions)';
ERROR 1055 (42000): Expression #1 of SELECT list is not in GROUP BY clause and contains nonaggregated column 'cookbook.states.pop' which is not functionally dependent on columns in GROUP BY clause; this is incompatible with sql_mode=only_full_group_by
mysql> select floor(pop/5000000)as 'max population (millions)',
-> count(*) as 'number of states'
-> from states group by floor(pop/5000000);
+---------------------------+------------------+
| max population (millions) | number of states |
+---------------------------+------------------+
| 0 | 26 |
| 1 | 14 |
| 7 | 1 |
| 4 | 1 |
| 2 | 6 |
| 3 | 1 |
| 5 | 1 |
+---------------------------+------------------+
7 rows in set (0.00 sec)
mysql> SELECT
-> FLOOR(pop/5000000) AS 'max population (millions)',
-> COUNT(*) AS 'number of states'
-> FROM states
-> GROUP BY 1; -- 1表示SELECT列表中的第一个表达式
+---------------------------+------------------+
| max population (millions) | number of states |
+---------------------------+------------------+
| 0 | 26 |
| 1 | 14 |
| 7 | 1 |
| 4 | 1 |
| 2 | 6 |
| 3 | 1 |
| 5 | 1 |
+---------------------------+------------------+
7 rows in set (0.00 sec)
mysql> SELECT
-> pop_group AS 'max population (millions)',
-> COUNT(*) AS 'number of states'
-> FROM (
-> SELECT FLOOR(pop/5000000) AS pop_group
-> FROM states
-> ) t
-> GROUP BY pop_group;
+---------------------------+------------------+
| max population (millions) | number of states |
+---------------------------+------------------+
| 0 | 26 |
| 1 | 14 |
| 7 | 1 |
| 4 | 1 |
| 2 | 6 |
| 3 | 1 |
| 5 | 1 |
+---------------------------+------------------+
7 rows in set (0.00 sec)
为什么原SQL报错?
GROUP BY 'max population (millions)' 实际是按字符串字面量分组,而不是按计算值分组
由于 ONLY_FULL_GROUP_BY 模式启用,要求所有非聚合列必须出现在GROUP BY中
mysql> select
-> floor(pop / 5000000) * 5 as 'max population (millions)',
-> count(*) as 'number of states'
-> from states
-> group by 1;
+---------------------------+------------------+
| max population (millions) | number of states |
+---------------------------+------------------+
| 0 | 26 |
| 5 | 14 |
| 35 | 1 |
| 20 | 1 |
| 10 | 6 |
| 15 | 1 |
| 25 | 1 |
+---------------------------+------------------+
7 rows in set (0.00 sec)
mysql> select
-> floor((pop+4999999) / 5000000) * 5 as 'max population (millions)',
-> count(*) as 'number of states'
-> from states group by 1;
+---------------------------+------------------+
| max population (millions) | number of states |
+---------------------------+------------------+
| 5 | 26 |
| 10 | 14 |
| 40 | 1 |
| 25 | 1 |
| 15 | 6 |
| 20 | 1 |
| 30 | 1 |
+---------------------------+------------------+
7 rows in set (0.00 sec)
mysql> select floor((size+99999)/100000) as 'size (100kb)',
-> count(*) as 'number of messages'
-> from mail group by 1;
+--------------+--------------------+
| size (100kb) | number of messages |
+--------------+--------------------+
| 1 | 13 |
| 2 | 1 |
| 24 | 1 |
| 10 | 1 |
+--------------+--------------------+
4 rows in set (0.00 sec)
mysql> select floor(log10(pop)) as 'log10(population)',
-> count(*) as 'number of states'
-> from states group by 1;
+-------------------+------------------+
| log10(population) | number of states |
+-------------------+------------------+
| 5 | 5 |
| 6 | 35 |
| 7 | 10 |
+-------------------+------------------+
3 rows in set (0.00 sec)
8.13 控制摘要显示顺序
mysql> select name, count(*) as days, sum(miles) as mileage
-> from driver_log group by name;
+-------+------+---------+
| name | days | mileage |
+-------+------+---------+
| Ben | 3 | 362 |
| Suzi | 2 | 893 |
| Henry | 5 | 911 |
+-------+------+---------+
3 rows in set (0.00 sec)
mysql> select name, count(*) as days, sum(miles) as mileage
-> from driver_log group by name order by mileage desc;
+-------+------+---------+
| name | days | mileage |
+-------+------+---------+
| Henry | 5 | 911 |
| Suzi | 2 | 893 |
| Ben | 3 | 362 |
+-------+------+---------+
3 rows in set (0.00 sec)
mysql> select dayname(statehood), count(*) from states
-> group by dayname(statehood);
+--------------------+----------+
| dayname(statehood) | count(*) |
+--------------------+----------+
| Saturday | 11 |
| Tuesday | 6 |
| Wednesday | 11 |
| Monday | 9 |
| Friday | 8 |
| Thursday | 5 |
+--------------------+----------+
6 rows in set (0.00 sec)
mysql> SELECT DAYNAME(statehood), COUNT(*)
-> FROM states
-> GROUP BY DAYNAME(statehood), DAYOFWEEK(statehood);
+--------------------+----------+
| DAYNAME(statehood) | COUNT(*) |
+--------------------+----------+
| Saturday | 11 |
| Tuesday | 6 |
| Wednesday | 11 |
| Monday | 9 |
| Friday | 8 |
| Thursday | 5 |
+--------------------+----------+
6 rows in set (0.00 sec)
mysql> SELECT day_name, COUNT(*)
-> FROM (
-> SELECT
-> DAYNAME(statehood) AS day_name,
-> DAYOFWEEK(statehood) AS day_num
-> FROM states
-> ) t
-> GROUP BY day_num, day_name;
+-----------+----------+
| day_name | COUNT(*) |
+-----------+----------+
| Saturday | 11 |
| Tuesday | 6 |
| Wednesday | 11 |
| Monday | 9 |
| Friday | 8 |
| Thursday | 5 |
+-----------+----------+
6 rows in set (0.00 sec)
最佳实践
保持一致性:GROUP BY 中的表达式应与 SELECT 中的非聚合列匹配
使用别名:对于复杂表达式,使用派生表或 CTE 提高可读性
明确分组逻辑:确保分组依据与您想要的分析维度一致
mysql> SET SESSION sql_mode=(SELECT REPLACE(@@sql_mode,'ONLY_FULL_GROUP_BY',''));
Query OK, 0 rows affected (0.00 sec)
mysql> -- 执行查询
mysql> SET SESSION sql_mode=@@global.sql_mode;
Query OK, 0 rows affected (0.00 sec)
mysql> select name, count(*) as days, sum(miles) as mileage
-> from driver_log group by name;
+-------+------+---------+
| name | days | mileage |
+-------+------+---------+
| Ben | 3 | 362 |
| Suzi | 2 | 893 |
| Henry | 5 | 911 |
+-------+------+---------+
3 rows in set (0.00 sec)
mysql> select name, count(*) as days, sum(miles) as mileage
-> from driver_log group by name order by null;
+-------+------+---------+
| name | days | mileage |
+-------+------+---------+
| Ben | 3 | 362 |
| Suzi | 2 | 893 |
| Henry | 5 | 911 |
+-------+------+---------+
3 rows in set (0.00 sec)
8.14 查找最小或最大的摘要数值
mysql> select name, sum(miles)
-> from driver_log
-> group by name;
+-------+------------+
| name | sum(miles) |
+-------+------------+
| Ben | 362 |
| Suzi | 893 |
| Henry | 911 |
+-------+------------+
3 rows in set (0.00 sec)
mysql> select name, sum(miles)
-> from driver_log
-> group by name
-> having sum(miles) = max(sum(miles));
ERROR 1111 (HY000): Invalid use of group function
mysql> select name, sum(miles) as 'total miles'
-> from driver_log
-> group by name
-> order by 'total miles' desc limit 1;
+------+-------------+
| name | total miles |
+------+-------------+
| Ben | 362 |
+------+-------------+
1 row in set (0.00 sec)
mysql> select leet(name, 1) as letter, count(*) as count from states
-> group by letter order by count desc limit 1;
ERROR 1305 (42000): FUNCTION cookbook.leet does not exist
1. SUBSTRING(str, pos, len) 是 MySQL 内置函数,从字符串 str 的位置 pos 开始提取 len 个字符
2. 创建自定义函数需要有足够的权限(通常需要 ADMIN 或 CREATE ROUTINE 权限)
mysql> -- 提取第一个字母
mysql> SELECT SUBSTRING(name, 1, 1) AS letter, COUNT(*) AS count
-> FROM states
-> GROUP BY letter
-> ORDER BY count DESC
-> LIMIT 1;
+--------+-------+
| letter | count |
+--------+-------+
| M | 8 |
+--------+-------+
1 row in set (0.00 sec)
mysql>
mysql> -- 或者提取任意位置的字母(例如第2个字母)
mysql> SELECT SUBSTRING(name, 2, 1) AS letter, COUNT(*) AS count
-> FROM states
-> GROUP BY letter
-> ORDER BY count DESC
-> LIMIT 1;
+--------+-------+
| letter | count |
+--------+-------+
| e | 14 |
+--------+-------+
1 row in set (0.00 sec)
1. 创建 leet() 函数
首先需要创建一个函数来提取字符串的特定字母:
mysql> use cookbook
Database changed
mysql> DELIMITER //
mysql> CREATE FUNCTION leet(str VARCHAR(255), pos INT)
-> RETURNS CHAR(1)
-> DETERMINISTIC
-> BEGIN
-> RETURN SUBSTRING(str, pos, 1);
-> END //
Query OK, 0 rows affected (0.04 sec)
mysql> DELIMITER ;
然后执行您的查询
mysql> SELECT leet(name, 1) AS letter, COUNT(*) AS count
-> FROM states
-> GROUP BY letter
-> ORDER BY count DESC
-> LIMIT 1;
+--------+-------+
| letter | count |
+--------+-------+
| M | 8 |
+--------+-------+
1 row in set (0.01 sec)
mysql> set @max= (select count(*) from states
-> group by left(name, 1) order by count(*) desc limit 1);
Query OK, 0 rows affected (0.00 sec)
mysql> select left(name, 1) as letter, count(*) as count from states
-> group by letter having count= @max;
+--------+-------+
| letter | count |
+--------+-------+
| M | 8 |
| N | 8 |
+--------+-------+
2 rows in set (0.00 sec)
mysql> select left(name, 1) as letter, count(*) as count from states
-> group by letter having count =
-> (select count(*) from states
-> group by left(name, 1) order by count(*) desc limit 1);
+--------+-------+
| letter | count |
+--------+-------+
| M | 8 |
| N | 8 |
+--------+-------+
2 rows in set (0.00 sec)