MySQL 分组函数全面详解与最佳实践

发布于:2025-06-20 ⋅ 阅读:(21) ⋅ 点赞:(0)

MySQL 分组函数全面详解与最佳实践

MySQL 分组函数(聚合函数)的核心知识、注意事项和高级应用技巧:


📊 分组函数核心列表

函数 描述 示例
COUNT() 计算行数 COUNT(*)
SUM() 计算数值总和 SUM(salary)
AVG() 计算平均值 AVG(score)
MAX() 获取最大值 MAX(price)
MIN() 获取最小值 MIN(price)
GROUP_CONCAT() 连接分组字符串 GROUP_CONCAT(name)
STDDEV() 计算标准差 STDDEV(price)
VAR_POP() 计算总体方差 VAR_POP(sales)

⚠️ 分组函数十大注意事项

1. NULL 值处理

SELECT 
  COUNT(*),        -- 所有行数(包含NULL)
  COUNT(bonus),    -- 非NULL行数
  AVG(COALESCE(bonus, 0)) -- NULL转为0计算
FROM employees;

2. 分组字段选择

-- 错误:非分组字段出现在SELECT
SELECT department, name, AVG(salary) 
FROM employees; -- 报错或未定义行为

-- 正确:所有非聚合字段必须出现在GROUP BY
SELECT department, name, AVG(salary)
FROM employees
GROUP BY department, name;

3. WHERE vs HAVING

-- WHERE:分组前过滤行
SELECT department, AVG(salary)
FROM employees
WHERE hire_date > '2020-01-01' -- 先过滤
GROUP BY department;

-- HAVING:分组后过滤组
SELECT department, AVG(salary) avg_sal
FROM employees
GROUP BY department
HAVING avg_sal > 5000; -- 后过滤

4. 性能优化策略

-- 低效:全表扫描
SELECT department, AVG(salary)
FROM employees
GROUP BY department;

-- 高效:添加索引
ALTER TABLE employees ADD INDEX idx_dept (department);

5. 隐式排序问题

-- 结果顺序不保证
SELECT department, COUNT(*)
FROM employees
GROUP BY department;

-- 显式排序
SELECT department, COUNT(*) AS emp_count
FROM employees
GROUP BY department
ORDER BY emp_count DESC;

6. 分组函数嵌套限制

-- 允许:单层分组函数
SELECT AVG(MAX(salary)) -- ❌ 错误嵌套

-- 正确:使用子查询
SELECT AVG(max_sal)
FROM (
  SELECT department, MAX(salary) AS max_sal
  FROM employees
  GROUP BY department
) dept_max;

7. DISTINCT 用法

-- 统计不重复值
SELECT 
  COUNT(DISTINCT department), -- 不同部门数量
  COUNT(DISTINCT CASE WHEN salary > 5000 THEN 1 END) -- 高薪人数
FROM employees;

8. 空分组处理

-- 使用 COALESCE 处理空分组
SELECT 
  COALESCE(department, '未分配') AS dept,
  COUNT(*) 
FROM employees
GROUP BY department;

9. 多列分组顺序

-- 分组顺序影响结果
SELECT 
  YEAR(hire_date) AS hire_year,
  department,
  COUNT(*)
FROM employees
GROUP BY hire_year, department; -- 先按年再按部门

10. GROUP_CONCAT 限制

-- 默认截断长度1024字符
SET SESSION group_concat_max_len = 10000;

SELECT 
  department,
  GROUP_CONCAT(name ORDER BY salary DESC SEPARATOR '|') 
FROM employees
GROUP BY department;

🚀 高级分组技巧

1. 多级分组分析

SELECT 
  YEAR(order_date) AS order_year,
  QUARTER(order_date) AS quarter,
  product_category,
  SUM(amount) AS total_sales,
  COUNT(DISTINCT customer_id) AS customers
FROM orders
GROUP BY order_year, quarter, product_category
WITH ROLLUP; -- 添加小计和总计行

2. 分组百分比计算

SELECT 
  department,
  COUNT(*) AS emp_count,
  ROUND(COUNT(*) * 100.0 / SUM(COUNT(*)) OVER (), 2) AS pct
FROM employees
GROUP BY department;

3. 分组排名

SELECT 
  department,
  name,
  salary,
  RANK() OVER (PARTITION BY department ORDER BY salary DESC) AS dept_rank
FROM employees;

4. 分组比较分析

SELECT 
  department,
  AVG(salary) AS avg_salary,
  AVG(salary) - (SELECT AVG(salary) FROM employees) AS diff_from_avg
FROM employees
GROUP BY department;

5. 时间序列分组

SELECT 
  DATE_FORMAT(order_date, '%Y-%m') AS month,
  SUM(amount) AS monthly_sales,
  LAG(SUM(amount)) OVER (ORDER BY DATE_FORMAT(order_date, '%Y-%m')) AS prev_month
FROM orders
GROUP BY month;

🔧 性能优化指南

1. 索引策略

-- 复合索引优化分组
ALTER TABLE orders 
  ADD INDEX idx_category_date (product_category, order_date);

-- 覆盖索引
EXPLAIN SELECT product_category, COUNT(*) 
FROM orders 
GROUP BY product_category; -- 使用索引

2. 临时表优化

-- 增大临时表内存
SET tmp_table_size = 256*1024*1024; -- 256MB
SET max_heap_table_size = 256*1024*1024;

-- 监控临时表使用
SHOW STATUS LIKE 'Created_tmp%';

3. 分区表优化

-- 按日期分区
CREATE TABLE sales (
    sale_id INT,
    sale_date DATE,
    amount DECIMAL(10,2)
) PARTITION BY RANGE (YEAR(sale_date)) (
    PARTITION p2020 VALUES LESS THAN (2021),
    PARTITION p2021 VALUES LESS THAN (2022),
    PARTITION p2022 VALUES LESS THAN (2023)
);

-- 分区分组查询
SELECT YEAR(sale_date), SUM(amount)
FROM sales
GROUP BY YEAR(sale_date); -- 仅扫描相关分区

4. 物化视图(MySQL 8.0+)

-- 创建分组结果缓存
CREATE TABLE sales_summary AS
SELECT 
  product_id, 
  YEAR(order_date) AS year, 
  SUM(amount) AS total
FROM orders
GROUP BY product_id, year;

-- 定期刷新
REPLACE INTO sales_summary
SELECT product_id, YEAR(order_date), SUM(amount)
FROM orders
WHERE order_date > (SELECT MAX(order_date) FROM sales_summary)
GROUP BY product_id, YEAR(order_date);

💡 最佳实践总结

1. 分组设计原则

-- 明确分组粒度
SELECT 
  DATE(order_date) AS day,  -- 按天
  HOUR(order_time) AS hour, -- 按小时
  COUNT(*) 
FROM orders
GROUP BY day, hour;

2. 安全处理大数据集

-- 分页处理大结果集
SELECT department, AVG(salary)
FROM employees
GROUP BY department
LIMIT 10 OFFSET 20; -- 第三页

3. 结果验证技巧

-- 验证分组总数
SELECT COUNT(DISTINCT department) 
FROM employees; -- 应与分组行数一致

-- 交叉验证
SELECT 
  (SELECT COUNT(*) FROM employees) AS total,
  SUM(emp_count) AS group_total
FROM (
  SELECT department, COUNT(*) AS emp_count
  FROM employees
  GROUP BY department
) dept_groups;

4. 执行计划分析

-- 检查分组性能
EXPLAIN 
SELECT department, AVG(salary)
FROM employees
GROUP BY department;

-- 关注以下指标:
-- 1. Using temporary (是否使用临时表)
-- 2. Using filesort (是否文件排序)
-- 3. key (使用的索引)

5. 避免常见陷阱

-- 陷阱1:错误处理NULL
SELECT department, AVG(bonus) -- 忽略NULL
FROM employees;

-- 陷阱2:混淆WHERE和HAVING
SELECT department, AVG(salary)
FROM employees
WHERE AVG(salary) > 5000; -- 错误!WHERE不能使用聚合函数

-- 陷阱3:未排序的分页
SELECT department, COUNT(*)
FROM employees
GROUP BY department
LIMIT 10; -- 结果随机

🚀 综合应用案例

销售分析报告

SELECT 
  c.country,
  p.category,
  YEAR(o.order_date) AS order_year,
  COUNT(DISTINCT o.customer_id) AS customers,
  COUNT(*) AS orders,
  SUM(o.amount) AS revenue,
  AVG(o.amount) AS avg_order_value,
  GROUP_CONCAT(DISTINCT p.product_name ORDER BY p.product_name SEPARATOR ', ') AS products
FROM orders o
JOIN products p ON o.product_id = p.id
JOIN customers c ON o.customer_id = c.id
WHERE o.order_date BETWEEN '2023-01-01' AND '2023-12-31'
GROUP BY c.country, p.category, order_year WITH ROLLUP
HAVING revenue > 10000
ORDER BY country, category, order_year DESC;

网站公告

今日签到

点亮在社区的每一天
去签到