mysql> show databases;
+--------------------+
| Database |
+--------------------+
| cookbook |
| employees |
| information_schema |
| mysql |
| performance_schema |
| sys |
+--------------------+
6 rows in set (0.01 sec)
mysql> use employees;
Database changed
#–基本SQL语句练习
- 基本联表查询(获取当前薪资)
sql
SELECT e.first_name, s.salary, s.salary+1000 AS increased_salary
FROM employees e
JOIN salaries s ON e.emp_no = s.emp_no
WHERE e.emp_no IN (
SELECT emp_no FROM dept_emp WHERE dept_no = 'd005' -- 假设 department_id 50 对应 dept_no 'd005'
)
AND s.to_date = '9999-01-01'; -- 获取当前有效薪资
2. 如果你需要按部门查询(假设部门50对应 dept_no 'd005'):
sql
SELECT e.first_name, s.salary, s.salary+1000 AS increased_salary
FROM employees e
JOIN salaries s ON e.emp_no = s.emp_no
JOIN dept_emp de ON e.emp_no = de.emp_no
WHERE de.dept_no = 'd005' -- 部门编号可能需要调整
AND s.to_date = '9999-01-01'
AND de.to_date = '9999-01-01'; -- 当前部门任职
3. 如果不知道部门编号对应关系:
sql
SELECT * FROM departments; -- 先查看部门编号对应关系
mysql> SELECT
-> d.dept_no,
-> d.dept_name,
-> COUNT(DISTINCT e.emp_no) AS employee_count,
-> SUM(s.salary) AS total_salary,
-> AVG(s.salary) AS avg_salary
-> FROM
-> departments d
-> JOIN
-> dept_emp de ON d.dept_no = de.dept_no
-> JOIN
-> employees e ON de.emp_no = e.emp_no
-> JOIN
-> salaries s ON e.emp_no = s.emp_no
-> WHERE
-> de.to_date = '9999-01-01'
-> AND s.to_date = '9999-01-01'
-> GROUP BY
-> d.dept_no, d.dept_name;
+---------+--------------------+----------------+--------------+------------+
| dept_no | dept_name | employee_count | total_salary | avg_salary |
+---------+--------------------+----------------+--------------+------------+
| d001 | Marketing | 14842 | 1188233434 | 80058.8488 |
| d002 | Finance | 12437 | 977049936 | 78559.9370 |
| d003 | Human Resources | 12898 | 824464664 | 63921.8998 |
| d004 | Production | 53304 | 3616319369 | 67843.3020 |
| d005 | Development | 61386 | 4153249050 | 67657.9196 |
| d006 | Quality Management | 14546 | 951919236 | 65441.9934 |
| d007 | Sales | 37701 | 3349845802 | 88852.9695 |
| d008 | Research | 15441 | 1048650423 | 67913.3750 |
| d009 | Customer Service | 17569 | 1182134209 | 67285.2302 |
+---------+--------------------+----------------+--------------+------------+
9 rows in set (3.55 sec)
mysql> SELECT
-> d.dept_no,
-> d.dept_name,
-> COUNT(DISTINCT e.emp_no) AS employee_count,
-> SUM(s.salary) AS total_salary,
-> AVG(s.salary) AS avg_salary,
-> CONCAT(FORMAT(SUM(s.salary)/10000, 2), '万') AS total_salary_wan,
-> CONCAT(
-> SUM(CASE WHEN s.salary < 50000 THEN 1 ELSE 0 END), '人',
-> '(', ROUND(SUM(CASE WHEN s.salary < 50000 THEN 1 ELSE 0 END)/COUNT(*)*100, 0), '%)'
-> ) AS low_salary_stats,
-> CONCAT(
-> SUM(CASE WHEN s.salary BETWEEN 50000 AND 100000 THEN 1 ELSE 0 END), '人',
-> '(', ROUND(SUM(CASE WHEN s.salary BETWEEN 50000 AND 100000 THEN 1 ELSE 0 END)/COUNT(*)*100, 0), '%)'
-> ) AS medium_salary_stats
-> FROM
-> departments d
-> JOIN
-> dept_emp de ON d.dept_no = de.dept_no
-> JOIN
-> employees e ON de.emp_no = e.emp_no
-> JOIN
-> salaries s ON e.emp_no = s.emp_no
-> WHERE
-> de.to_date = '9999-01-01'
-> AND s.to_date = '9999-01-01'
-> GROUP BY
-> d.dept_no, d.dept_name;
+---------+--------------------+----------------+--------------+------------+------------------+------------------+---------------------+
| dept_no | dept_name | employee_count | total_salary | avg_salary | total_salary_wan | low_salary_stats | medium_salary_stats |
+---------+--------------------+----------------+--------------+------------+------------------+------------------+---------------------+
| d001 | Marketing | 14842 | 1188233434 | 80058.8488 | 118,823.34万 | 481人(3%) | 12369人(83%) |
| d002 | Finance | 12437 | 977049936 | 78559.9370 | 97,704.99万 | 456人(4%) | 10560人(85%) |
| d003 | Human Resources | 12898 | 824464664 | 63921.8998 | 82,446.47万 | 1825人(14%) | 10927人(85%) |
| d004 | Production | 53304 | 3616319369 | 67843.3020 | 361,631.94万 | 5292人(10%) | 46794人(88%) |
| d005 | Development | 61386 | 4153249050 | 67657.9196 | 415,324.91万 | 6229人(10%) | 53805人(88%) |
| d006 | Quality Management | 14546 | 951919236 | 65441.9934 | 95,191.92万 | 1821人(13%) | 12546人(86%) |
| d007 | Sales | 37701 | 3349845802 | 88852.9695 | 334,984.58万 | 397人(1%) | 27240人(72%) |
| d008 | Research | 15441 | 1048650423 | 67913.3750 | 104,865.04万 | 1584人(10%) | 13487人(87%) |
| d009 | Customer Service | 17569 | 1182134209 | 67285.2302 | 118,213.42万 | 2220人(13%) | 14594人(83%) |
+---------+--------------------+----------------+--------------+------------+------------------+------------------+---------------------+
9 rows in set (4.37 sec)
mysql> SHOW VARIABLES LIKE 'secure_file_priv';
+------------------+---------------------------------+
| Variable_name | Value |
+------------------+---------------------------------+
| secure_file_priv | D:\software\MySql\Data\Uploads\ |
+------------------+---------------------------------+
1 row in set, 1 warning (0.10 sec)
SELECT
d.dept_name,
COUNT(*) AS employee_count,
AVG(s.salary) AS avg_salary,
SUM(s.salary) AS total_salary
FROM
departments d
JOIN
dept_emp de ON d.dept_no = de.dept_no
JOIN
salaries s ON de.emp_no = s.emp_no
WHERE
de.to_date = '9999-01-01'
AND s.to_date = '9999-01-01'
GROUP BY
d.dept_name
INTO OUTFILE 'D:\\software\\MySql\\Data\\Uploads\\department_salary.csv'
FIELDS TERMINATED BY ',' ENCLOSED BY '"'
LINES TERMINATED BY '\n';
在 MySQL 中,确保使用 utf8mb4 字符集导出数据:
SELECT
'dept_name' AS dept_name,
'employee_count' AS employee_count,
'avg_salary' AS avg_salary,
'total_salary' AS total_salary
UNION ALL
SELECT
d.dept_name,
COUNT(*) AS employee_count,
AVG(s.salary) AS avg_salary,
SUM(s.salary) AS total_salary
FROM
departments d
JOIN
dept_emp de ON d.dept_no = de.dept_no
JOIN
salaries s ON de.emp_no = s.emp_no
WHERE
de.to_date = '9999-01-01'
AND s.to_date = '9999-01-01'
GROUP BY
d.dept_name
INTO OUTFILE 'D:\\software\\MySql\\Data\\Uploads\\department_salary.csv'
CHARACTER SET utf8mb4 -- 显式指定字符集
FIELDS TERMINATED BY ',' ENCLOSED BY '"'
LINES TERMINATED BY '\n';
import pandas as pd
import matplotlib.pyplot as plt
# 方法1:使用 utf-8-sig 编码(自动处理 BOM 头)
df = pd.read_csv(
'D:/software/MySql/Data/Uploads/department_salary.csv',
encoding='utf-8-sig', # 推荐使用,兼容带 BOM 的 UTF-8 文件
header=None,
names=['dept_name', 'employee_count', 'avg_salary', 'total_salary']
)
# 方法2:使用 utf-8 编码(如果文件无 BOM 头)
df = pd.read_csv(
'D:/software/MySql/Data/Uploads/department_salary.csv',
encoding='utf-8',
header=None,
names=['dept_name', 'employee_count', 'avg_salary', 'total_salary']
)
# 验证中文显示
print(df.head())
# 绘图代码(保持不变)
plt.figure(figsize=(12, 6))
plt.bar(df['dept_name'], df['avg_salary'], color='skyblue')
plt.xticks(rotation=45, ha='right')
plt.title('各部门平均薪资对比', fontsize=14)
plt.xlabel('部门名称', fontsize=12)
plt.ylabel('平均薪资', fontsize=12)
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.tight_layout()
plt.show()
mysql> SELECT
-> dept_name,
-> COUNT(*) AS total_employees,
-> CONCAT(ROUND(SUM(salary < 40000)/COUNT(*)*100), '%') AS "低薪(<4万)",
-> CONCAT(ROUND(SUM(salary BETWEEN 40000 AND 80000)/COUNT(*)*100), '%') AS "中薪(4-8万)",
-> CONCAT(ROUND(SUM(salary > 80000)/COUNT(*)*100), '%') AS "高薪(>8万)"
-> FROM (
-> SELECT
-> d.dept_name, s.salary
-> FROM
-> salaries s
-> JOIN
-> dept_emp de ON s.emp_no = de.emp_no
-> JOIN
-> departments d ON de.dept_no = d.dept_no
-> WHERE
-> s.to_date = '9999-01-01'
-> AND de.to_date = '9999-01-01'
-> ) AS current_salaries
-> GROUP BY dept_name
-> ORDER BY SUM(salary > 80000)/COUNT(*) DESC; -- 按高薪比例排序
+--------------------+-----------------+------------+-------------+------------+
| dept_name | total_employees | 低薪(<4万) | 中薪(4-8万) | 高薪(>8万) |
+--------------------+-----------------+------------+-------------+------------+
| Sales | 37701 | 0% | 32% | 68% |
| Marketing | 14842 | 0% | 52% | 48% |
| Finance | 12437 | 0% | 55% | 45% |
| Production | 53304 | 0% | 81% | 19% |
| Research | 15441 | 0% | 81% | 19% |
| Development | 61386 | 0% | 81% | 19% |
| Customer Service | 17569 | 0% | 81% | 18% |
| Quality Management | 14546 | 0% | 86% | 14% |
| Human Resources | 12898 | 0% | 89% | 11% |
+--------------------+-----------------+------------+-------------+------------+
9 rows in set (7.02 sec)
mysql> WITH ranked_salaries AS (
-> SELECT
-> d.dept_name,
-> s.salary,
-> ROW_NUMBER() OVER (
-> PARTITION BY d.dept_no
-> ORDER BY s.salary
-> ) AS row_num,
-> COUNT(*) OVER (PARTITION BY d.dept_no) AS total_employees
-> FROM departments d
-> JOIN dept_emp de ON d.dept_no = de.dept_no
-> JOIN salaries s ON de.emp_no = s.emp_no
-> WHERE de.to_date = '9999-01-01'
-> AND s.to_date = '9999-01-01'
-> )
-> SELECT
-> dept_name,
-> AVG(salary) AS avg_salary,
-> MAX(CASE WHEN row_num = CEIL(total_employees/2.0) THEN salary END) AS median_salary
-> FROM ranked_salaries
-> GROUP BY dept_name;
+--------------------+------------+---------------+
| dept_name | avg_salary | median_salary |
+--------------------+------------+---------------+
| Marketing | 80058.8488 | 79125 |
| Finance | 78559.9370 | 77858 |
| Human Resources | 63921.8998 | 62810 |
| Production | 67843.3020 | 66725 |
| Development | 67657.9196 | 66449 |
| Quality Management | 65441.9934 | 64381 |
| Sales | 88852.9695 | 88516 |
| Research | 67913.3750 | 66667 |
| Customer Service | 67285.2302 | 65149 |
+--------------------+------------+---------------+
9 rows in set (11.74 sec)