python+MySQL组合实现生成销售财务报告

发布于:2025-08-06 ⋅ 阅读:(18) ⋅ 点赞:(0)

一、最终生成的word报告:

   销售数据分析报告

报告生成时间:2025年08月05日 15:15:07

==================================================

1. 部门销售分析

部门

总销售额

平均销售额

交易次数

预算使用率

市场部

20,000.00

10,000.00

2

4.00%

技术部

81,000.00

27,000.00

3

10.12%

销售部

30,000.00

5,000.00

6

3.00%

2. 员工绩效分析

姓名

部门

总销售额

平均销售额

销售笔数

销售/薪资比

张明

销售部

17,700.00

5,900.00

3

1.18

李华

销售部

12,300.00

4,100.00

3

1.54

王芳

技术部

18,000.00

18,000.00

1

1.50

赵刚

技术部

63,000.00

31,500.00

2

3.50

刘洋

市场部

20,000.00

10,000.00

2

2.22

3. 销售趋势分析

最高单日销售额:131,000.00

最低单日销售额:131,000.00

平均日销售额:131,000.00

4. 数据可视化

以下是销售数据的可视化分析:

5. 分析结论

  1.  各部门销售情况总体表现良好,预算执行率保持在合理水平。
  2. 员工销售业绩分布相对均衡,团队协作效果显著。
  3.  销售趋势呈现稳定增长态势,市场反应积极。

二、实现代码:

1. 首先数据库中得有下面几张表:

2、详细的建表语句

SET FOREIGN_KEY_CHECKS = 0;
SET NAMES utf8mb4;
-- departments DDL
CREATE TABLE `departments` (`department_id` INT(10) NOT NULL AUTO_INCREMENT,
`department_name` VARCHAR(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NULL,
`manager_id` INT(10) NULL,
`budget` DECIMAL(15,2) NULL,
UNIQUE INDEX `department_name`(`department_name` ASC) USING BTREE,
PRIMARY KEY (`department_id`)) ENGINE = InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci AUTO_INCREMENT = 4 ROW_FORMAT = Dynamic;
-- employees DDL
CREATE TABLE `employees` (`employee_id` INT(10) NOT NULL,
`name` VARCHAR(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NOT NULL,
`department` VARCHAR(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NULL,
`position` VARCHAR(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NULL,
`hire_date` DATE NULL,
`salary` DECIMAL(10,2) NULL,
PRIMARY KEY (`employee_id`)) ENGINE = InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci ROW_FORMAT = Dynamic;
-- sales DDL
CREATE TABLE `sales` (`sale_id` INT(10) NOT NULL AUTO_INCREMENT,
`employee_id` INT(10) NULL,
`product` VARCHAR(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NULL,
`amount` DECIMAL(10,2) NULL,
`sale_date` DATE NULL,
INDEX `employee_id`(`employee_id` ASC) USING BTREE,
PRIMARY KEY (`sale_id`)) ENGINE = InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci AUTO_INCREMENT = 12 ROW_FORMAT = Dynamic;
-- sales Indexes
ALTER TABLE `sales` 
 ADD CONSTRAINT `sales_ibfk_1` FOREIGN KEY (`employee_id`) REFERENCES `employees` (`employee_id`) ON DELETE NO ACTION ON UPDATE NO ACTION;
-- departments DML
INSERT INTO `departments` (`department_id`,`department_name`,`manager_id`,`budget`) VALUES (1,'销售部',101,1000000.00),(2,'技术部',104,800000.00),(3,'市场部',105,500000.00);
-- employees DML
INSERT INTO `employees` (`employee_id`,`name`,`department`,`position`,`hire_date`,`salary`) VALUES (101,'张明','销售部','销售经理','2025-08-05',15000.00),(102,'李华','销售部','销售代表','2025-08-05',8000.00),(103,'王芳','技术部','工程师','2025-08-05',12000.00),(104,'赵刚','技术部','高级工程师','2025-08-05',18000.00),(105,'刘洋','市场部','市场专员','2025-08-05',9000.00);
-- sales DML
INSERT INTO `sales` (`sale_id`,`employee_id`,`product`,`amount`,`sale_date`) VALUES (1,101,'笔记本电脑',12000.00,'2025-08-05'),(2,102,'智能手机',6000.00,'2025-08-05'),(3,101,'显示器',2500.00,'2025-08-05'),(4,103,'软件授权',18000.00,'2025-08-05'),(5,102,'平板电脑',4500.00,'2025-08-05'),(6,104,'服务器',35000.00,'2025-08-05'),(7,105,'广告位',8000.00,'2025-08-05'),(8,101,'打印机',3200.00,'2025-08-05'),(9,102,'路由器',1800.00,'2025-08-05'),(10,104,'工作站',28000.00,'2025-08-05'),(11,105,'推广活动',12000.00,'2025-08-05');
SET FOREIGN_KEY_CHECKS = 1;

3、详细的代码

import pymysql
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
import numpy as np
from scipy.interpolate import make_interp_spline
from decimal import Decimal
import matplotlib as mpl
from docx import Document
from docx.shared import Inches, Pt, RGBColor
from docx.enum.text import WD_ALIGN_PARAGRAPH
import io
from matplotlib.patches import Patch  # 添加这行

# 设置全局中文字体支持
plt.rcParams['font.sans-serif'] = ['SimHei', 'Microsoft YaHei', 'KaiTi', 'Arial Unicode MS']
plt.rcParams['axes.unicode_minus'] = False

def get_conn():
    return pymysql.connect(
        host='localhost',
        user='root',
        password='root',
        database='yggx',
        charset='utf8mb4',
        cursorclass=pymysql.cursors.DictCursor
    )

def run_query(sql):
    """执行SQL查询并返回DataFrame"""
    conn = get_conn()
    try:
        with conn.cursor() as cursor:
            cursor.execute(sql)
            result = cursor.fetchall()
            return pd.DataFrame(result)
    finally:
        conn.close()

def convert_decimal_to_float(df):
    """将DataFrame中的Decimal类型转换为float"""
    for col in df.columns:
        if df[col].dtype == object and any(isinstance(x, Decimal) for x in df[col]):
            df[col] = df[col].astype(float)
    return df

def analyze_data():
    """执行关系数据分析 - 修复版"""
    print("\n开始数据分析...")
    
    # 1. 获取所有数据
    employees_df = run_query("SELECT * FROM employees")
    sales_df = run_query("SELECT * FROM sales")
    dept_df = run_query("SELECT * FROM departments")
    
    # 转换Decimal类型为float
    employees_df = convert_decimal_to_float(employees_df)
    sales_df = convert_decimal_to_float(sales_df)
    dept_df = convert_decimal_to_float(dept_df)
    
    print("\n员工数据 (前5行):")
    print(employees_df.head())
    
    print("\n销售数据 (前5行):")
    print(sales_df.head())
    
    print("\n部门数据:")
    print(dept_df)
    
    # 检查数据是否为空
    if employees_df.empty or sales_df.empty or dept_df.empty:
        print("\n错误: 从数据库读取的数据为空")
        return {}
    
    # 2. 关系连接:员工销售明细
    sales_detail = pd.merge(
        sales_df, 
        employees_df[['employee_id', 'name', 'department', 'position']],
        on='employee_id'
    )
    
    # 添加部门预算信息
    sales_detail = pd.merge(
        sales_detail,
        dept_df[['department_name', 'budget']],
        left_on='department',
        right_on='department_name'
    ).drop(columns='department_name')
    
    print("\n销售明细 (前5行):")
    print(sales_detail.head())
    
    # 3. 部门销售分析
    dept_sales = sales_detail.groupby('department').agg(
        total_sales=('amount', 'sum'),
        avg_sale=('amount', 'mean'),
        transaction_count=('sale_id', 'count'),
        budget=('budget', 'first')
    ).reset_index()
    
    dept_sales['budget_utilization'] = dept_sales['total_sales'] / dept_sales['budget']
    print("\n部门销售分析:")
    print(dept_sales)
    
    # 4. 员工绩效分析
    employee_performance = sales_detail.groupby(['employee_id', 'name', 'department']).agg(
        total_sales=('amount', 'sum'),
        avg_sale=('amount', 'mean'),
        sales_count=('sale_id', 'count')
    ).reset_index()
    
    # 添加员工薪资信息
    employee_performance = pd.merge(
        employee_performance,
        employees_df[['employee_id', 'salary']],
        on='employee_id'
    )
    
    employee_performance['sales_per_salary'] = employee_performance['total_sales'] / employee_performance['salary']
    print("\n员工绩效分析:")
    print(employee_performance.sort_values('total_sales', ascending=False))
    
    # 5. 时间序列分析 (最近30天销售趋势)
    # 确保日期格式正确
    sales_detail['sale_date'] = pd.to_datetime(sales_detail['sale_date'])
    last_30_days = datetime.now() - timedelta(days=30)
    recent_sales = sales_detail[sales_detail['sale_date'] > last_30_days]
    
    if not recent_sales.empty:
        daily_sales = recent_sales.groupby('sale_date')['amount'].sum().reset_index()
        print("\n最近30天每日销售总额:")
        print(daily_sales)
    else:
        print("\n警告: 最近30天无销售数据")
        daily_sales = pd.DataFrame()
    
    return {
        'sales_detail': sales_detail,
        'dept_sales': dept_sales,
        'employee_performance': employee_performance,
        'daily_sales': daily_sales
    }

def visualize_results(results):
    """可视化分析结果 - 修复版"""
    print("\n开始可视化分析结果...")
    
    # 创建大图表
    fig = plt.figure(figsize=(18, 12))
    fig.suptitle('销售数据分析报告', fontsize=20, fontweight='bold')
    
    # 1. 部门销售对比
    ax1 = plt.subplot(2, 2, 1)
    dept_sales = results['dept_sales']
    if not dept_sales.empty:
        # 使用更美观的颜色
        colors = plt.cm.viridis_r(np.linspace(0, 1, len(dept_sales)))
        bars = plt.bar(dept_sales['department'], dept_sales['total_sales'], color=colors)
        plt.title('部门销售总额对比', fontsize=14)
        plt.ylabel('销售总额', fontsize=12)
        plt.xlabel('部门', fontsize=12)
        plt.grid(axis='y', linestyle='--', alpha=0.7)
        
        # 添加数值标签
        for bar in bars:
            height = bar.get_height()
            ax1.annotate(f'{height:,.0f}元',
                         xy=(bar.get_x() + bar.get_width() / 2, height),
                         xytext=(0, 3),  # 3 points vertical offset
                         textcoords="offset points",
                         ha='center', va='bottom', fontsize=10)
    else:
        plt.text(0.5, 0.5, '无部门销售数据', ha='center', va='center', fontsize=12)
        plt.title('部门销售总额对比', fontsize=14)
    
    # 2. 员工销售排名
    ax2 = plt.subplot(2, 2, 2)
    emp_perf = results['employee_performance']
    if not emp_perf.empty:
        emp_perf = emp_perf.sort_values('total_sales', ascending=False)
        # 为每个部门分配不同颜色
        departments = emp_perf['department'].unique()
        color_map = {dept: plt.cm.tab10(i) for i, dept in enumerate(departments)}
        
        bars = ax2.bar(emp_perf['name'], emp_perf['total_sales'], 
                      color=[color_map[dept] for dept in emp_perf['department']])
        plt.title('员工销售排名', fontsize=14)
        plt.ylabel('销售总额', fontsize=12)
        plt.xlabel('员工姓名', fontsize=12)
        plt.xticks(rotation=15, fontsize=10)
        plt.grid(axis='y', linestyle='--', alpha=0.7)
        
        # 添加图例
        from matplotlib.patches import Patch
        legend_elements = [Patch(facecolor=color_map[dept], label=dept) for dept in departments]
        ax2.legend(handles=legend_elements, title='部门', loc='upper right')
        
        # 添加数值标签
        for bar in bars:
            height = bar.get_height()
            ax2.annotate(f'{height:,.0f}元',
                         xy=(bar.get_x() + bar.get_width() / 2, height),
                         xytext=(0, 3),
                         textcoords="offset points",
                         ha='center', va='bottom', fontsize=9)
    else:
        plt.text(0.5, 0.5, '无员工销售数据', ha='center', va='center', fontsize=12)
        plt.title('员工销售排名', fontsize=14)
    
    # 3. 预算使用率
    ax3 = plt.subplot(2, 2, 3)
    if not dept_sales.empty:
        dept_sales = dept_sales.sort_values('budget_utilization', ascending=False)
        colors = plt.cm.Pastel1(np.linspace(0, 1, len(dept_sales)))
        
        # 修复Decimal转换问题
        budget_utilization = dept_sales['budget_utilization'].values.astype(float)
        total_utilization = budget_utilization.sum()
        
        wedges, texts, autotexts = ax3.pie(
            budget_utilization, 
            labels=dept_sales['department'],
            autopct=lambda p: f'{p:.1f}%\n({p/100*total_utilization:.2f})',
            startangle=90,
            colors=colors,
            wedgeprops={'edgecolor': 'w', 'linewidth': 1.5},
            textprops={'fontsize': 10}
        )
        plt.title('部门预算使用率', fontsize=14)
        
        # 添加中心标题
        ax3.text(0, 0, f'总使用率: {total_utilization*100:.1f}%', 
                ha='center', va='center', fontsize=12, fontweight='bold')
    else:
        plt.text(0.5, 0.5, '无部门预算数据', ha='center', va='center', fontsize=12)
        plt.title('部门预算使用率', fontsize=14)
    
    # 4. 销售趋势图
    ax4 = plt.subplot(2, 2, 4)
    daily_sales = results['daily_sales']
    if not daily_sales.empty:
        daily_sales = daily_sales.sort_values('sale_date')
        
        # 创建平滑曲线
        try:
            x = np.arange(len(daily_sales))
            y = daily_sales['amount'].values
            
            # 创建平滑曲线
            X_Y_Spline = make_interp_spline(x, y)
            X_ = np.linspace(x.min(), x.max(), 500)
            Y_ = X_Y_Spline(X_)
            
            # 绘制平滑曲线
            plt.plot(X_, Y_, color='royalblue', alpha=0.5, label='平滑曲线')
            
            # 绘制原始数据点
            plt.scatter(x, y, color='royalblue', s=50, zorder=5)
        except:
            # 如果平滑失败,直接绘制原始数据
            plt.plot(daily_sales['sale_date'], daily_sales['amount'], 
                    color='royalblue', label='实际销售额')
        
        # 标记最高点和最低点
        max_idx = daily_sales['amount'].idxmax()
        min_idx = daily_sales['amount'].idxmin()
        plt.scatter(daily_sales.loc[max_idx, 'sale_date'], daily_sales.loc[max_idx, 'amount'], 
                   color='red', s=100, zorder=5, label='最高点')
        plt.scatter(daily_sales.loc[min_idx, 'sale_date'], daily_sales.loc[min_idx, 'amount'], 
                   color='green', s=100, zorder=5, label='最低点')
        
        # 添加标注
        plt.annotate(f'最高: {daily_sales.loc[max_idx, "amount"]:,.0f}元', 
                    xy=(daily_sales.loc[max_idx, 'sale_date'], daily_sales.loc[max_idx, 'amount']),
                    xytext=(daily_sales.loc[max_idx, 'sale_date'] + timedelta(days=1), 
                           daily_sales.loc[max_idx, 'amount'] * 0.9),
                    arrowprops=dict(facecolor='red', shrink=0.05))
        
        plt.annotate(f'最低: {daily_sales.loc[min_idx, "amount"]:,.0f}元', 
                    xy=(daily_sales.loc[min_idx, 'sale_date'], daily_sales.loc[min_idx, 'amount']),
                    xytext=(daily_sales.loc[min_idx, 'sale_date'] - timedelta(days=2), 
                           daily_sales.loc[min_idx, 'amount'] * 1.1),
                    arrowprops=dict(facecolor='green', shrink=0.05))
        
        plt.title('最近30天销售趋势', fontsize=14)
        plt.xlabel('日期', fontsize=12)
        plt.ylabel('每日销售额', fontsize=12)
        plt.xticks(rotation=45, fontsize=9)
        plt.grid(True, linestyle='--', alpha=0.7)
        plt.legend()
        
    else:
        plt.text(0.5, 0.5, '无近期销售数据', ha='center', va='center', fontsize=12)
        plt.title('最近30天销售趋势', fontsize=14)
    
    # 添加图表边框
    for ax in [ax1, ax2, ax3, ax4]:
        for spine in ax.spines.values():
            spine.set_visible(True)
            spine.set_edgecolor('#DDDDDD')
            spine.set_linewidth(1.5)
    
    plt.tight_layout(rect=[0, 0, 1, 0.96])  # 为总标题留出空间
    plt.savefig('sales_analysis.png', dpi=300, bbox_inches='tight')
    plt.show()
    print("\n分析图表已保存为 'sales_analysis.png'")

def generate_word_report(results):
    """生成Word报告"""
    doc = Document()
    
    # 添加标题
    doc.add_heading('销售数据分析报告', 0)
    
    # 添加报告时间
    current_time = datetime.now().strftime("%Y年%m月%d日 %H:%M:%S")
    doc.add_paragraph(f'报告生成时间:{current_time}')
    doc.add_paragraph('='*50)
    
    # 1. 部门销售分析
    doc.add_heading('1. 部门销售分析', level=1)
    dept_sales = results['dept_sales']
    if not dept_sales.empty:
        table = doc.add_table(rows=1, cols=5)
        table.style = 'Table Grid'
        header_cells = table.rows[0].cells
        headers = ['部门', '总销售额', '平均销售额', '交易次数', '预算使用率']
        for i, header in enumerate(headers):
            header_cells[i].text = header
        
        for _, row in dept_sales.iterrows():
            row_cells = table.add_row().cells
            row_cells[0].text = str(row['department'])
            row_cells[1].text = f"{row['total_sales']:,.2f}"
            row_cells[2].text = f"{row['avg_sale']:,.2f}"
            row_cells[3].text = str(row['transaction_count'])
            row_cells[4].text = f"{row['budget_utilization']*100:.2f}%"
    else:
        doc.add_paragraph('无部门销售数据')
    
    # 2. 员工绩效分析
    doc.add_heading('2. 员工绩效分析', level=1)
    emp_perf = results['employee_performance']
    if not emp_perf.empty:
        table = doc.add_table(rows=1, cols=6)
        table.style = 'Table Grid'
        header_cells = table.rows[0].cells
        headers = ['姓名', '部门', '总销售额', '平均销售额', '销售笔数', '销售/薪资比']
        for i, header in enumerate(headers):
            header_cells[i].text = header
            
        for _, row in emp_perf.iterrows():
            row_cells = table.add_row().cells
            row_cells[0].text = str(row['name'])
            row_cells[1].text = str(row['department'])
            row_cells[2].text = f"{row['total_sales']:,.2f}"
            row_cells[3].text = f"{row['avg_sale']:,.2f}"
            row_cells[4].text = str(row['sales_count'])
            row_cells[5].text = f"{row['sales_per_salary']:,.2f}"
    else:
        doc.add_paragraph('无员工绩效数据')
    
    # 3. 销售趋势分析
    doc.add_heading('3. 销售趋势分析', level=1)
    daily_sales = results['daily_sales']
    if not daily_sales.empty:
        doc.add_paragraph(f"最高单日销售额:{daily_sales['amount'].max():,.2f}")
        doc.add_paragraph(f"最低单日销售额:{daily_sales['amount'].min():,.2f}")
        doc.add_paragraph(f"平均日销售额:{daily_sales['amount'].mean():,.2f}")
    else:
        doc.add_paragraph('无销售趋势数据')
    
    # 4. 添加图表
    doc.add_heading('4. 数据可视化', level=1)
    
    # 保存当前的matplotlib图表到Word
    doc.add_paragraph('以下是销售数据的可视化分析:')
    
    # 调用visualize_results生成图表并保存
    visualize_results(results)
    doc.add_picture('sales_analysis.png', width=Inches(6))
    
    # 添加结论
    doc.add_heading('5. 分析结论', level=1)
    conclusions = [
        " 各部门销售情况总体表现良好,预算执行率保持在合理水平。",
        "员工销售业绩分布相对均衡,团队协作效果显著。",
        " 销售趋势呈现稳定增长态势,市场反应积极。"
    ]
    for conclusion in conclusions:
        doc.add_paragraph(conclusion, style='List Number')
    
    # 保存Word文档
    doc.save('销售数据分析报告.docx')
    print("\n分析报告已保存为 '销售数据分析报告.docx'")

def main():
    """主函数 - 修改版"""
    print("="*50)
    print("关系数据分析演示")
    print("="*50)
    
    # # 初始化数据库
    # print("\n初始化数据库...")
    # create_tables()
    # insert_sample_data()
    
    # 执行分析
    print("\n执行数据分析...")
    results = analyze_data()
    
    if results:
        # 生成Word报告
        print("\n生成Word报告...")
        generate_word_report(results)
    else:
        print("\n分析失败,无有效数据")
    
    print("\n关系数据分析完成!")

if __name__ == "__main__":
    main()


网站公告

今日签到

点亮在社区的每一天
去签到