导入CSV文件到MySQL

发布于:2025-08-12 ⋅ 阅读:(13) ⋅ 点赞:(0)

一.使用命令行进行导入

LOAD DATA LOCAL INFILE '/data/home/narieliu/titanic.csv'

INTO TABLE tb1

FIELDS TERMINATED BY ','

ENCLOSEDBY '"'

LINES TERMINATED BY '\n'

IGNORE 1 LINES;

  加local是在客户端导入,不加是在服务端导入

  客户端因为权限无法导入的可以使用脚本进行导入,但是相比较效率会下降

二.使用python脚本进行导入

        使用时,可根据需求进行相应的修改,需要环境python3,以及安装pymysql库

import csv
import pymysql

# 数据库连接参数
db_config = {
    'host': 'your_cloud_db_host',
    'user': 'your_username',
    'password': 'your_password',
    'database': 'your_database',
    'charset': 'utf8mb4',
    'cursorclass': pymysql.cursors.DictCursor
}

# CSV 文件路径
csv_file_path = 'CSV文件路径'

# 插入语句,字段名和占位符要对应
insert_sql = """
INSERT INTO tb1 (
    PassengerId, Survived, Pclass, Name, Sex, Age, SibSp, Parch, Ticket, Fare, Cabin, Embarked
) VALUES (
    %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s
)
"""

def main():
    connection = pymysql.connect(**db_config)
    try:
        with connection.cursor() as cursor, open(csv_file_path, 'r', encoding='utf-8') as f:
            reader = csv.reader(f)
            header = next(reader)  # 跳过表头

            batch_size = 1000
            batch = []

            for row in reader:
                # 处理空值,转换数据类型等(根据需要调整)
                # 这里简单示范,空字符串转 None
                row = [None if x == '' else x for x in row]

                # 转换数值字段类型(示例)
                # csv中默认为字符串,根据需要进行转换
                row[0] = int(row[0]) if row[0] is not None else None  
                row[1] = int(row[1]) if row[1] is not None else None  
                row[2] = int(row[2]) if row[2] is not None else None  
                # Name, Sex 保持字符串
                row[5] = float(row[5]) if row[5] is not None else None 
                row[6] = int(row[6]) if row[6] is not None else None  
                row[7] = int(row[7]) if row[7] is not None else None 
                # Ticket 字符串
                row[9] = float(row[9]) if row[9] is not None else None 
                # Cabin, Embarked 字符串

                batch.append(row)

                if len(batch) >= batch_size:
                    cursor.executemany(insert_sql, batch)
                    connection.commit()
                    batch.clear()

            # 插入剩余数据
            if batch:
                cursor.executemany(insert_sql, batch)
                connection.commit()

        print("数据导入完成!")

    except Exception as e:
        print("导入出错:", e)
    finally:
        connection.close()

if __name__ == '__main__':
    main()