一.使用命令行进行导入
LOAD DATA LOCAL INFILE '/data/home/narieliu/titanic.csv'
INTO TABLE tb1
FIELDS TERMINATED BY ','
ENCLOSEDBY '"'
LINES TERMINATED BY '\n'
IGNORE 1 LINES;
加local是在客户端导入,不加是在服务端导入
客户端因为权限无法导入的可以使用脚本进行导入,但是相比较效率会下降
二.使用python脚本进行导入
使用时,可根据需求进行相应的修改,需要环境python3,以及安装pymysql库
import csv
import pymysql
# 数据库连接参数
db_config = {
'host': 'your_cloud_db_host',
'user': 'your_username',
'password': 'your_password',
'database': 'your_database',
'charset': 'utf8mb4',
'cursorclass': pymysql.cursors.DictCursor
}
# CSV 文件路径
csv_file_path = 'CSV文件路径'
# 插入语句,字段名和占位符要对应
insert_sql = """
INSERT INTO tb1 (
PassengerId, Survived, Pclass, Name, Sex, Age, SibSp, Parch, Ticket, Fare, Cabin, Embarked
) VALUES (
%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s
)
"""
def main():
connection = pymysql.connect(**db_config)
try:
with connection.cursor() as cursor, open(csv_file_path, 'r', encoding='utf-8') as f:
reader = csv.reader(f)
header = next(reader) # 跳过表头
batch_size = 1000
batch = []
for row in reader:
# 处理空值,转换数据类型等(根据需要调整)
# 这里简单示范,空字符串转 None
row = [None if x == '' else x for x in row]
# 转换数值字段类型(示例)
# csv中默认为字符串,根据需要进行转换
row[0] = int(row[0]) if row[0] is not None else None
row[1] = int(row[1]) if row[1] is not None else None
row[2] = int(row[2]) if row[2] is not None else None
# Name, Sex 保持字符串
row[5] = float(row[5]) if row[5] is not None else None
row[6] = int(row[6]) if row[6] is not None else None
row[7] = int(row[7]) if row[7] is not None else None
# Ticket 字符串
row[9] = float(row[9]) if row[9] is not None else None
# Cabin, Embarked 字符串
batch.append(row)
if len(batch) >= batch_size:
cursor.executemany(insert_sql, batch)
connection.commit()
batch.clear()
# 插入剩余数据
if batch:
cursor.executemany(insert_sql, batch)
connection.commit()
print("数据导入完成!")
except Exception as e:
print("导入出错:", e)
finally:
connection.close()
if __name__ == '__main__':
main()