老规矩,还是先分成三步:获取数据、解析数据、存储数据。
因为股票数据是实时的,所以请求时要带上 Cookie;最好使用多线程,或者在每次请求之间等待一段时间,否则可能会被封 IP。
废话不多说,直接上代码:
import matplotlib
import requests
import time
import schedule
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import rcParams
matplotlib.use('TkAgg')
def get_xinlang_data(max_pages=13, page_size=40, delay=1.0, timeout=10,
                     output_path='xinlang.csv'):
    """Download the Sina `sgt_hk` stock list page by page and save it as CSV.

    Pages are requested sequentially from 1 to ``max_pages``. The crawl stops
    early as soon as a page comes back empty. All rows are collected into one
    DataFrame, printed, and written to ``output_path``.

    Args:
        max_pages: Highest page number to request (pages start at 1).
        page_size: Value sent as the ``num`` query parameter.
        delay: Seconds to sleep between two page requests, to keep the
            request rate low. Use 0 to disable.
        timeout: Per-request timeout in seconds passed to ``requests``.
        output_path: Destination CSV file.

    Returns:
        The DataFrame holding every fetched row (empty if nothing was
        fetched, in which case no file is written).

    Raises:
        requests.RequestException: On network errors, timeouts, non-2xx
            responses, or a body that is not valid JSON.
        ValueError: If a page decodes to something other than a JSON list.
    """
    # NOTE(review): this Cookie is a hard-coded browser session value. It will
    # expire and should not live in source control -- consider loading it from
    # configuration or the environment.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Mobile Safari/537.36 Edg/134.0.0.0',
        'Referer': 'https://vip.stock.finance.sina.com.cn/mkt/',
        'Cookie': 'UOR=cn.bing.com,k.sina.com.cn,; SINAGLOBAL=118.254.108.55_1712628055.839346; sensorsdata2015jssdkcross=%7B%22distinct_id%22%3A%22192dc63c23a4cc-0d88f469598c1d8-4c657b58-1327104-192dc63c23b16f0%22%2C%22first_id%22%3A%22%22%2C%22props%22%3A%7B%22%24latest_traffic_source_type%22%3A%22%E5%BC%95%E8%8D%90%E6%B5%81%E9%87%8F%22%2C%22%24latest_search_keyword%22%3A%22%E6%9C%AA%E5%8F%96%E5%88%B0%E5%80%BC%22%2C%22%24latest_referrer%22%3A%22https%3A%2F%2Fwww.nowcoder.com%2F%22%7D%2C%22identities%22%3A%22eyIkaWRlbnRpdHlfY29va2llX2lkIjoiMTkyZGM2M2MyM2E0Y2MtMGQ4OGY0Njk1OThjMWQ4LTRjNjU3YjU4LTEzMjcxMDQtMTkyZGM2M2MyM2IxNmYwIn0%3D%22%2C%22history_login_id%22%3A%7B%22name%22%3A%22%22%2C%22value%22%3A%22%22%7D%2C%22%24device_id%22%3A%22192dc63c23a4cc-0d88f469598c1d8-4c657b58-1327104-192dc63c23b16f0%22%7D; SFA_version8.9.0=2025-03-19%2019%3A06; SR_SEL=1_511; vjuids=-7b64b517d.195ae26e9d8.0.8336211e80edc; vjlast=1742383541; FIN_ALL_VISITED=sh600519%2Csh600010; FINA_V_S_2=sh600519,sh600010; SGUID=1742386149579_47511117; _clck=xb0ipd%7C2%7Cfuc%7C0%7C1904; SFA_version8.10.0=2025-04-04%2010%3A09; Apache=175.2.169.138_1743732843.387460; SFA_version8.10.0_click=1; hqEtagMode=1; ULV=1743732861648:8:2:2:175.2.169.138_1743732843.387460:1743732842772; rotatecount=2'
    }

    all_data = []
    # One session reuses the underlying connection for every page request.
    with requests.Session() as session:
        session.headers.update(headers)
        for page in range(1, max_pages + 1):
            url = f'https://vip.stock.finance.sina.com.cn/quotes_service/api/json_v2.php/Market_Center.getHKStockData?page={page}&num={page_size}&sort=symbol&asc=1&node=sgt_hk&_s_r_a=page'
            # requests never times out on its own; without this a stalled
            # connection would hang the crawl forever.
            response = session.get(url, timeout=timeout)
            # Fail loudly on HTTP errors instead of parsing an error page.
            response.raise_for_status()
            data = response.json()

            # An empty list or JSON null means there are no more rows.
            if not data:
                break
            if not isinstance(data, list):
                raise ValueError(
                    f'Unexpected payload on page {page}: '
                    f'expected a list, got {type(data).__name__}'
                )
            all_data.extend(data)

            # Pause between requests (not after the last one) to stay polite.
            if delay > 0 and page < max_pages:
                time.sleep(delay)

    # Convert the collected rows into a DataFrame.
    df = pd.DataFrame(all_data)
    print(df)

    if df.empty:
        # Do not clobber a previously saved snapshot with an empty file.
        print(f'No data fetched; {output_path} was not modified')
        return df

    # Persist the snapshot as CSV.
    df.to_csv(output_path, index=False, encoding='utf-8')
    print(f'Data saved to {output_path}')
    return df
def job():
    """Run one crawl and report, rather than propagate, any failure.

    This function is registered with ``schedule``. That library does not catch
    exceptions raised by a job, so an uncaught error here would escape from
    ``schedule.run_pending()`` and stop the scheduler loop for good. Failures
    are therefore printed with their traceback and the job returns normally,
    letting the next scheduled run still happen.
    """
    import traceback

    print('Fetching and saving data...')
    try:
        get_xinlang_data()
    except Exception:
        # Scheduler boundary: log everything so one bad run cannot kill the
        # long-running process.
        print('Fetching data failed:')
        print(traceback.format_exc())
# Plotting
def draw(csv_path='xinlang.csv', top_n=30):
    """Plot change percent and buy values for the biggest movers in the CSV.

    Reads ``csv_path``, keeps the ``top_n`` rows with the largest absolute
    ``changepercent`` (one bar per stock is unreadable for several hundred
    stocks), and shows two side-by-side bar charts.

    Args:
        csv_path: CSV file to read; must contain the columns ``name``,
            ``changepercent`` and ``buy``.
        top_n: Maximum number of stocks to plot, or ``None`` to plot all rows.

    Raises:
        FileNotFoundError: If ``csv_path`` does not exist.
        KeyError: If a required column is missing from the file.
        ValueError: If ``top_n`` is given but smaller than 1.
    """
    if top_n is not None and top_n < 1:
        raise ValueError('top_n must be at least 1 or None')

    # Load the saved snapshot.
    df = pd.read_csv(csv_path)

    required = ('name', 'changepercent', 'buy')
    missing = [col for col in required if col not in df.columns]
    if missing:
        raise KeyError(f'{csv_path} is missing required column(s): {missing}')

    df = df[list(required)].copy()
    df['name'] = df['name'].astype(str)
    # Non-numeric cells become NaN and are dropped so the bars are
    # well-defined.
    for col in ('changepercent', 'buy'):
        df[col] = pd.to_numeric(df[col], errors='coerce')
    df = df.dropna(subset=['changepercent', 'buy'])

    if df.empty:
        print(f'No plottable rows in {csv_path}')
        return

    # Keep only the strongest movers, then order the bars from gain to loss.
    if top_n is not None and len(df) > top_n:
        strongest = df['changepercent'].abs().nlargest(top_n).index
        df = df.loc[strongest]
    df = df.sort_values('changepercent', ascending=False)

    # Fonts able to render Chinese stock names; the first installed one is
    # used. NOTE(review): neither font may exist on a given machine -- confirm.
    rcParams['font.sans-serif'] = ['SimHei', 'Noto Sans CJK SC']
    rcParams['axes.unicode_minus'] = False  # render minus signs correctly

    plt.figure(figsize=(14, 8))

    # Plot against integer positions and label the ticks afterwards, so two
    # stocks sharing a name do not collapse onto one categorical tick.
    positions = list(range(len(df)))
    labels = df['name'].tolist()

    # NOTE(review): 'buy' is labelled as a volume here, but it may be a bid
    # price in this feed -- confirm and adjust the title if so.
    panels = (
        ('changepercent', 'Change Percent', 'blue'),
        ('buy', 'Buy Volume', 'green'),
    )
    for idx, (column, title, color) in enumerate(panels, start=1):
        plt.subplot(1, 2, idx)
        plt.bar(positions, df[column], color=color)
        plt.title(title)
        plt.xlabel('Stock Name')
        plt.ylabel(title)
        plt.xticks(positions, labels, rotation=90)

    # Apply the layout once, after both panels exist.
    plt.tight_layout()
    # Display the figure.
    plt.show()
if __name__ == "__main__":
    # Take one snapshot right away and chart it.
    job()
    draw()

    # Register the crawl to run every day at 10:00.
    daily_at_ten = schedule.every().day.at("10:00")
    daily_at_ten.do(job)

    # Drive the scheduler: run whatever is due, then idle before checking
    # again.
    poll_interval = 1
    while True:
        schedule.run_pending()
        time.sleep(poll_interval)
注意:绘图部分还有些问题,我还没完全弄懂图形该怎么绘制,所以大家可以先把 draw 函数的调用注释掉;如果有人熟悉绘图,欢迎在评论区指导一下。