新浪财经股票每天10点自动爬取

发布于:2025-04-06 ⋅ 阅读:(18) ⋅ 点赞:(0)

老规矩还是先分好三步,获取数据,解析数据,存储数据

因为股票数据是实时的,所以请求时要带上 cookie 值;最好使用多线程,或者在每次请求之间等待一段时间,不然会被封 IP

废话不多说,直接上代码

import matplotlib
import requests
import time
import schedule
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import rcParams
matplotlib.use('TkAgg')
def get_xinlang_data(pages: int = 13, delay: float = 1.0, timeout: float = 10,
                     output_path: str = 'xinlang.csv') -> None:
    """Download paginated stock rows from the Sina Finance API and save them as CSV.

    Requests pages 1..``pages`` of the ``Market_Center.getHKStockData``
    endpoint (40 rows per page, node ``sgt_hk``), concatenates the JSON rows
    into one DataFrame, prints it and writes it to ``output_path``.

    Args:
        pages: Number of pages to request (the original script fetched 13).
        delay: Seconds to wait between two page requests; throttles the crawl
            so the server is less likely to block the client.
        timeout: Per-request timeout in seconds, so a stalled connection
            cannot hang the caller indefinitely.
        output_path: Destination CSV file.

    Raises:
        requests.HTTPError: If a page responds with an HTTP error status.
        requests.RequestException: On connection problems or timeouts.
        ValueError: If a page returns JSON that is not a list of rows.
    """
    # NOTE(review): the Cookie below is a captured browser session and will
    # presumably expire; refresh it if the API starts rejecting requests.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Mobile Safari/537.36 Edg/134.0.0.0',
        'Referer': 'https://vip.stock.finance.sina.com.cn/mkt/',
        'Cookie': 'UOR=cn.bing.com,k.sina.com.cn,; SINAGLOBAL=118.254.108.55_1712628055.839346; sensorsdata2015jssdkcross=%7B%22distinct_id%22%3A%22192dc63c23a4cc-0d88f469598c1d8-4c657b58-1327104-192dc63c23b16f0%22%2C%22first_id%22%3A%22%22%2C%22props%22%3A%7B%22%24latest_traffic_source_type%22%3A%22%E5%BC%95%E8%8D%90%E6%B5%81%E9%87%8F%22%2C%22%24latest_search_keyword%22%3A%22%E6%9C%AA%E5%8F%96%E5%88%B0%E5%80%BC%22%2C%22%24latest_referrer%22%3A%22https%3A%2F%2Fwww.nowcoder.com%2F%22%7D%2C%22identities%22%3A%22eyIkaWRlbnRpdHlfY29va2llX2lkIjoiMTkyZGM2M2MyM2E0Y2MtMGQ4OGY0Njk1OThjMWQ4LTRjNjU3YjU4LTEzMjcxMDQtMTkyZGM2M2MyM2IxNmYwIn0%3D%22%2C%22history_login_id%22%3A%7B%22name%22%3A%22%22%2C%22value%22%3A%22%22%7D%2C%22%24device_id%22%3A%22192dc63c23a4cc-0d88f469598c1d8-4c657b58-1327104-192dc63c23b16f0%22%7D; SFA_version8.9.0=2025-03-19%2019%3A06; SR_SEL=1_511; vjuids=-7b64b517d.195ae26e9d8.0.8336211e80edc; vjlast=1742383541; FIN_ALL_VISITED=sh600519%2Csh600010; FINA_V_S_2=sh600519,sh600010; SGUID=1742386149579_47511117; _clck=xb0ipd%7C2%7Cfuc%7C0%7C1904; SFA_version8.10.0=2025-04-04%2010%3A09; Apache=175.2.169.138_1743732843.387460; SFA_version8.10.0_click=1; hqEtagMode=1; ULV=1743732861648:8:2:2:175.2.169.138_1743732843.387460:1743732842772; rotatecount=2'
    }
    all_data = []
    for page in range(1, pages + 1):
        url = f'https://vip.stock.finance.sina.com.cn/quotes_service/api/json_v2.php/Market_Center.getHKStockData?page={page}&num=40&sort=symbol&asc=1&node=sgt_hk&_s_r_a=page'
        response = requests.get(url, headers=headers, timeout=timeout)

        # Fail loudly on HTTP errors instead of parsing an error page.
        response.raise_for_status()
        data = response.json()

        # An empty or null payload means there are no further rows to fetch.
        if not data:
            break
        # Anything other than a list of rows would silently corrupt the table.
        if not isinstance(data, list):
            raise ValueError(
                f'Unexpected response for page {page}: expected a list of rows, '
                f'got {type(data).__name__}'
            )
        all_data.extend(data)

        # Throttle between pages (no need to wait after the final one).
        if delay > 0 and page < pages:
            time.sleep(delay)

    if not all_data:
        # Keep any previously saved CSV rather than replacing it with an
        # empty file that pandas could not read back.
        print(f'No data returned; {output_path} left unchanged')
        return

    # Convert the collected rows to a DataFrame.
    df = pd.DataFrame(all_data)
    print(df)
    # Persist the table as CSV.
    df.to_csv(output_path, index=False, encoding='utf-8')
    print(f'Data saved to {output_path}')
def job():
    """Scheduled task: announce the run on stdout, then crawl and save the data."""
    status_message = 'Fetching and saving data...'
    print(status_message)
    get_xinlang_data()

# Plot the crawled data
def draw(csv_path: str = 'xinlang.csv', top_n: int | None = None) -> None:
    """Show bar charts of the ``changepercent`` and ``buy`` columns per stock.

    Reads the CSV written by the crawler and draws two side-by-side bar
    charts keyed by the ``name`` column, then opens the figure window.

    Args:
        csv_path: CSV file to read; must contain the columns ``name``,
            ``changepercent`` and ``buy``.
        top_n: If given, only the first ``top_n`` rows are plotted. Useful
            because hundreds of rotated x-axis labels are unreadable.
            ``None`` (the default) plots every row.

    Raises:
        FileNotFoundError: If ``csv_path`` does not exist.
        KeyError: If one of the required columns is missing.
    """
    # Read the CSV file.
    df = pd.read_csv(csv_path)
    if top_n is not None:
        df = df.head(top_n)

    # Force the plotted columns to numbers; a stray non-numeric cell would
    # otherwise make the whole column text and break the y axis.
    change_percent = pd.to_numeric(df['changepercent'], errors='coerce')
    buy = pd.to_numeric(df['buy'], errors='coerce')
    names = df['name'].astype(str)

    # Configure fonts BEFORE creating the figure so every text element uses
    # them. Several CJK-capable fonts are listed; matplotlib uses the first
    # one that is installed.
    rcParams['font.sans-serif'] = ['SimHei', 'Microsoft YaHei',
                                   'Noto Sans CJK SC', 'Arial Unicode MS']
    rcParams['axes.unicode_minus'] = False  # render minus signs correctly

    # One row, two columns: the original 2x2 grid left the bottom half empty.
    fig, (ax_change, ax_buy) = plt.subplots(1, 2, figsize=(14, 8))

    # Chart 1: changepercent.
    ax_change.bar(names, change_percent, color='blue')
    ax_change.set_title('Change Percent')
    ax_change.set_xlabel('Stock Name')
    ax_change.set_ylabel('Change Percent')
    ax_change.tick_params(axis='x', labelrotation=90)

    # Chart 2: buy.
    # NOTE(review): the 'buy' field looks like a bid price rather than a
    # volume; confirm against the API before trusting the 'Buy Volume' label.
    ax_buy.bar(names, buy, color='green')
    ax_buy.set_title('Buy Volume')
    ax_buy.set_xlabel('Stock Name')
    ax_buy.set_ylabel('Buy Volume')
    ax_buy.tick_params(axis='x', labelrotation=90)

    # Lay out once, after both axes are populated.
    fig.tight_layout()

    # Display the figure.
    plt.show()
if __name__ == "__main__":
    # Crawl once right away and chart the result.
    # NOTE: plt.show() inside draw() normally blocks until the chart window
    # is closed, so the scheduler below only starts after that.
    job()
    draw()

    # Register the crawl to run every day at 10:00.
    schedule.every().day.at("10:00").do(job)

    # Poll the scheduler once per second; kept inside the main guard so that
    # importing this module never starts an endless loop.
    while True:
        schedule.run_pending()
        time.sleep(1)

注意:绘图部分还有点问题,我还没完全弄懂图形该怎么绘制,所以大家可以先把 draw 函数注释掉;如果有人会的话,欢迎在评论区指导一下。


网站公告

今日签到

点亮在社区的每一天
去签到