爬取网站链接:https://lishi.tianqi.com/xiamen/202312.html(以2023年12月为例,其余月份只需替换链接末尾的年月)
爬取了厦门市2023年一整年的天气数据,包括最高温、最低温、天气、风向和风力等。
爬虫代码:
import requests
import pandas as pd
import csv
from pyecharts.charts import Bar,Timeline
import pyecharts.options as opts
import parsel
def _split_pair(text):
    """Split whitespace-separated text into exactly two fields.

    Missing fields come back as empty strings, so a malformed cell does
    not abort the whole crawl with an IndexError.
    """
    parts = (text or '').split()
    parts += [''] * (2 - len(parts))
    return parts[0], parts[1]


# Browser cookies captured from a real session; sent with every request.
cookies = {
    'cityPy': 'xiamen',
    'cityPy_expire': '1721098187',
    'UserId': '17204933865319972',
    'Hm_lvt_ab6a683aa97a52202eab5b3a9042a8d2': '1720493387',
    'HMACCOUNT': '4A9167DA75AB7059',
    'Hm_lpvt_ab6a683aa97a52202eab5b3a9042a8d2': '1720493402',
    'Hm_lvt_7c50c7060f1f743bccf8c150a646e90a': '1720493523',
    'Hm_lvt_30606b57e40fddacb2c26d2b789efbcb': '1720493529',
    'Hm_lpvt_30606b57e40fddacb2c26d2b789efbcb': '1720493529',
    'Hm_lpvt_7c50c7060f1f743bccf8c150a646e90a': '1720493646',
}
# Request headers copied from a desktop Chrome session. The cookie header is
# intentionally absent: cookies are passed through the `cookies` argument.
headers = {
    'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
    'accept-language': 'zh-CN,zh;q=0.9',
    'cache-control': 'no-cache',
    'pragma': 'no-cache',
    'priority': 'u=0, i',
    'referer': 'https://lishi.tianqi.com/xiamen/202302.html',
    'sec-ch-ua': '"Not/A)Brand";v="8", "Chromium";v="126", "Google Chrome";v="126"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    'sec-fetch-dest': 'document',
    'sec-fetch-mode': 'navigate',
    'sec-fetch-site': 'same-origin',
    'sec-fetch-user': '?1',
    'upgrade-insecure-requests': '1',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36',
}

# The `with` block guarantees the CSV is flushed and closed before any later
# code reads it back; the original left the handle open.
with open('天气数据.csv', mode='w', encoding='utf-8', newline='') as f:
    csv_writer = csv.DictWriter(f, fieldnames=['日期', '星期', '最高温', '最低温', '天气', '风向', '风力'])
    csv_writer.writeheader()
    for month in range(1, 13):
        # Monthly pages are addressed as YYYYMM, so the month is zero-padded.
        url = f'https://lishi.tianqi.com/xiamen/2023{month:02d}.html'
        # A timeout keeps a stalled connection from hanging the crawl forever.
        response = requests.get(url=url, cookies=cookies, headers=headers, timeout=10)
        # Fail loudly on an HTTP error instead of silently writing no rows.
        response.raise_for_status()
        response.encoding = response.apparent_encoding
        selector = parsel.Selector(response.text)
        for li in selector.css('div.tian_three ul.thrui li'):
            # The th200 cell holds the date followed by the weekday.
            date_time, xingqi_time = _split_pair(li.css('div.th200::text').get())
            # The th140 cells are read in order: high, low, weather, wind.
            all_list = li.css('div.th140::text').getall()
            if not date_time or len(all_list) < 4:
                # Skip rows that do not carry a full daily record.
                continue
            max_temp = all_list[0]
            min_temp = all_list[1]
            weather = all_list[2]
            # The wind cell holds the direction followed by the force.
            wind_orient, wind_rank = _split_pair(all_list[3])
            dit = {
                '日期': date_time,
                '星期': xingqi_time,
                '最高温': max_temp,
                '最低温': min_temp,
                '天气': weather,
                '风向': wind_orient,
                '风力': wind_rank,
            }
            csv_writer.writerow(dit)
            print(date_time, xingqi_time, max_temp, min_temp, weather, wind_orient, wind_rank)
爬取结果保存到 天气数据.csv 文件:
接下来读取文件,对文件格式做调整,以进行绘图操作。
# Load the scraped rows and count, for every month, how many days had each
# kind of weather.
df = pd.read_csv('天气数据.csv')
# info() prints its summary itself and returns None, so it is not wrapped
# in print().
df.info()
df['日期'] = pd.to_datetime(df['日期'])
# Reduce each date to its year-month period so rows can be grouped by month.
df['month_'] = df['日期'].dt.to_period('M')
new_data = df.groupby(['month_', '天气']).size().reset_index()
new_data.columns = ['month', 'weather', 'count']  # rename the columns
# head must be called; printing the bare attribute shows the bound method.
print(df.head())
print(new_data)
# Filter the January rows and keep only the weather and count columns; the
# filtered result is still a DataFrame at that point.
print(new_data[new_data['month'] == '2023-01'][['weather', 'count']].sort_values(by='count', ascending=False).values.tolist())
# .sort_values(by='count', ascending=False).values.tolist() sorts in
# descending order of count and converts the values to a list of lists, e.g.
# [['多云', 14], ['小雨', 5], ['晴', 5], ['雾', 4], ['阴', 2], ['中雨', 1]]
new_data数据格式如下(共三列:month、weather、count,每一行表示某个月某种天气出现的天数):
现在进行绘图操作:
# Build one horizontal bar chart per month and attach each to a timeline so
# the months can be played back in sequence.
timeline = Timeline()
timeline.add_schema(play_interval=1000)  # delay between frames, in milliseconds
for month in new_data['month'].unique():
    # Rows of the current month, ranked from most to least frequent weather.
    month_rows = new_data.loc[new_data['month'] == month, ['weather', 'count']]
    data = month_rows.sort_values(by='count', ascending=False).values.tolist()
    print(data)

    weather_names = [row[0] for row in data]
    day_counts = [row[1] for row in data]

    bar = Bar()
    bar.add_xaxis(weather_names)
    bar.add_yaxis('', day_counts)
    bar.reversal_axis()  # swap the axes so the bars run horizontally
    bar.set_global_opts(
        title_opts=opts.TitleOpts(title='厦门市2023年每月天气变化'),
    )
    bar.set_series_opts(
        label_opts=opts.LabelOpts(position='right'),
    )
    # The month value doubles as the frame label on the timeline.
    timeline.add(bar, f'{month}')
timeline.render('天气轮播图.html')
点击轮播图下方的按钮就可以进行播放了。