获取数据
requests爬取天气网站的html信息
def getHtmlText(url):
    """Download a web page and return its decoded text.

    Args:
        url: Address of the page to fetch.

    Returns:
        The response body as ``str``, or ``False`` when the request fails
        (connection error, timeout, invalid URL or a non-2xx status).
        Callers must check for ``False`` before parsing the result.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/'
                      '95.0.4638.54 Safari/537.36 Edg/95.0.1020.30'}
    try:
        # The context manager releases the connection even when
        # raise_for_status() raises.
        with requests.get(url, timeout=30, headers=headers) as resp:
            resp.raise_for_status()
            # Decode using the charset detected from the body. The previous
            # code assigned the numeric status code here, which is not an
            # encoding name.
            resp.encoding = resp.apparent_encoding
            return resp.text
    except requests.RequestException:
        # Keep the historical "False on failure" contract, but no longer
        # swallow unrelated errors such as KeyboardInterrupt.
        return False
re正则表达式处理html数据
# weather.com.cn pages for the cities that are not looked up on tianqi.com.
specialCityUrls = {
    '澳门': "http://www.weather.com.cn/weather/101330101.shtml",
    '上海': "http://www.weather.com.cn/weather/101020100.shtml",
    '天津': "http://www.weather.com.cn/weather/101030100.shtml",
    '北京': "http://www.weather.com.cn/weather/101010100.shtml",
    '香港': "http://www.weather.com.cn/weather/101320101.shtml",
}
# Markup surrounding the temperature on a tianqi.com city page.
tianqiPattern = r'<p class="now"><b>(?P<temperature>.*?)</b><i>℃</i></p>'
# Markup surrounding the temperature on a weather.com.cn page.
weatherPattern = r'<div class="con today clearfix">.*?<p class="tem">.*?<i>(?P<temperature>.*?)℃'

for provincialCapital in provincialCapitalList:
    if provincialCapital in specialList:
        url = specialCityUrls.get(provincialCapital)
        if url is None:
            # Marked as special but no page is known for it: nothing to scrape.
            continue
        pattern = weatherPattern
    else:
        # tianqi.com is addressed by the city's pinyin with no separator.
        provincialCapital = p.get_pinyin(provincialCapital, '')
        url = f"https://www.tianqi.com/{provincialCapital}"
        pattern = tianqiPattern
    # Every match on the page contributes one integer temperature.
    temperatureList.extend(
        int(found.group('temperature'))
        for found in re.finditer(pattern, getHtmlText(url), re.S)
    )
可视化数据
Map方法将数据可视化并在当前目录下生成html文件
# Same pipeline as the chained form, written one step at a time.
chart = Map(init_opts=opts.InitOpts(width="1000px", height="600px"))  # the theme can be switched here
legend = opts.VisualMapOpts(
    min_=minTemperature,
    max_=minTemperature+20,
    range_text=['实时温度区间', ''],  # text shown at the ends of the legend
    is_piecewise=True,  # piecewise legend; the default is a continuous one
    pos_top="middle",  # legend position
    pos_left="left",
    orient="vertical",
    split_number=10  # split the range into 10 intervals
)
chart = chart.set_global_opts(
    title_opts=opts.TitleOpts(title=f"{time}全国省会各地温度"),
    visualmap_opts=legend
)
chart = chart.add("实时温度", printList, maptype="china")
# render() writes the HTML file; its return value is kept in ``c`` as before.
c = chart.render("温度.html")
效果如图
全部代码如下
from pyecharts.charts import Map
from pyecharts import options as opts
import requests
import re
from xpinyin import Pinyin
from datetime import datetime
def getHtmlText(url):
    """Download a web page and return its decoded text.

    Args:
        url: Address of the page to fetch.

    Returns:
        The response body as ``str``, or ``False`` when the request fails
        (connection error, timeout, invalid URL or a non-2xx status).
        Callers must check for ``False`` before parsing the result.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/'
                      '95.0.4638.54 Safari/537.36 Edg/95.0.1020.30'}
    try:
        # The context manager releases the connection even when
        # raise_for_status() raises.
        with requests.get(url, timeout=30, headers=headers) as resp:
            resp.raise_for_status()
            # Decode using the charset detected from the body. The previous
            # code assigned the numeric status code here, which is not an
            # encoding name.
            resp.encoding = resp.apparent_encoding
            return resp.text
    except requests.RequestException:
        # Keep the historical "False on failure" contract, but no longer
        # swallow unrelated errors such as KeyboardInterrupt.
        return False
# weather.com.cn pages for the cities that are not looked up on tianqi.com.
_SPECIAL_CITY_URLS = {
    '澳门': "http://www.weather.com.cn/weather/101330101.shtml",
    '上海': "http://www.weather.com.cn/weather/101020100.shtml",
    '天津': "http://www.weather.com.cn/weather/101030100.shtml",
    '北京': "http://www.weather.com.cn/weather/101010100.shtml",
    '香港': "http://www.weather.com.cn/weather/101320101.shtml",
}
# Markup surrounding the temperature on a tianqi.com city page.
_TIANQI_PATTERN = re.compile(
    r'<p class="now"><b>(?P<temperature>.*?)</b><i>℃</i></p>', re.S)
# Markup surrounding the temperature on a weather.com.cn page.
_WEATHER_COM_CN_PATTERN = re.compile(
    r'<div class="con today clearfix">.*?<p class="tem">.*?<i>(?P<temperature>.*?)℃', re.S)


def _fetchTemperature(url, pattern):
    """Return the first temperature found on the page at ``url``.

    Args:
        url: Page to download with ``getHtmlText``.
        pattern: Compiled regex exposing a ``temperature`` named group.

    Returns:
        The temperature as ``int``, or ``None`` when the download failed,
        the pattern did not match, or the captured text is not an integer.
    """
    html = getHtmlText(url)
    if not html:
        # getHtmlText signals failure with False; an empty page has no data either.
        return None
    found = pattern.search(html)
    if found is None:
        return None
    try:
        return int(found.group('temperature'))
    except ValueError:
        return None


def getMap():
    """Scrape the current temperature of each capital and render a China map.

    The result is written to ``温度.html`` in the current directory. Cities
    whose temperature cannot be obtained are reported with a warning and left
    out of the map, so the remaining values stay attached to the right
    province.

    Raises:
        ValueError: If no temperature could be retrieved for any city.
    """
    import warnings

    provincialCapitalList = ['乌鲁木齐','拉萨','西宁','兰州','成都','昆明','银川','西安','重庆','贵阳',
                             '南宁','海口','台北','广州','福州','长沙','南昌','杭州','武汉','合肥','南京',
                             '郑州','太原','石家庄','济南','呼和浩特','沈阳','长春','哈尔滨','上海','天津',
                             '北京','澳门','香港']
    # Region names in the same order as provincialCapitalList.
    provinceList = ['新疆','西藏','青海','甘肃','四川','云南','宁夏','陕西','重庆','贵州',
                    '广西','海南','台湾','广东','福建','湖南','江西','浙江','湖北','安徽',
                    '江苏','河南','山西','河北','山东','内蒙古','辽宁','吉林','黑龙江',
                    '上海','天津','北京','澳门','香港']
    p = Pinyin()

    # Pair each region with its own reading right away instead of zipping two
    # independently built lists, which misaligned on any missing/extra match.
    printList = []
    for province, provincialCapital in zip(provinceList, provincialCapitalList):
        if provincialCapital in _SPECIAL_CITY_URLS:
            url = _SPECIAL_CITY_URLS[provincialCapital]
            pattern = _WEATHER_COM_CN_PATTERN
        else:
            # tianqi.com is addressed by the city's pinyin with no separator.
            # NOTE(review): polyphonic names (e.g. 重庆, 长沙) may be
            # transliterated differently from the site's URL - verify.
            url = f"https://www.tianqi.com/{p.get_pinyin(provincialCapital, '')}"
            pattern = _TIANQI_PATTERN
        temperature = _fetchTemperature(url, pattern)
        if temperature is None:
            warnings.warn(f"no temperature found for {provincialCapital} ({url}); skipped")
            continue
        printList.append([province, temperature])

    if not printList:
        raise ValueError("no temperature data could be retrieved")

    minTemperature = min(temperature for _, temperature in printList)
    timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    (
        Map(init_opts=opts.InitOpts(width="1000px", height="600px"))
        .set_global_opts(
            title_opts=opts.TitleOpts(title=f"{timestamp}全国省会各地温度"),
            visualmap_opts=opts.VisualMapOpts(
                min_=minTemperature,
                # NOTE(review): the legend spans a fixed 20 degrees above the
                # minimum; confirm this is intended when the spread is larger.
                max_=minTemperature+20,
                range_text=['实时温度区间', ''],
                is_piecewise=True,
                pos_top="middle",
                pos_left="left",
                orient="vertical",
                split_number=10
            )
        )
        .add("实时温度", printList, maptype="china")
        .render("温度.html")
    )


# Build and render the map only when the file is executed as a script.
if __name__ == "__main__":
    getMap()