import json
import time

import requests
from lxml import etree
import redis

# Redis is used to remember which URLs have already been seen.
r = redis.Redis(host="localhost", port=6379, db=0)
def get_page_content(url):
    """Fetch the page and return a list of {"url", "title"} dicts, one per <a> tag with an href."""
    url_lists = []
    headers = {
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"
    }
    # verify=False skips TLS certificate validation; only use it for sites with broken certificates.
    response = requests.get(url, headers=headers, verify=False)
    if response.status_code == 200:
        html = etree.HTML(response.content.decode())
        if html is not None:
            for a in html.xpath("//a"):
                title = "".join(a.xpath(".//text()")).strip()
                hrefs = a.xpath("./@href")  # separate name, so the outer result list is not overwritten
                if len(hrefs) > 0:
                    url_lists.append({"url": hrefs[0], "title": title})
    return url_lists
def monitor_website(url, interval=7200):
    """Poll the page every `interval` seconds and report links that have not been seen before."""
    while True:
        new_content = get_page_content(url)
        changes = []
        if new_content:
            for item in new_content:
                if r.sismember("myset", item["url"]):
                    print("URL already exists, skipping")
                else:
                    r.sadd("myset", item["url"])
                    changes.append(item)
        if len(changes) > 0:
            print(f"{url} has been updated, new items: {changes}")
            bot(f"{url} has been updated, new items: {changes}")
        # Sleep at the end of the loop so the first check runs immediately.
        time.sleep(interval)
def bot(text):
    """Push a text message to a WeCom (WeChat Work) group robot webhook, mentioning everyone."""
    message_data = {
        "msgtype": "text",
        "text": {
            "content": text,
            "mentioned_list": ["@all"]
        }
    }
    robot_webhook = "https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=YOUR_KEY"
    response = requests.post(
        robot_webhook,
        headers={"Content-Type": "application/json"},
        data=json.dumps(message_data)
    )
    if response.status_code == 200:
        print("Message sent successfully")
    else:
        print(f"Message failed to send, status code: {response.status_code}")