# 运行代码时，程序会打开斗鱼网站，并逐个打印每个房间的相关信息：标题、类型、所有者、观看人数和封面图片。
import time
from selenium import webdriver
class Douyu:
    """Scrape the Douyu live-room directory with Selenium and print each room.

    For every page of the directory the crawler collects one dict per room
    with the keys ``title``, ``type``, ``owner``, ``num`` (viewer count text)
    and ``picture`` (cover image URL), prints them, and then moves on to the
    next page until no enabled "next" button can be found.
    """

    # Locator strategy passed to find_element(s). This is the string value of
    # ``selenium.webdriver.common.by.By.XPATH``; using the generic
    # ``find_element(by, value)`` API keeps the code working on Selenium 4.3+,
    # where the ``find_element_by_*`` helpers were removed.
    _XPATH = 'xpath'

    # One match per room card on the current directory page.
    _ROOM_XPATH = '//*[@id="listAll"]/section[2]/div[2]/ul/li/div'

    # Exact match on the class attribute: a button that carries any additional
    # class is NOT matched, so the lookup comes back empty in that case.
    _NEXT_XPATH = '//*[@class= "dy-Pagination-next"]'

    # Text fields of a room, relative to the room card element.
    # Insertion order defines the key order of the produced dicts.
    _TEXT_FIELD_XPATHS = {
        'title': './a/div[2]/div[1]/h3',
        'type': './a/div[2]/div[1]/span',
        'owner': './a/div[2]/div[2]/h2',
        'num': './a/div[2]/div[2]/span',
    }

    # Cover image, relative to the room card element; its ``src`` is stored.
    _PICTURE_XPATH = './a/div[1]/div[1]/picture/img'

    # Scroll far down so the pagination control is in view before clicking.
    _SCROLL_SCRIPT = 'scrollTo(0,10000000)'

    def __init__(self, url='https://www.douyu.com/directory/all', page_wait=3):
        """Create the crawler and start an Edge browser session.

        Args:
            url: Directory page to start crawling from.
            page_wait: Seconds to sleep before reading a page, giving the
                dynamically rendered room list time to appear.
        """
        self.url = url
        self.page_wait = page_wait
        self.driver = webdriver.Edge()

    def _parse_room(self, room):
        """Build the data dict for a single room card element."""
        record = {
            key: room.find_element(self._XPATH, xpath).text
            for key, xpath in self._TEXT_FIELD_XPATHS.items()
        }
        cover = room.find_element(self._XPATH, self._PICTURE_XPATH)
        record['picture'] = cover.get_attribute('src')
        return record

    def parse_data(self):
        """Return a list of room dicts extracted from the current page.

        Sleeps ``self.page_wait`` seconds first so the page can finish
        rendering. Returns an empty list when no room cards are found.
        """
        time.sleep(self.page_wait)
        rooms = self.driver.find_elements(self._XPATH, self._ROOM_XPATH)
        return [self._parse_room(room) for room in rooms]

    def save_data(self, data_list):
        """Print every room dict in ``data_list``, one per line."""
        for data in data_list:
            print(data)

    def run(self):
        """Crawl all directory pages, printing the rooms of each one.

        Opens ``self.url``, then repeatedly parses and prints the current
        page and clicks the "next" button. Stops when no "next" button is
        matched. The browser is always closed on exit, including when an
        error interrupts the crawl.
        """
        try:
            self.driver.get(self.url)
            while True:
                self.save_data(self.parse_data())

                # find_elements returns an empty list instead of raising, so
                # the end of pagination needs no exception handling.
                next_buttons = self.driver.find_elements(
                    self._XPATH, self._NEXT_XPATH)
                if not next_buttons:
                    break

                self.driver.execute_script(self._SCROLL_SCRIPT)
                next_buttons[0].click()
        finally:
            # Release the browser process even if scraping failed midway.
            self.driver.quit()
# Script entry point: start the crawler only when the file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    Douyu().run()