selenium webdriver使用

发布于:2025-05-22 ⋅ 阅读:(19) ⋅ 点赞:(0)

import logging
import os
import time
from urllib.parse import urljoin
from urllib.parse import urlparse

import requests
from lxml import etree
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select


_LOG = logging.getLogger(__name__)


def _fill_optional(driver, field_id, text):
    """Type *text* into the input with id *field_id*; skip it on WebDriver errors."""
    try:
        driver.find_element(By.XPATH, f'//input[@id="{field_id}"]').send_keys(text)
    except WebDriverException as exc:
        # Best effort: this field is not required for the form to submit.
        _LOG.debug("optional input %s skipped: %s", field_id, exc)


def _select_optional(driver, field_id, visible_text):
    """Pick *visible_text* in the select with id *field_id*; skip it on WebDriver errors."""
    try:
        Select(driver.find_element(By.ID, field_id)).select_by_visible_text(visible_text)
    except WebDriverException as exc:
        # Best effort: this drop-down is not required for the form to submit.
        _LOG.debug("optional select %s skipped: %s", field_id, exc)


def get_pdf(cur_url):
    """Fill in the registration form on a white-paper page and download the file.

    A fresh Chrome session opens ``cur_url``, switches into the embedded
    form iframe, fills the form with fixed values, clicks the submit button
    and reads the ``href`` of the download link.  The browser is always
    closed before the file is fetched with ``requests`` and written to the
    current working directory under the basename of the URL path.

    Args:
        cur_url: URL of the white-paper page that embeds the form.

    Raises:
        selenium.common.exceptions.WebDriverException: if the iframe, a
            required form field, the submit button or the download link
            cannot be located or used.
        RuntimeError: if the download link has no ``href``.
        ValueError: if no file name can be derived from the download URL.
        requests.RequestException: if the download fails, times out or
            returns an HTTP error status.
    """
    proxies = {'http': '192.168.1.122:1080', 'https': '192.168.1.122:1080'}
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36 Edg/136.0.0.0',
        'referer': 'https://endeavor.dragonforms.com/',
    }

    d = webdriver.Chrome()
    try:
        d.get(cur_url)
        # Scroll towards the middle of the page and wait before looking for
        # the form iframe (presumably it is loaded lazily -- TODO confirm).
        d.execute_script("window.scrollTo(0, document.body.scrollHeight/2+200);")
        time.sleep(10)

        # Use the located element directly; going through its ``id``
        # attribute would break for an iframe that has no id.
        iframe = d.find_element(
            By.XPATH,
            '//div[@class="embed center fullWidth" or @class="embed fullWidth"]//iframe',
        )
        d.switch_to.frame(iframe)

        # Text inputs, in the original fill order.  Fields filled with
        # ``find_element`` directly are mandatory: a failure aborts the run.
        d.find_element(By.XPATH, '//input[@id="id13"]').send_keys('cdg19880415@gmaill.com')
        d.find_element(By.XPATH, '//input[@id="id1"]').send_keys('chen')
        d.find_element(By.XPATH, '//input[@id="id2"]').send_keys('chen')
        d.find_element(By.XPATH, '//input[@id="id10"]').send_keys('beijing')
        _fill_optional(d, "id4", 'chen')
        d.find_element(By.XPATH, '//input[@id="id3"]').send_keys('beijing')
        _fill_optional(d, "id6", 'beijing')
        _fill_optional(d, "id9", '101300')
        _fill_optional(d, "id11", '18518076020')

        # Drop-down (select) fields.
        Select(d.find_element(By.ID, "id7")).select_by_visible_text("CHINA")
        Select(d.find_element(By.ID, "id8")).select_by_visible_text("FOREIGN")
        _select_optional(d, "id5082617", "No")
        _select_optional(d, "id5082616", "No")

        time.sleep(5)

        # Window scrolling is done from the top-level document, so leave the
        # iframe, scroll, and re-enter it around each interaction.
        d.switch_to.default_content()
        d.execute_script("window.scrollTo(0, document.body.scrollHeight/2+200);")
        d.switch_to.frame(iframe)
        d.find_element(By.ID, "custombtn").click()

        d.switch_to.default_content()
        d.execute_script("window.scrollTo(0, document.body.scrollHeight/2-600);")
        d.switch_to.frame(iframe)
        url = d.find_element(
            By.XPATH, '//div[@class="downloadReport-btn"]/a'
        ).get_attribute('href')
    finally:
        # Always release the browser, including when a lookup above fails;
        # otherwise every call (and every retry) leaks a Chrome process.
        d.quit()

    if not url:
        raise RuntimeError(f"download link on {cur_url} has no href")

    pdf_name = os.path.basename(urlparse(url).path)
    if not pdf_name:
        raise ValueError(f"cannot derive a file name from download URL {url!r}")

    # Fetch before opening the output file so a failed request does not
    # leave an empty or truncated file behind.
    response = requests.get(url, proxies=proxies, headers=headers, timeout=60)
    response.raise_for_status()
    with open(pdf_name, 'wb') as f:
        f.write(response.content)

if __name__ == "__main__":
    # Script entry point: collect the white-paper links from the listing
    # page, then download each paper with up to three attempts per link.
    logging.basicConfig(level=logging.INFO)

    d = webdriver.Chrome()
    try:
        d.get("https://www.militaryaerospace.com/white-papers")
        # Scroll to the bottom nine times, pausing two seconds between
        # scrolls (presumably to trigger loading of more items -- TODO confirm).
        for _ in range(8):
            d.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(2)
        d.execute_script("window.scrollTo(0, document.body.scrollHeight);")

        eles = d.find_elements(
            By.XPATH, '//div[@class="items-wrapper"]//a[@class="title-wrapper"]'
        )
        # Read every href while the listing page is still alive, so the
        # download loop does not depend on live element handles.
        hrefs = [ele.get_attribute('href') for ele in eles]
    finally:
        # The listing browser is no longer needed once the links are known.
        d.quit()

    # Drop missing hrefs and duplicates while keeping page order.
    url_list = list(dict.fromkeys(href for href in hrefs if href))

    for cur_url in url_list:
        for attempt in range(1, 4):
            try:
                get_pdf(cur_url)
            except Exception:
                # Top-level boundary: record the failure and retry instead
                # of swallowing it silently.
                logging.exception("attempt %d/3 failed for %s", attempt, cur_url)
            else:
                break
        else:
            logging.error("giving up on %s after 3 attempts", cur_url)


网站公告

今日签到

点亮在社区的每一天
去签到