使用 Selenium 获取 WebDriver,并规避反爬检测

发布于:2024-12-21 ⋅ 阅读:(15) ⋅ 点赞:(0)
# pip install selenium selenium-stealth webdriver-manager
from selenium_stealth import stealth
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.webdriver.chrome.options import Options

def create_selenium_browser(download_path=None):
    """Create a headless Chrome WebDriver patched with selenium-stealth.

    The driver binary is resolved through ``ChromeDriverManager().install()``
    and Chrome is started headless with a fixed window size, a custom user
    agent and the ``eager`` page-load strategy. ``stealth()`` is then applied
    to the driver to reduce the chance of being detected as automation.

    Args:
        download_path: Optional download directory. When provided, Chrome is
            configured to save files there without showing a prompt. When
            ``None`` (the default), Chrome's download settings are untouched.

    Returns:
        The configured ``webdriver.Chrome`` instance. The caller is
        responsible for calling ``quit()`` on it.

    Raises:
        Exception: Whatever ``webdriver.Chrome`` or ``stealth`` raises is
            propagated; if ``stealth`` fails, the already-started browser is
            quit first so no Chrome process is left behind.
    """
    user_agent = (
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/123.0.0.0 Safari/537.36'
    )

    options = Options()
    options.add_argument('--headless')
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-dev-shm-usage')
    options.add_argument('window-size=1920,4080')
    options.add_argument(f'user-agent={user_agent}')
    if download_path:
        options.add_experimental_option("prefs", {
            "download.default_directory": download_path,
            "download.prompt_for_download": False,  # do not show a save dialog
        })
    # 'eager' returns control once the DOM is ready instead of waiting for
    # every sub-resource to finish loading.
    options.page_load_strategy = 'eager'

    driver = webdriver.Chrome(
        service=ChromeService(ChromeDriverManager().install()),
        options=options,
    )

    # selenium-stealth rewrites browser properties that give Selenium away,
    # lowering the risk of detection.
    # NOTE(review): the user agent above claims macOS while platform is
    # "Win32"; this mismatch may itself be detectable -- confirm intent.
    try:
        stealth(
            driver,
            languages=["en-US", "en"],
            vendor="Google Inc.",
            platform="Win32",
            webgl_vendor="Intel Inc.",
            renderer="Intel Iris OpenGL Engine",
            fix_hairline=True,
        )
    except Exception:
        # Do not leak a running Chrome process if patching fails.
        driver.quit()
        raise
    return driver

以上代码创建了一个无头模式的 Chrome WebDriver,并通过 selenium-stealth 修改浏览器特征,以降低被反爬机制检测到的风险。