python selenium 打开网页

发布于:2024-07-04 ⋅ 阅读:(23) ⋅ 点赞:(0)

selenium工具类 - 文件名 seleniumkit.py

代码如下

# -*- coding: utf-8 -*-

from selenium import webdriver

import os
import time

from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By

from selenium.webdriver.chrome.service import Service



def seleniumOpenUrl(initUrl: str):
    """Open ``initUrl`` in a visible Chrome window and return the driver.

    Args:
        initUrl: Address handed to ``driver.get``.

    Returns:
        The started ``webdriver.Chrome`` instance, with the page loaded and
        the window maximized. The caller owns the driver and should call
        ``quit()`` on it when finished.

    Raises:
        Any exception Selenium raises while starting Chrome or loading the
        page. If loading or maximizing fails, the browser is shut down
        before the exception is re-raised so no Chrome process is left over.
    """
    chrome_options = webdriver.ChromeOptions()
    # Chrome switches use exactly two leading dashes; the previous
    # three-dash spelling was not a recognized switch.
    chrome_options.add_argument('--ignore-certificate-errors-spki-list')
    chrome_options.add_argument('--ignore-ssl-errors')
    chrome_options.add_argument('--ignore-ssl-error')
    chrome_options.add_argument('log-level=2')

    # Original author's note: works around a Chrome stack-overflow problem.
    chrome_options.add_experimental_option('useAutomationExtension', False)

    # https://blog.csdn.net/huashao0602/article/details/124322123
    # Hide the automation fingerprint so that anti-crawler checks do not
    # withhold page data from the Selenium-driven browser.
    chrome_options.add_argument("--disable-blink-features=AutomationControlled")

    # chromedriver.exe is expected in the current working directory.
    path = os.path.join(os.getcwd(), 'chromedriver.exe')
    print('chromePath:', path)
    if os.path.isfile(path):
        service = Service(executable_path=path)
    else:
        # No bundled driver binary (e.g. non-Windows host or a different
        # working directory): let Selenium locate chromedriver itself.
        service = Service()
    driver = webdriver.Chrome(options=chrome_options, service=service)

    try:
        driver.get(initUrl)
        driver.maximize_window()
    except Exception:
        # Do not leak the Chrome/chromedriver processes on failure.
        driver.quit()
        raise
    return driver


def seleniumOpenUrlHeadless(initUrl: str):
    """Open ``initUrl`` in headless Chrome (no window) and return the driver.

    The browser is started with ``--lang=fr`` and ``--headless``.

    Args:
        initUrl: Address handed to ``driver.get``.

    Returns:
        The started ``webdriver.Chrome`` instance with the page loaded.
        The caller owns the driver and should call ``quit()`` on it when
        finished.

    Raises:
        Any exception Selenium raises while starting Chrome or loading the
        page. If loading or maximizing fails, the browser is shut down
        before the exception is re-raised so no Chrome process is left over.
    """
    chrome_options = webdriver.ChromeOptions()
    # Chrome switches use exactly two leading dashes; the previous
    # three-dash spelling was not a recognized switch.
    chrome_options.add_argument('--ignore-certificate-errors-spki-list')
    chrome_options.add_argument('--ignore-ssl-errors')
    chrome_options.add_argument('--ignore-ssl-error')
    chrome_options.add_argument('log-level=2')

    # Original author's note: works around a Chrome stack-overflow problem.
    chrome_options.add_experimental_option('useAutomationExtension', False)

    # https://blog.csdn.net/huashao0602/article/details/124322123
    # Hide the automation fingerprint so that anti-crawler checks do not
    # withhold page data from the Selenium-driven browser.
    chrome_options.add_argument("--disable-blink-features=AutomationControlled")

    browser_locale = 'fr'
    chrome_options.add_argument("--lang={}".format(browser_locale))
    # The switch used to be added twice ('--headless' and 'headless');
    # once is enough.
    chrome_options.add_argument("--headless")

    # chromedriver.exe is expected in the current working directory.
    path = os.path.join(os.getcwd(), 'chromedriver.exe')
    print('chromePath:', path)
    if os.path.isfile(path):
        service = Service(executable_path=path)
    else:
        # No bundled driver binary (e.g. non-Windows host or a different
        # working directory): let Selenium locate chromedriver itself.
        service = Service()
    driver = webdriver.Chrome(options=chrome_options, service=service)

    try:
        driver.get(initUrl)
        # NOTE(review): kept from the original; confirm that maximizing has
        # the intended effect on the viewport in headless mode.
        driver.maximize_window()
    except Exception:
        # Do not leak the Chrome/chromedriver processes on failure.
        driver.quit()
        raise
    return driver

测试打开百度 - test.py

# -*- coding: utf-8 -*-

from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from seleniumkit import *


def openBaiDu():
    """Open the Baidu home page in a visible browser and return the driver."""
    return seleniumOpenUrl("https://www.baidu.com/")

if __name__ == "__main__":
    # Import explicitly instead of relying on ``time`` leaking in through
    # the wildcard import from seleniumkit.
    import time

    driver = openBaiDu()
    try:
        # Keep the page on screen for ten seconds so the result can be seen.
        time.sleep(10)
    finally:
        # Always close the browser and stop chromedriver, even on Ctrl+C.
        driver.quit()

打开百度的效果

在这里插入图片描述


网站公告

今日签到

点亮在社区的每一天
去签到