Selenium 无头浏览器模式 | 菜鸟教程,建议先看完前面这篇文章,再看我下面写的这篇。因为前面那一篇是基础,下面这一篇是扩展延伸。
一、什么是无头浏览器模式?
无头浏览器(Headless Browser)是指在没有图形用户界面(GUI)的情况下运行的浏览器。它执行所有与常规浏览器相同的操作,但不显示任何可视化界面。
主要优势:
更高的性能:不需要渲染UI,节省了大量系统资源
更快的执行速度:在部分场景下可比有界面模式快约20-30%(实际幅度取决于页面和运行环境,可用文末的对比脚本实测)
适合服务器环境:可以在没有显示器的服务器上运行
更好的并行化:可以轻松运行多个无头浏览器实例
减少干扰:不会弹出浏览器窗口干扰其他工作
二、各浏览器的无头模式配置
1. Chrome/Chromium 无头模式
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
# Build the Chrome options for a headless session.
chrome_options = Options()
# "--headless=new" selects Chrome's current headless implementation; older
# Chrome builds that predate it only understand the plain "--headless" switch.
chrome_options.add_argument("--headless=new")
# Commonly used companion switches.
# NOTE: --no-sandbox switches Chrome's sandbox off. It is usually only needed
# when Chrome runs as root (e.g. inside a container); avoid it elsewhere.
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument("--disable-dev-shm-usage")  # work around a small /dev/shm
chrome_options.add_argument("--disable-gpu")  # disable GPU acceleration (needed on some systems)
chrome_options.add_argument("--window-size=1920,1080")  # fix the viewport size
# Launch the headless browser.
driver = webdriver.Chrome(options=chrome_options)
try:
    # Usage example: load a page and print its title.
    driver.get("https://www.example.com")
    print(f"页面标题: {driver.title}")
finally:
    # Always shut the browser down, even when navigation fails, so no
    # browser/driver processes are left running.
    driver.quit()
2. Firefox 无头模式
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
# Build the Firefox options for a headless session.
firefox_options = Options()
# Enable headless mode through the command-line argument. The former
# `Options.headless = True` property was deprecated and later removed from
# Selenium 4, so the argument is the form that works across versions.
firefox_options.add_argument("-headless")
# Fix the window size so layout does not depend on a default.
firefox_options.add_argument("--width=1920")
firefox_options.add_argument("--height=1080")
# Launch the headless browser.
driver = webdriver.Firefox(options=firefox_options)
try:
    # Usage example: load a page and print its title.
    driver.get("https://www.example.com")
    print(f"页面标题: {driver.title}")
finally:
    # Always shut the browser down, even when navigation fails.
    driver.quit()
3. Edge 无头模式
from selenium import webdriver
from selenium.webdriver.edge.options import Options
# Build the Edge options for a headless session.
edge_options = Options()
# Enable headless mode.
edge_options.add_argument("--headless")
# Commonly used companion switches.
edge_options.add_argument("--disable-gpu")
edge_options.add_argument("--window-size=1920,1080")
# Launch the headless browser.
driver = webdriver.Edge(options=edge_options)
try:
    # Usage example: load a page and print its title.
    driver.get("https://www.example.com")
    print(f"页面标题: {driver.title}")
finally:
    # Always shut the browser down, even when navigation fails.
    driver.quit()
三、无头模式的高级配置
1. 用户代理和语言设置
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
# Headless Chrome with a custom user agent, a UI language, and image loading
# switched off. Switches are applied in the order listed.
chrome_options = Options()
for switch in (
    "--headless=new",
    # Custom user agent string.
    "--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
    # Browser language.
    "--lang=zh-CN",
    # Skip image loading to speed pages up.
    "--blink-settings=imagesEnabled=false",
    # Disabling JavaScript can be useful in some scenarios (left off here):
    # "--disable-javascript",
):
    chrome_options.add_argument(switch)
driver = webdriver.Chrome(options=chrome_options)
2. 性能优化配置
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
# Headless Chrome tuned for throughput.
chrome_options = Options()
chrome_options.add_argument("--headless=new")
# Performance-oriented switches.
chrome_options.add_argument("--disable-extensions")  # no extensions
# NOTE(review): --disable-plugins may no longer be honoured by current Chrome
# builds — confirm against the Chromium switch list.
chrome_options.add_argument("--disable-plugins")  # no plugins
# Skip image loading. "--disable-images" is not a Chrome switch and is
# silently ignored, so the Blink setting is used instead.
chrome_options.add_argument("--blink-settings=imagesEnabled=false")
chrome_options.add_argument("--disable-notifications")  # no notification prompts
chrome_options.add_argument("--disable-popup-blocking")  # turn the popup blocker off
chrome_options.add_argument("--disable-default-apps")  # no default apps
chrome_options.add_argument("--disable-background-timer-throttling")  # keep background timers unthrottled
chrome_options.add_argument("--disable-renderer-backgrounding")  # keep renderers at foreground priority
chrome_options.add_argument("--disable-backgrounding-occluded-windows")  # do not background occluded windows
driver = webdriver.Chrome(options=chrome_options)
四、无头模式下的特殊处理
1. 处理页面渲染和等待
在无头模式下,页面渲染和行为可能与有界面模式略有不同,需要特别注意:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
import time
# Demonstrates three ways to wait for a page to be ready in headless mode.
chrome_options = Options()
chrome_options.add_argument("--headless=new")
chrome_options.add_argument("--window-size=1920,1080")
driver = webdriver.Chrome(options=chrome_options)
try:
    driver.get("https://www.example.com")
    # Headless runs may need more generous timeouts; each until() call below
    # gets its own 15-second budget.
    wait = WebDriverWait(driver, 15)
    # Approach 1: explicit wait for a specific element to exist in the DOM.
    wait.until(EC.presence_of_element_located((By.ID, "content")))
    # Approach 2: wait until the document reports that loading has finished.
    wait.until(
        lambda d: d.execute_script("return document.readyState") == "complete"
    )
    # Approach 3: wait for an application-specific condition, here "no jQuery
    # AJAX request in flight". The `!window.jQuery` guard treats pages that do
    # not load jQuery as idle; referencing an undefined `jQuery` would throw
    # inside the browser and abort the wait with a script error.
    wait.until(
        lambda d: d.execute_script("return !window.jQuery || jQuery.active === 0")
    )
    # A screenshot is the easiest way to see what a headless page looks like.
    driver.save_screenshot("page_loaded.png")
    print("页面加载成功")
finally:
    driver.quit()
2. 处理下载功能
在无头模式下处理文件下载需要特殊配置:
import os
import time

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By


def _wait_for_new_download(directory, files_before, timeout=30.0, poll_interval=0.5):
    """Block until a new, fully written file shows up in *directory*.

    Args:
        directory: Folder the browser downloads into.
        files_before: Set of file names present before the download started.
        timeout: Maximum number of seconds to wait.
        poll_interval: Seconds to sleep between directory scans.

    Returns:
        True once at least one new file exists and no ``.crdownload``
        partial file remains; False if the timeout expires first.
    """
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        current = set(os.listdir(directory))
        # Chrome keeps an unfinished download under a ".crdownload" name.
        in_progress = any(name.endswith(".crdownload") for name in current)
        if current - files_before and not in_progress:
            return True
        time.sleep(poll_interval)
    return False


# Directory that receives the downloaded files (created if missing).
download_dir = os.path.join(os.getcwd(), "downloads")
os.makedirs(download_dir, exist_ok=True)

chrome_options = Options()
chrome_options.add_argument("--headless=new")
# Download preferences: save straight into download_dir without prompting.
prefs = {
    "download.default_directory": download_dir,
    "download.prompt_for_download": False,
    "download.directory_upgrade": True,
    "safebrowsing.enabled": True,
}
chrome_options.add_experimental_option("prefs", prefs)
driver = webdriver.Chrome(options=chrome_options)
try:
    driver.get("https://example.com/download")
    # Remember what is already there so only files from this run count.
    files_before = set(os.listdir(download_dir))
    # Click the download link.
    download_link = driver.find_element(By.ID, "download-link")
    download_link.click()
    # Poll for completion instead of sleeping for a fixed time.
    if not _wait_for_new_download(download_dir, files_before):
        raise TimeoutError("下载未在规定时间内完成")
finally:
    driver.quit()
五、无头模式下的调试技巧
1. 使用远程调试
即使是无头模式,也可以启用远程调试功能:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
# Headless Chrome with the DevTools remote-debugging endpoint enabled.
chrome_options = Options()
chrome_options.add_argument("--headless=new")
chrome_options.add_argument("--remote-debugging-port=9222")
# Bind the debugging endpoint to the loopback interface only. Anyone who can
# reach this port can fully control the browser, so listening on 0.0.0.0
# (all interfaces) should be limited to isolated networks such as a container
# whose port is published deliberately.
chrome_options.add_argument("--remote-debugging-address=127.0.0.1")
driver = webdriver.Chrome(options=chrome_options)
# The session can now be inspected from a browser at http://localhost:9222
2. 详细的日志记录
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
import logging
# Configure logging: INFO for everything by default...
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)
# ...but DEBUG for this script's own logger, because the browser log entries
# below are emitted at DEBUG and would otherwise be filtered out.
logger.setLevel(logging.DEBUG)

chrome_options = Options()
chrome_options.add_argument("--headless=new")
# Ask Chrome/chromedriver to collect these log categories.
chrome_options.set_capability("goog:loggingPrefs", {
    'browser': 'ALL',
    'driver': 'ALL',
    'performance': 'ALL'
})
driver = webdriver.Chrome(options=chrome_options)
try:
    logger.info("开始执行无头浏览器测试")
    driver.get("https://www.example.com")
    logger.info(f"已访问页面: {driver.current_url}")
    # Drain every requested log category and forward it to the Python logger.
    for log_type in ('browser', 'driver', 'performance'):
        for entry in driver.get_log(log_type):
            logger.debug("%s log: %s", log_type, entry)
    logger.info("测试执行完成")
finally:
    driver.quit()
    logger.info("浏览器已关闭")
3. 性能监控
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
import json
# JavaScript executed in the page: reads the Navigation Timing data and
# returns an object of durations derived from it, or null when the API is
# unavailable.
TIMING_SCRIPT = """
var performance = window.performance || window.webkitPerformance || window.msPerformance || window.mozPerformance;
if (performance && performance.timing) {
var timing = performance.timing;
return {
dns: timing.domainLookupEnd - timing.domainLookupStart,
tcp: timing.connectEnd - timing.connectStart,
request: timing.responseStart - timing.requestStart,
response: timing.responseEnd - timing.responseStart,
domLoading: timing.domLoading - timing.navigationStart,
domInteractive: timing.domInteractive - timing.navigationStart,
domComplete: timing.domComplete - timing.navigationStart,
loadEvent: timing.loadEventEnd - timing.loadEventStart,
total: timing.loadEventEnd - timing.navigationStart
};
}
return null;
"""

chrome_options = Options()
chrome_options.add_argument("--headless=new")
driver = webdriver.Chrome(options=chrome_options)
try:
    driver.get("https://www.example.com")
    # Collect the timing figures from the loaded page and pretty-print them.
    performance_metrics = driver.execute_script(TIMING_SCRIPT)
    report = json.dumps(performance_metrics, indent=2)
    print("性能指标:", report)
finally:
    driver.quit()
六、无头模式的最佳实践
1. 环境检测和回退机制
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
import os
def create_driver(headless=True):
    """Create a Chrome driver, falling back to headed mode if headless fails.

    Args:
        headless: When True, start Chrome with ``--headless=new`` and run a
            quick sanity check; on any failure a headed browser is tried
            instead.

    Returns:
        A started ``webdriver.Chrome`` instance. The caller is responsible
        for calling ``quit()`` on it.

    Raises:
        Exception: Whatever error prevented the headed browser from starting
            (only raised when the headed attempt fails as well).
    """
    chrome_options = Options()
    if headless:
        chrome_options.add_argument("--headless=new")
    # Switches shared by both modes.
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--disable-dev-shm-usage")
    chrome_options.add_argument("--window-size=1920,1080")

    driver = None
    try:
        driver = webdriver.Chrome(options=chrome_options)
        if headless:
            # Smoke test: the session must be able to navigate and run a
            # script. navigator.webdriver only confirms that the page is
            # under automation; it does not by itself prove headless mode.
            driver.get("about:blank")
            if not driver.execute_script("return navigator.webdriver"):
                raise Exception("无头模式检测失败")
        return driver
    except Exception as e:
        mode = "无头" if headless else "有界面"
        print(f"{mode}模式创建失败: {e}")
        # Do not leave a half-initialised browser running before retrying
        # or re-raising.
        if driver is not None:
            driver.quit()
        if not headless:
            raise
        print("尝试回退到有界面模式")
        return create_driver(headless=False)
# 使用示例
# Bind the name before the try block so the cleanup below is safe even when
# create_driver() raises and never returns a driver.
driver = None
try:
    driver = create_driver(headless=True)
    driver.get("https://www.example.com")
    print("无头模式工作正常")
finally:
    if driver is not None:
        driver.quit()
2. 并行测试配置
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from concurrent.futures import ThreadPoolExecutor
import threading
def create_driver_for_thread():
    """Build a dedicated headless Chrome instance for the calling thread.

    Returns:
        A started ``webdriver.Chrome`` whose user-data directory is keyed on
        the current thread's identifier.
    """
    # Keying the profile directory on the thread id keeps concurrently
    # running threads from sharing one Chrome profile.
    profile_dir = f"/tmp/chrome-profile-{threading.get_ident()}"
    options = Options()
    for flag in (
        "--headless=new",
        "--no-sandbox",
        "--disable-dev-shm-usage",
        f"--user-data-dir={profile_dir}",
    ):
        options.add_argument(flag)
    return webdriver.Chrome(options=options)
def test_task(url):
    """Visit *url* in a fresh headless browser and report success.

    Args:
        url: Address to open.

    Returns:
        True when the page was opened, False when the browser could not be
        started or the navigation failed.
    """
    worker = threading.current_thread().name
    driver = None
    try:
        # Creating the driver inside the try block means a browser that
        # fails to start is reported as a failed task instead of aborting
        # the whole parallel run.
        driver = create_driver_for_thread()
        driver.get(url)
        print(f"{worker} 访问 {url} 成功")
        return True
    except Exception as e:
        print(f"{worker} 访问 {url} 失败: {e}")
        return False
    finally:
        if driver is not None:
            driver.quit()
# Run the tasks in parallel: one worker thread (and one browser) per URL.
urls = [
    "https://www.example.com",
    "https://www.google.com",
    "https://www.github.com",
]
with ThreadPoolExecutor(max_workers=3) as pool:
    results = list(pool.map(test_task, urls))
# Each task returns True/False, so summing counts the successes.
succeeded = sum(results)
failed = len(results) - succeeded
print(f"成功: {succeeded}, 失败: {failed}")
3. Docker 中的无头浏览器
在 Docker 容器中运行无头浏览器:
FROM python:3.9-slim

# Install Google Chrome from Google's apt repository.
# unzip is installed here because the ChromeDriver step below needs it.
# The repository key goes into a dedicated keyring referenced via "signed-by"
# instead of the deprecated apt-key.
RUN apt-get update && apt-get install -y \
        wget \
        gnupg \
        unzip \
    && wget -q -O - https://dl-ssl.google.com/linux/linux_signing_key.pub \
        | gpg --dearmor -o /usr/share/keyrings/google-chrome.gpg \
    && echo "deb [arch=amd64 signed-by=/usr/share/keyrings/google-chrome.gpg] http://dl.google.com/linux/chrome/deb/ stable main" \
        > /etc/apt/sources.list.d/google.list \
    && apt-get update \
    && apt-get install -y google-chrome-stable \
    && rm -rf /var/lib/apt/lists/*

# Install the ChromeDriver build that matches the installed Chrome.
# The legacy chromedriver.storage.googleapis.com/LATEST_RELEASE endpoint only
# serves drivers up to version 114, so the driver is taken from the
# Chrome for Testing bucket using the exact installed Chrome version.
# NOTE(review): assumes a Chrome for Testing build exists for that exact
# version - confirm. Recent Selenium 4 releases can alternatively resolve the
# driver automatically through Selenium Manager.
RUN CHROME_VERSION="$(google-chrome --version | awk '{print $3}')" \
    && wget -q -O /tmp/chromedriver.zip \
        "https://storage.googleapis.com/chrome-for-testing-public/${CHROME_VERSION}/linux64/chromedriver-linux64.zip" \
    && unzip -j /tmp/chromedriver.zip chromedriver-linux64/chromedriver -d /usr/local/bin/ \
    && chmod +x /usr/local/bin/chromedriver \
    && rm /tmp/chromedriver.zip

# Install the Python dependencies.
COPY requirements.txt .
RUN pip install -r requirements.txt

# Copy the application code.
COPY . .

# Environment variables for the browser tooling.
# DISPLAY is kept from the original setup; this image starts no X server.
ENV DISPLAY=:99
ENV CHROME_BIN=/usr/bin/google-chrome
ENV CHROME_DRIVER=/usr/local/bin/chromedriver

CMD ["python", "main.py"]
七、常见问题与解决方案
1. 无头模式下的元素交互问题
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By

# Pick one click strategy. Running both would click the element twice.
USE_JS_CLICK = False

chrome_options = Options()
chrome_options.add_argument("--headless=new")
driver = webdriver.Chrome(options=chrome_options)
try:
    driver.get("https://www.example.com")
    # In headless mode the element may have to be scrolled into view
    # explicitly before it can be interacted with.
    element = driver.find_element(By.ID, "my-element")
    driver.execute_script("arguments[0].scrollIntoView(true);", element)
    if USE_JS_CLICK:
        # Alternative: dispatch the click directly from JavaScript.
        driver.execute_script("arguments[0].click();", element)
    else:
        # Default: ActionChains for more complex interactions (move to the
        # element, then click it).
        actions = ActionChains(driver)
        actions.move_to_element(element).click().perform()
finally:
    driver.quit()
2. 处理证书和安全警告
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
# Headless Chrome that ignores certificate problems and suppresses prompts.
# The security-related switches relax the browser's protections, as their
# names state; they are applied in the order listed.
chrome_options = Options()
for switch in (
    "--headless=new",
    # Ignore certificate errors and related content restrictions.
    "--ignore-certificate-errors",
    "--allow-running-insecure-content",
    "--disable-web-security",
    # Suppress info bars and notification prompts.
    "--disable-infobars",
    "--disable-notifications",
):
    chrome_options.add_argument(switch)
for option_name, option_value in (
    ("excludeSwitches", ["enable-automation"]),
    ("useAutomationExtension", False),
):
    chrome_options.add_experimental_option(option_name, option_value)
driver = webdriver.Chrome(options=chrome_options)
八、性能对比:无头模式 vs 有界面模式
下面是一个简单的性能对比测试:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
import time
def test_performance(headless=True):
    """Time a small, fixed browsing workload in headless or headed Chrome.

    Args:
        headless: Run Chrome with ``--headless=new`` when True, with a
            visible window otherwise.

    Returns:
        Elapsed wall-clock seconds for the workload. Browser start-up is not
        included because the clock starts after the driver is created.
    """
    chrome_options = Options()
    if headless:
        chrome_options.add_argument("--headless=new")
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--disable-dev-shm-usage")
    driver = webdriver.Chrome(options=chrome_options)
    # perf_counter() is the clock meant for measuring intervals; time.time()
    # can jump when the system clock is adjusted.
    start_time = time.perf_counter()
    try:
        # The measured workload: load a page, read its title, scroll around.
        driver.get("https://www.example.com")
        driver.title  # value unused; the round trip is part of the workload
        for _ in range(5):
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(0.1)
            driver.execute_script("window.scrollTo(0, 0);")
        elapsed = time.perf_counter() - start_time
        mode = "无头" if headless else "有界面"
        print(f"{mode}模式执行时间: {elapsed:.2f}秒")
        return elapsed
    finally:
        driver.quit()
# Run the workload once per mode, then report the relative difference as a
# percentage of the headed run's time.
headless_time = test_performance(headless=True)
normal_time = test_performance(headless=False)
speedup_pct = (normal_time - headless_time) / normal_time * 100
print(f"无头模式比有界面模式快 {speedup_pct:.1f}%")
无头浏览器模式是现代化自动化测试和网络爬虫的重要组成部分。通过合理配置和使用,你可以显著提高自动化任务的效率和可靠性,特别是在持续集成/持续部署(CI/CD)环境中。记住根据你的具体需求调整配置,并在生产环境部署前进行充分的测试。