【爬虫】通过模拟鼠标点击和键盘操作抓取网页数据

发布于:2025-09-02 ⋅ 阅读:(22) ⋅ 点赞:(0)

通过模拟鼠标点击打开网页,然后截取屏幕,全选并复制页面文字,将文字保存到文本文件,接着翻页,如此逐步循环。

import os.path

import pyautogui
import time
import pyperclip


def select_and_save_to_file(filename="selected_content.txt"):
    """Copy all text from the focused window and write it to ``filename``.

    Sends Ctrl+A followed by Ctrl+C through pyautogui, reads the clipboard
    with pyperclip and writes the text to ``filename`` as UTF-8, overwriting
    any existing file.  The clipboard is emptied before copying so that a
    copy which did not take effect is reported as "no content" instead of
    saving text left over from an earlier copy.

    Any exception is caught and printed rather than propagated, and nothing
    is written when the clipboard is empty.

    Args:
        filename: Path of the text file to write.

    Returns:
        None.
    """
    try:
        # Pause after every pyautogui call so the system has time to respond.
        # This is a module-wide pyautogui setting, so it stays in effect
        # after this function returns.
        pyautogui.PAUSE = 1

        # Discard stale clipboard text; otherwise a failed Ctrl+C would go
        # unnoticed and the previous copy would be saved under this filename.
        pyperclip.copy('')

        # Select everything in the focused window.
        pyautogui.hotkey('ctrl', 'a')
        print("执行了全选操作")

        # Copy the selection to the clipboard.
        pyautogui.hotkey('ctrl', 'c')
        print("执行了复制操作")

        # Give the clipboard a moment to receive the copied text.
        time.sleep(0.5)

        content = pyperclip.paste()
        if not content:
            print("剪贴板中没有内容")
            return

        with open(filename, 'w', encoding='utf-8') as f:
            f.write(content)

        print(f"成功将内容保存到 {filename}")

    except Exception as e:
        # Best effort: report the problem and let the caller carry on.
        print(f"操作过程中出现错误: {str(e)}")


# Screen Y coordinates that are clicked in turn (X is fixed at 1830 below).
list_cord = [461, 499, 537, 577, 618, 656, 693, 734, 773, 811]
# Only the first three coordinates are processed for now.
list_cord = list_cord[:3]

# TODO: crawl every page (e.g. ``for page in range(1, 124)``) instead of a
# single hard-coded one. An earlier draft clicked (1324, 868) and waited 3 s
# after each pass, presumably the next-page control; confirm before use.
page = 13

# Screenshots and text files are written here; create the directory up front
# so the first save does not fail because it is missing.
os.makedirs('./save_data', exist_ok=True)

for idx, i in enumerate(list_cord):
    base = f'./save_data/page{page}_{idx+1}'
    image_path = base + '.jpg'
    text_path = base + '.txt'

    # Skip entries that already have both outputs so a rerun resumes.
    if os.path.exists(image_path) and os.path.exists(text_path):
        continue

    # Click the entry at x=1830, y=i and wait for the page to load.
    pyautogui.moveTo(1830, i)
    pyautogui.click()
    time.sleep(3)

    pyautogui.screenshot(image_path)
    select_and_save_to_file(text_path)

    # select_and_save_to_file reports its own errors and returns normally,
    # so check for the text file before claiming success.
    if os.path.exists(text_path):
        print(f'page{page}_{idx+1}抓取成功')
    else:
        print(f'page{page}_{idx+1}抓取失败')

    # Click at (494, 19) before the next entry. NOTE(review): presumably
    # this switches back to the list tab/page; confirm against the layout.
    pyautogui.moveTo(494, 19)
    pyautogui.click()
    time.sleep(1)