from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
import time
import csv
from tqdm import tqdm
# Scrape the page title and the "keywords"/"description" <meta> tags of every
# URL listed in the first column of a CSV file, appending one record per URL
# to a plain-text output file.
from selenium.common.exceptions import WebDriverException

# Seconds to wait for the <title> element and for each <meta> tag to appear.
TITLE_WAIT_SECONDS = 10
META_WAIT_SECONDS = 5


def _read_meta_content(driver, meta_name, timeout=META_WAIT_SECONDS):
    """Return the ``content`` attribute of ``<meta name="meta_name">``.

    Waits up to ``timeout`` seconds for the tag to be present in the DOM.
    Returns ``None`` when the tag does not appear in time, when the lookup
    fails with a WebDriver error, or when the tag has no ``content``
    attribute.
    """
    # Restrict the match to <meta> elements: a bare name lookup would also
    # match e.g. a form <input name="keywords">, which has no content.
    locator = (By.CSS_SELECTOR, f'meta[name="{meta_name}"]')
    try:
        # until() returns the located element, so no second lookup is needed.
        element = WebDriverWait(driver, timeout).until(
            EC.presence_of_element_located(locator)
        )
        return element.get_attribute('content')
    except WebDriverException:
        # Covers TimeoutException (tag absent) and stale-element errors.
        return None


chrome_options = Options()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')
chrome_options.add_argument('--no-sandbox')
# Content-setting value 2 blocks image loading; only text metadata is needed.
prefs = {"profile.managed_default_content_settings.images": 2}
chrome_options.add_experimental_option("prefs", prefs)

csv_file = 'lists_3.csv'
driver_path = '/usr/bin/chromedriver'
output_file = 'code.txt'

# Read the URL list BEFORE launching the browser, so that a missing or
# unreadable CSV cannot leave an orphaned Chrome process behind.
urls = []
with open(csv_file, mode='r', encoding='utf-8', newline='') as file:
    for row in csv.reader(file):
        # Skip empty rows and rows whose first cell is blank.
        if row and row[0].strip():
            urls.append(row[0].strip())

service = Service(driver_path)
driver = webdriver.Chrome(service=service, options=chrome_options)
try:
    # Append mode: records from earlier runs are preserved.
    with open(output_file, 'a', encoding='utf-8') as file:
        for url in tqdm(urls, desc="Processing URLs"):
            # Per-URL boundary: any failure is reported and the run moves on
            # to the next URL instead of aborting the whole batch.
            try:
                driver.get(url)
                WebDriverWait(driver, TITLE_WAIT_SECONDS).until(
                    EC.presence_of_element_located((By.TAG_NAME, "title"))
                )
                title = driver.title

                keywords = _read_meta_content(driver, 'keywords')
                description = _read_meta_content(driver, 'description')
                if keywords is None:
                    keywords = "Not Found"
                if description is None:
                    description = "Not Found"

                # Write the record in one call so a failure while scraping
                # never leaves a half-written entry in the output file.
                file.write(
                    f"{url}\n"
                    f"Title: {title}\n"
                    f"Keywords: {keywords}\n"
                    f"Description: {description}\n"
                    "\n"
                )
                # Flush per record so progress survives an interrupted run.
                file.flush()
            except Exception as e:
                print(f"An error occurred while processing {url}: {str(e)}")
finally:
    # Always shut the browser down, even on Ctrl-C or an unexpected error.
    driver.quit()

print("All data has been written to the output file.")