# 防检测 (anti-detection)
# coding: utf-8
import time
import re
from bs4 import BeautifulSoup
from lxml import etree
from selenium import webdriver
from selenium.webdriver import ChromeOptions
from selenium.webdriver import Chrome
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# Page to load in the headless browser.
url = 'https://www.baidu.com'

# Build the Chrome launch configuration.
option = webdriver.ChromeOptions()
option.add_argument('--headless')  # run without a visible browser window
option.add_argument('--disable-gpu')
option.add_argument('--no-sandbox')
option.add_argument('--disable-dev-shm-usage')
option.add_argument('log-level=3')
# Anti-detection: switch off the Blink "AutomationControlled" feature so the
# page sees fewer traces of browser automation.
option.add_argument('--disable-blink-features=AutomationControlled')
option.add_argument("--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36")
# A headless browser cannot maximize its window, so request an explicit size.
option.add_argument("--window-size=1920,1050")
option.add_experimental_option('excludeSwitches', ['enable-automation'])
option.add_experimental_option('useAutomationExtension', False)

# Start Chrome with the configuration above.
driver = webdriver.Chrome(options=option)
try:
    driver.get(url)
    # Explicit wait: poll every 0.5 s for up to 10 s until an element with
    # class "news-list" is present. until() returns the element or raises
    # TimeoutException, so no truthiness check is needed around it.
    # NOTE(review): confirm the target page really contains a "news-list"
    # element; if it does not, this wait always times out.
    WebDriverWait(driver, 10, 0.5).until(
        EC.presence_of_element_located((By.CLASS_NAME, "news-list"))
    )
    pageSource = driver.page_source
    print(pageSource)
    # In headless mode only a screenshot shows what was actually rendered:
    # driver.save_screenshot('./ch.png')
finally:
    # Always shut the browser down, even when the wait times out, so the
    # Chrome and chromedriver processes are not left running.
    driver.quit()
# 版权声明:本文为Liquor6原创文章,遵循CC 4.0 BY-SA版权协议,转载请附上原文出处链接和本声明。