https://googlechromelabs.github.io/chrome-for-testing/
https://storage.googleapis.com/chrome-for-testing-public/125.0.6422.141/win64/chromedriver-win64.zip
注意:chromedriver 的版本必须与本机 Chrome 浏览器的版本完全一致(上面第一个链接是各版本的列表,第二个链接是 125.0.6422.141 win64 版 chromedriver 的下载地址)
login
import os
import json
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
import time
# Path to the ChromeDriver executable (replace it with your own path)
chrome_driver_path = 'F:/python/spider/chromedriver-win64/chromedriver.exe'
# Configure the Chrome options
options = Options()
options.add_argument("--disable-blink-features=AutomationControlled")
driver = webdriver.Chrome(service=Service(executable_path=chrome_driver_path), options=options)
try:
    # Open the target page and let the user log in by hand.
    # NOTE: the original comment said "Xiaohongshu home page", but the URL
    # actually loaded here is zujuan.xkw.com.
    driver.get("https://zujuan.xkw.com/gzsx/zsd27925")
    time.sleep(40) # fixed 40-second window for the user to finish the manual login
    # Read the cookies of the (hopefully logged-in) session
    cookies = driver.get_cookies()
    # Print the current working directory, which is where cookies.json is written
    print("Current working directory: ", os.getcwd())
    # Save the cookies to a file
    with open("cookies.json", "w") as file:
        json.dump(cookies, file)
    print("Cookies saved successfully.")
finally:
    # Always shut the browser down, even if something above failed
    driver.quit()
组
import json
from selenium import webdriver
from selenium.webdriver import Keys
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException
import random
import time
# Path to the ChromeDriver executable
chrome_driver_path = 'F:/python/spider/chromedriver-win64/chromedriver.exe'
MAX_ITEMS_BEFORE_WRITING = 20 # flush the collected records to the file after every 20 items
def write_to_file(collected_items, path='result.json'):
    """Append records to a JSON Lines file, one JSON object per line.

    Args:
        collected_items: Iterable of JSON-serializable records.
        path: Destination file. Defaults to ``result.json`` in the current
            working directory, which is the file the function always used
            before this parameter existed.

    Nothing is written (and no message is printed) for an empty batch, so an
    empty call no longer leaves a blank line in the output file.
    """
    # Serialize first so a serialization error cannot leave a half-written batch.
    lines = [json.dumps(item, ensure_ascii=False) + '\n' for item in collected_items]
    if not lines:
        return
    with open(path, 'a', encoding='utf-8') as file:
        file.writelines(lines)
    print("Results saved successfully.")
# Random pause helper, used to imitate the pace of fairly quick click actions
def random_delay(time_start, time_end):
    """Sleep for a random number of seconds between time_start and time_end."""
    time.sleep(random.uniform(time_start, time_end))
# Configure the Chrome options
options = Options()
options.add_argument("--disable-blink-features=AutomationControlled")
# Start Chrome through the ChromeDriver at chrome_driver_path and maximize its window
driver = webdriver.Chrome(service=Service(executable_path=chrome_driver_path), options=options)
driver.maximize_window()
def scroll_and_collect(driver, num_pages):
    """Collect question records from result pages 1..num_pages.

    For every element with class ``tk-quest-item`` on each page, the question
    type, difficulty and the question body's outer HTML are stored as a dict.
    Records are flushed with ``write_to_file`` each time
    ``MAX_ITEMS_BEFORE_WRITING`` of them have accumulated, and once more at
    the end for the remainder.

    Args:
        driver: Selenium WebDriver already showing page 1 of the list.
        num_pages: Number of pages to visit; 0 or less visits nothing.

    Returns:
        The final, partially filled batch (already written to the file). It
        is empty when the last flush happened exactly on a batch boundary.
    """
    collected_items = []

    # range() visits exactly num_pages pages; the previous
    # "while current_page <= num_pages" loop visited one page too many.
    for current_page in range(1, num_pages + 1):
        if current_page > 1:
            # Jump to the page through the pager's "go to" input box.
            pager_input = driver.find_element(By.ID, "iptGotoNum")
            # clear() returns None, so it must not be chained with send_keys().
            pager_input.clear()
            pager_input.send_keys(str(current_page))
            driver.find_element(
                By.XPATH, ".//div[@class='go-to']/a[@class='confirm-btn']"
            ).click()
            random_delay(4, 8)  # wait for the new page content to load
        print(current_page)

        # All question cards on the current page
        for item in driver.find_elements(By.CLASS_NAME, "tk-quest-item"):
            try:
                info_cells = item.find_element(
                    By.XPATH, ".//div[@class='ques-additional']"
                ).find_elements(By.CLASS_NAME, "info-cnt")
                question_type = info_cells[0].text
                difficulty = info_cells[1].text
                content = item.find_element(
                    By.XPATH,
                    ".//div[@class='wrapper quesdiv']/div[@class='exam-item__cnt ']",
                ).get_attribute("outerHTML")
            except (NoSuchElementException, IndexError):
                # Card without the expected structure: skip it.
                continue

            # NOTE: answer extraction is intentionally disabled. The earlier
            # attempt clicked the "wrapper quesdiv" box, read the
            # "item answer" element and treated text containing "升级会员"
            # as "no answer available".
            record = {
                "type": question_type,
                "difficulty": difficulty,
                "content": content,
            }
            print(record)
            collected_items.append(record)

            # Flush a full batch and start a new one
            if len(collected_items) >= MAX_ITEMS_BEFORE_WRITING:
                write_to_file(collected_items)
                collected_items = []

    # Final flush of whatever is left
    if collected_items:
        write_to_file(collected_items)
    return collected_items
# Close the pop-up advertisement dialog
def close_win(browser):
    """Wait 10 seconds, then try to close the pop-up dialog, if there is one.

    The close button is located by the class name ``next-dialog-close`` and
    clicked through JavaScript. This is best-effort: any failure (typically
    because no dialog is present) is printed and otherwise ignored.

    Args:
        browser: Selenium WebDriver showing the page.
    """
    time.sleep(10)
    try:
        # find_element_by_class_name() no longer exists in current Selenium 4
        # releases; the resulting AttributeError used to be swallowed below,
        # so the dialog was never closed.
        close_button = browser.find_element(By.CLASS_NAME, 'next-dialog-close')
        browser.execute_script("arguments[0].click();", close_button)
    except Exception as e:  # deliberate best-effort: never abort the crawl here
        print(f"searchKey: there is no suspond Page1. e = {e}")
try:
    # Open the list page
    driver.get("https://zujuan.xkw.com/gzsx/zsd27925")
    random_delay(5, 10) # wait for the page to load
    # Load the cookies from the file
    with open("cookies.json", "r") as file:
        cookies = json.load(file)
    # Inject the cookies into the current session
    for cookie in cookies:
        driver.add_cookie(cookie)
    random_delay(2, 6)
    driver.refresh() # reload the page so the injected cookies take effect
    random_delay(5, 10)
    # Explicit wait (up to 10 s) until the <section class='test-list'> element is present
    # (the original comment mentioned a search box, which this script does not use)
    wait = WebDriverWait(driver, 10)
    # Closing the advertisement dialog is currently disabled
    #close_win(driver)
    wait.until(EC.presence_of_element_located((By.XPATH, "//section[@class='test-list']")))
    # Number of result pages handed to scroll_and_collect
    # (the original comment about "the first 100 covers/titles/authors" did not match this script)
    num_pages = 10
    scroll_and_collect(driver, num_pages)
    # Keep the browser open for a while so the result can be inspected
    time.sleep(60)
finally:
    # Always shut the browser down
    driver.quit()
原神测试
import json
import random
import time

from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
# Path to the ChromeDriver executable
chrome_driver_path = 'F:/python/spider/chromedriver-win64/chromedriver.exe'
MAX_ITEMS_BEFORE_WRITING = 20 # flush the collected records to the file after every 20 items
def write_to_file(collected_items, path='result.json'):
    """Append records to a JSON Lines file, one JSON object per line.

    Args:
        collected_items: Iterable of JSON-serializable records.
        path: Destination file. Defaults to ``result.json`` in the current
            working directory, which is the file the function always used
            before this parameter existed.

    Nothing is written (and no message is printed) for an empty batch, so an
    empty call no longer leaves a blank line in the output file.
    """
    # Serialize first so a serialization error cannot leave a half-written batch.
    lines = [json.dumps(item, ensure_ascii=False) + '\n' for item in collected_items]
    if not lines:
        return
    with open(path, 'a', encoding='utf-8') as file:
        file.writelines(lines)
    print("Results saved successfully.")
# Random pause helper, used to imitate the pace of fairly quick click actions
def random_delay(time_start, time_end):
    """Sleep for a random number of seconds between time_start and time_end."""
    pause_seconds = random.uniform(time_start, time_end)
    time.sleep(pause_seconds)
# Configure the Chrome options
options = Options()
options.add_argument("--disable-blink-features=AutomationControlled")
# Start Chrome through the ChromeDriver at chrome_driver_path and maximize its window
driver = webdriver.Chrome(service=Service(executable_path=chrome_driver_path), options=options)
driver.maximize_window()
def scroll_and_collect(driver, num_items, max_idle_scrolls=3):
    """Scroll the result feed and collect up to ``num_items`` distinct notes.

    Each ``<section class='note-item'>`` card yields a dict with the cover
    image URL, title, author avatar URL and author name. Records are flushed
    with ``write_to_file`` each time ``MAX_ITEMS_BEFORE_WRITING`` of them
    have accumulated, and once more at the end for the remainder.

    Args:
        driver: Selenium WebDriver already showing the result feed.
        num_items: Maximum number of notes to collect; 0 or less collects
            nothing.
        max_idle_scrolls: Stop after this many consecutive scrolls that
            produced no new note, so the loop ends when the feed runs out.

    Returns:
        The final, partially filled batch (already written to the file). It
        is empty when the last flush happened exactly on a batch boundary.
    """
    collected_items = []
    seen_keys = set()  # cards stay in the DOM after scrolling; avoid duplicates
    result_count = 0
    idle_scrolls = 0

    while result_count < num_items:
        new_in_pass = 0
        # All note cards currently present on the page
        for item in driver.find_elements(By.XPATH, "//section[@class='note-item']"):
            if result_count >= num_items:
                break
            try:
                cover = item.find_element(
                    By.XPATH, ".//div/a[@class='cover mask ld']/img"
                ).get_attribute("src")
                title = item.find_element(
                    By.XPATH, ".//div[@class='footer']/a[@class='title']/span"
                ).text
                author_avatar = item.find_element(
                    By.XPATH,
                    ".//div[@class='card-bottom-wrapper']/a[@class='author']/img",
                ).get_attribute("src")
                author_name = item.find_element(
                    By.XPATH,
                    ".//div[@class='card-bottom-wrapper']/a[@class='author']"
                    "/div/div[@class='name']/span[@class='name']",
                ).text
            except NoSuchElementException:
                # Card without the expected structure: skip it.
                continue

            key = (cover, title, author_name)
            if key in seen_keys:
                continue
            seen_keys.add(key)

            collected_items.append({
                "cover": cover,
                "title": title,
                "author_avatar": author_avatar,
                "author_name": author_name,
            })
            result_count += 1
            new_in_pass += 1

            # Flush a full batch and start a new one
            if len(collected_items) >= MAX_ITEMS_BEFORE_WRITING:
                write_to_file(collected_items)
                collected_items = []

        if result_count >= num_items:
            break

        # Give up once several scrolls in a row brought nothing new.
        idle_scrolls = 0 if new_in_pass else idle_scrolls + 1
        if idle_scrolls >= max_idle_scrolls:
            break

        # Scroll to the bottom to trigger loading of more notes
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        random_delay(4, 8)  # wait for the new content to load
        try:
            WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.XPATH, "//section[@class='note-item']"))
            )
        except TimeoutException:
            # until() signals "not found in time" with TimeoutException,
            # not NoSuchElementException.
            break

    # Final flush of whatever is left
    if collected_items:
        write_to_file(collected_items)
    return collected_items
try:
    # Open the Xiaohongshu home page
    driver.get("https://www.xiaohongshu.com")
    random_delay(5, 10) # wait for the page to load
    # Load the cookies from the file
    # NOTE(review): the login script visible in this file saves cookies from
    # zujuan.xkw.com — confirm cookies.json holds Xiaohongshu cookies here.
    with open("cookies.json", "r") as file:
        cookies = json.load(file)
    # Inject the cookies into the current session
    for cookie in cookies:
        driver.add_cookie(cookie)
    random_delay(2, 6)
    driver.refresh() # reload the page so the injected cookies take effect
    random_delay(5, 10)
    # Explicit wait (up to 10 s) until the search box is present
    wait = WebDriverWait(driver, 10)
    search_box = wait.until(EC.presence_of_element_located((By.XPATH, "//input[@placeholder='搜索小红书']")))
    search_box.send_keys("原神")
    random_delay(2, 5)
    # Click the search button once it is clickable
    search_button = wait.until(EC.element_to_be_clickable((By.XPATH, "//div[@class='input-button']")))
    search_button.click()
    random_delay(2, 5)
    # Wait until at least one result card is present
    wait.until(EC.presence_of_element_located((By.XPATH, "//section[@class='note-item']")))
    # Collect cover, title, author avatar and author name of the results
    # (the original comment said "first 100", but the value requested below is 10)
    num_items = 10
    scroll_and_collect(driver, num_items)
    # Keep the browser open for a while so the search result can be inspected
    time.sleep(60)
finally:
    # Always shut the browser down
    driver.quit()
C# 版本,需要安装的 NuGet 包:Selenium.WebDriver.ChromeDriver, Selenium.WebDriver, Selenium.Support, Selenium.Chrome.WebDriver
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.IO;
using System.Linq;
using System.Runtime.InteropServices;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using System.Windows.Forms;
using OpenQA.Selenium.Chrome;
namespace WindowsFormsBroswer
{
/// <summary>
/// https://www.cnblogs.com/zhaotianff/p/11330810.html selenium操作
/// https://www.nowcoder.com/discuss/746291688196296704
/// https://zhuanlan.zhihu.com/p/27397132773 selenium截图
/// https://blog.csdn.net/c_xiazai12345/article/details/120654809 窗口切换
/// https://www.yisu.com/jc/556716.html CefSharp替换WebBrowser控件
/// https://www.cnblogs.com/mq0036/p/11059644.html
/// </summary>
public partial class Form1 : Form
{
/// <summary>
/// Creates the form; the only work done here is the call to InitializeComponent().
/// </summary>
public Form1()
{
    InitializeComponent();
}
/// <summary>
/// Crawls 1688.com for every URL found under the "图片url" folder of the current directory.
/// Each sub-folder must contain a data.txt whose lines, joined without separators, form the
/// URL to open. The list page source is passed to Write(..., Pagetypeenum.列表); every offer
/// of the returned model is then opened and its page source passed to
/// Write(..., Pagetypeenum.详情).
/// </summary>
/// <remarks>
/// When the site redirects to login.1688.com, trylogin() is called once; if the login page
/// still appears afterwards the method returns early. Any exception closes the driver via
/// CloseChromeDriver() and is rethrown.
/// </remarks>
private void crawlingWebFunc()
{
    SetText("\r\n开始尝试...");

    // Build the work list: one entry per sub-directory.
    List<testfold> surls = new List<testfold>();
    string path = System.Environment.CurrentDirectory + "\\图片url\\";
    DirectoryInfo root = new DirectoryInfo(path);
    foreach (var itemdic in root.GetDirectories())
    {
        // Join all lines without a separator. File.ReadLines closes the file even when
        // reading fails, unlike the previous manually closed StreamReader.
        string txt = string.Concat(File.ReadLines(itemdic.FullName + "\\data.txt"));
        surls.Add(new testfold() { key = itemdic.FullName, picurl = txt });
    }

    ChromeDriverService service = ChromeDriverService.CreateDefaultService(System.Environment.CurrentDirectory);
    // service.HideCommandPromptWindow = true;
    ChromeOptions options = new ChromeOptions();
    options.AddArguments("--test-type", "--ignore-certificate-errors");
    options.AddArgument("enable-automation");
    // options.AddArgument("headless");
    // options.AddArguments("--proxy-server=http://user:password@yourProxyServer.com:8080");

    // IWebDriver lives in OpenQA.Selenium, which this file does not import, so it is
    // written fully qualified.
    using (OpenQA.Selenium.IWebDriver driver = new OpenQA.Selenium.Chrome.ChromeDriver(service, options, TimeSpan.FromSeconds(120)))
    {
        driver.Url = "https://www.1688.com/";
        Thread.Sleep(200);
        try
        {
            int a = 1; // 1-based number of the current entry, used in progress messages
            foreach (var itemsurls in surls)
            {
                SetText("\r\n第" + a.ToString() + "个");
                driver.Navigate().GoToUrl(itemsurls.picurl);

                // Login handling
                if (driver.Url.Contains("login.1688.com"))
                {
                    SetText("\r\n需要登录,开始尝试...");
                    trylogin(driver); // login attempt finished
                    // Try once more
                    driver.Navigate().GoToUrl("https://s.1688.com/youyuan/index.htm?tab=imageSearch&imageType=oss&imageAddress=cbuimgsearch/eWXC7XHHPN1607529600000&spm=");
                    if (driver.Url.Contains("login.1688.com"))
                    {
                        // Still on the login page: give up
                        SetText("\r\n退出,换ip重试...");
                        return;
                    }
                }

                // NOTE: the page only ever shows one hover container at a time, so forcing all
                // 'hover-container' elements visible via JavaScript (style.display = "block")
                // and downloading them did not work; the hover content has to be fetched
                // another way.
                Thread.Sleep(500);
                var responseModel = Write(itemsurls.key, driver.PageSource, Pagetypeenum.列表);
                Thread.Sleep(500);

                int i = 1; // 1-based number of the current offer within this entry
                foreach (var offer in responseModel?.data?.offerList ?? new List<OfferItemModel>())
                {
                    driver.Navigate().GoToUrl(offer.information.detailUrl);
                    Write(itemsurls.key, driver.PageSource, Pagetypeenum.详情);
                    SetText("\r\n第" + a.ToString() + "-" + i.ToString() + "个");
                    Thread.Sleep(500);
                    i++;
                }

                // Previously missing: without it every entry was reported as number 1.
                a++;
            }
        }
        catch (Exception)
        {
            // Shut the driver and its console window down before propagating the error.
            CloseChromeDriver(driver);
            throw;
        }
    }
}
#region 异常 退出chromedriver
// FindWindow from user32.dll; GetWindowHandle() calls it with a null class name and a window title.
[DllImport("user32.dll", EntryPoint = "FindWindow")]
private extern static IntPtr FindWindow(string lpClassName, string lpWindowName);
// SendMessage from user32.dll; CloseWindow() uses it to send message 0x10 to the window that was found.
// NOTE(review): wParam/lParam are declared as int here — confirm this is adequate for 64-bit processes.
[DllImport("user32.dll", EntryPoint = "SendMessage")]
public static extern int SendMessage(IntPtr hWnd, int Msg, int wParam, int lParam);
// nCmdShow values for ShowWindow; neither constant is referenced in the visible code.
public const int SW_HIDE = 0;
public const int SW_SHOW = 5;
// ShowWindow from user32.dll; not called in the visible code.
[DllImport("user32.dll", EntryPoint = "ShowWindow")]
public static extern int ShowWindow(IntPtr hwnd, int nCmdShow);
/// <summary>
/// Looks up the window whose title is the full path of chromedriver.exe
/// inside the current directory.
/// </summary>
/// <returns>The handle returned by FindWindow for that title.</returns>
public IntPtr GetWindowHandle()
{
    // The window title being searched for is "{current directory}\chromedriver.exe".
    return FindWindow(null, Environment.CurrentDirectory + "\\chromedriver.exe");
}
/// <summary>
/// Closes the chromedriver window on a best-effort basis: message 0x10 is sent to the
/// handle returned by GetWindowHandle(), and any exception is ignored.
/// </summary>
public void CloseWindow()
{
    const int closeMessage = 0x10; // WM_CLOSE

    try
    {
        SendMessage(GetWindowHandle(), closeMessage, 0, 0);
    }
    catch
    {
        // Deliberately ignored: failing to close the window must not break the caller.
    }
}
/// <summary>
/// Shuts chromedriver down: quits and disposes the driver, then closes the
/// chromedriver window through CloseWindow().
/// </summary>
/// <param name="driver">Driver to shut down; null is tolerated and only the window is closed.</param>
/// <remarks>
/// This is cleanup code, so failures while quitting are reported to the debug output
/// instead of being thrown, and CloseWindow() always runs.
/// </remarks>
public void CloseChromeDriver(OpenQA.Selenium.IWebDriver driver)
{
    // IWebDriver lives in OpenQA.Selenium, which this file does not import, so the
    // parameter type is written fully qualified (it is the same type as before).
    if (driver != null)
    {
        try
        {
            try
            {
                driver.Quit();
            }
            finally
            {
                // Dispose even when Quit() throws; previously it was skipped in that case.
                driver.Dispose();
            }
        }
        catch (Exception ex)
        {
            System.Diagnostics.Debug.WriteLine("CloseChromeDriver: " + ex.Message);
        }
    }

    CloseWindow();
}
#endregion 异常 退出chromedriver
}
}
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Windows.Forms;
namespace WindowsFormsBroswer
{
public partial class Form2 : Form
{
/// <summary>
/// Creates the form, starts loading https://www.baidu.com/ in a new WebBrowser
/// and registers WebBrowser_DocumentCompleted for its DocumentCompleted event.
/// </summary>
public Form2()
{
    InitializeComponent();

    // NOTE(review): the browser is a local that is not added to this form's Controls in
    // the visible code — confirm it is meant to stay invisible.
    var webBrowser = new WebBrowser();
    webBrowser.Navigate("https://www.baidu.com/");
    webBrowser.DocumentCompleted += WebBrowser_DocumentCompleted;
    //Application.Run( );
    Console.WriteLine("结束");
}
/// <summary>
/// Shows a "加载成功" message box when the document has loaded, then unsubscribes
/// itself from the browser that raised the event.
/// </summary>
/// <param name="sender">The WebBrowser that raised DocumentCompleted.</param>
/// <param name="e">Event data; not used.</param>
private static void WebBrowser_DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e)
{
    try
    {
        MessageBox.Show("加载成功");
    }
    finally
    {
        // The constructor's local "webBrowser" is not in scope in this static method;
        // the browser has to be taken from the event sender.
        var browser = sender as WebBrowser;
        if (browser != null)
        {
            browser.DocumentCompleted -= WebBrowser_DocumentCompleted;
        }
    }
}
}
}