0


使用C#和Selenium实现验证码识别登录详解

一、准备工作
安装Visual Studio或Rider作为开发工具。
通过 NuGet 安装 Selenium WebDriver 库(Selenium.WebDriver)和 Tesseract-OCR 库(Tesseract)。
二、打开网站并设置浏览器窗口
首先,打开浏览器并将窗口最大化,以确保每次截取的图片都是相同的大小:

csharp

using OpenQA.Selenium;
using OpenQA.Selenium.Chrome;
using System;

/// <summary>
/// Walkthrough step 2: starts Chrome, loads the target page and maximizes
/// the browser window.
/// </summary>
class Program
{
    // Address of the page the walkthrough logs in to.
    private const string TargetUrl = "https://www.example.com";

    /// <summary>
    /// Entry point. The browser is intentionally left open when this step ends.
    /// </summary>
    static void Main()
    {
        IWebDriver browser = new ChromeDriver();

        var navigation = browser.Navigate();
        navigation.GoToUrl(TargetUrl);

        // Maximized so that later screenshots always have the same size.
        var window = browser.Manage().Window;
        window.Maximize();
    }
}
三、截取带有验证码的网页内容
截取当前屏幕内容,并保存到本地:

csharp

using OpenQA.Selenium;
using OpenQA.Selenium.Chrome;
using System;
using System.IO;

/// <summary>
/// Walkthrough step 3: opens the target page in a maximized Chrome window
/// and saves a screenshot of it to disk.
/// </summary>
class Program
{
    // Address of the page the walkthrough logs in to.
    private const string TargetUrl = "https://www.example.com";

    // Where the screenshot is written.
    private const string ScreenshotPath = "H:\\test\\01.png";

    /// <summary>
    /// Entry point: open the page, then capture it as a PNG file.
    /// </summary>
    static void Main()
    {
        IWebDriver browser = OpenMaximized(TargetUrl);

        var camera = (ITakesScreenshot)browser;
        camera.GetScreenshot().SaveAsFile(ScreenshotPath, ScreenshotImageFormat.Png);
    }

    // Starts Chrome, navigates to the given address and maximizes the window.
    private static IWebDriver OpenMaximized(string url)
    {
        IWebDriver browser = new ChromeDriver();
        browser.Navigate().GoToUrl(url);
        browser.Manage().Window.Maximize();
        return browser;
    }
}
四、识别图片验证码
使用 Tesseract 识别图片验证码
定位验证码在图片中的位置并截取:
csharp

using OpenQA.Selenium;
using OpenQA.Selenium.Chrome;
using System;
using System.Drawing;
using System.Drawing.Imaging;
using System.IO;
using Tesseract;

/// <summary>
/// Walkthrough step 4: takes a screenshot of the target page, crops the
/// captcha region out of it and runs Tesseract OCR on the cropped image.
/// </summary>
class Program
{
    /// <summary>
    /// Entry point. Writes the full screenshot to H:\test\01.png, the cropped
    /// captcha to H:\test\02.png, and prints the recognized text to the console.
    /// </summary>
    static void Main()
    {
        IWebDriver driver = new ChromeDriver();
        driver.Navigate().GoToUrl("https://www.example.com");
        driver.Manage().Window.Maximize();

        Screenshot screenshot = ((ITakesScreenshot)driver).GetScreenshot();
        screenshot.SaveAsFile("H:\\test\\01.png", ScreenshotImageFormat.Png);

        // Captcha position inside the screenshot, in pixels. These values are
        // only meaningful for the maximized window they were measured in.
        Rectangle captchaArea = new Rectangle(564, 395, 79, 28);

        // Both bitmaps wrap native GDI+ resources; dispose them as soon as the
        // cropped image has been written.
        using (Bitmap fullImg = new Bitmap("H:\\test\\01.png"))
        using (Bitmap captchaImg = fullImg.Clone(captchaArea, fullImg.PixelFormat))
        {
            captchaImg.Save("H:\\test\\02.png", ImageFormat.Png);
        }

        // The engine, the loaded image and the OCR page all hold native
        // Tesseract/Leptonica memory, so the text is copied out before disposal.
        string captchaText;
        using (var ocr = new TesseractEngine(@"./tessdata", "eng", EngineMode.Default))
        using (var img = Pix.LoadFromFile("H:\\test\\02.png"))
        using (var result = ocr.Process(img))
        {
            captchaText = result.GetText().Trim();
        }

        Console.WriteLine("Captcha: " + captchaText);
    }
}
五、输入账号、密码和验证码
定位账号、密码和验证码输入框,并输入相关内容:

csharp

using OpenQA.Selenium;
using OpenQA.Selenium.Chrome;
using System;
using System.Drawing;
using System.Drawing.Imaging;
using System.IO;
using Tesseract;

/// <summary>
/// Walkthrough step 5: recognizes the captcha on the target page and types
/// the user name, password and captcha text into the login form.
/// </summary>
class Program
{
    /// <summary>
    /// Entry point. Writes the full screenshot to H:\test\01.png and the cropped
    /// captcha to H:\test\02.png, prints the recognized text, then fills the form.
    /// </summary>
    static void Main()
    {
        IWebDriver driver = new ChromeDriver();
        driver.Navigate().GoToUrl("https://www.example.com");
        driver.Manage().Window.Maximize();

        Screenshot screenshot = ((ITakesScreenshot)driver).GetScreenshot();
        screenshot.SaveAsFile("H:\\test\\01.png", ScreenshotImageFormat.Png);

        // Captcha position inside the screenshot, in pixels. These values are
        // only meaningful for the maximized window they were measured in.
        Rectangle captchaArea = new Rectangle(564, 395, 79, 28);

        // Both bitmaps wrap native GDI+ resources; dispose them as soon as the
        // cropped image has been written.
        using (Bitmap fullImg = new Bitmap("H:\\test\\01.png"))
        using (Bitmap captchaImg = fullImg.Clone(captchaArea, fullImg.PixelFormat))
        {
            captchaImg.Save("H:\\test\\02.png", ImageFormat.Png);
        }

        // The engine, the loaded image and the OCR page all hold native
        // Tesseract/Leptonica memory, so the text is copied out before disposal.
        string captchaText;
        using (var ocr = new TesseractEngine(@"./tessdata", "eng", EngineMode.Default))
        using (var img = Pix.LoadFromFile("H:\\test\\02.png"))
        using (var result = ocr.Process(img))
        {
            captchaText = result.GetText().Trim();
        }

        Console.WriteLine("Captcha: " + captchaText);

        // Locate the three form inputs by their element ids.
        IWebElement username = driver.FindElement(By.Id("username"));
        IWebElement password = driver.FindElement(By.Id("password_1"));
        IWebElement captcha = driver.FindElement(By.Id("user_ck"));

        username.SendKeys("your_username");
        password.SendKeys("your_password");
        captcha.SendKeys(captchaText);
    }
}
六、点击登录按钮
定位并点击登录按钮:

csharp

using OpenQA.Selenium;
using OpenQA.Selenium.Chrome;
using System;
using System.Drawing;
using System.Drawing.Imaging;
using System.IO;
using Tesseract;

/// <summary>
/// Walkthrough step 6: recognizes the captcha, fills in the login form and
/// clicks the login button.
/// </summary>
class Program
{
    /// <summary>
    /// Entry point. Writes the full screenshot to H:\test\01.png and the cropped
    /// captcha to H:\test\02.png, prints the recognized text, fills the form and
    /// submits it.
    /// </summary>
    static void Main()
    {
        IWebDriver driver = new ChromeDriver();
        driver.Navigate().GoToUrl("https://www.example.com");
        driver.Manage().Window.Maximize();

        Screenshot screenshot = ((ITakesScreenshot)driver).GetScreenshot();
        screenshot.SaveAsFile("H:\\test\\01.png", ScreenshotImageFormat.Png);

        // Captcha position inside the screenshot, in pixels. These values are
        // only meaningful for the maximized window they were measured in.
        Rectangle captchaArea = new Rectangle(564, 395, 79, 28);

        // Both bitmaps wrap native GDI+ resources; dispose them as soon as the
        // cropped image has been written.
        using (Bitmap fullImg = new Bitmap("H:\\test\\01.png"))
        using (Bitmap captchaImg = fullImg.Clone(captchaArea, fullImg.PixelFormat))
        {
            captchaImg.Save("H:\\test\\02.png", ImageFormat.Png);
        }

        // The engine, the loaded image and the OCR page all hold native
        // Tesseract/Leptonica memory, so the text is copied out before disposal.
        string captchaText;
        using (var ocr = new TesseractEngine(@"./tessdata", "eng", EngineMode.Default))
        using (var img = Pix.LoadFromFile("H:\\test\\02.png"))
        using (var result = ocr.Process(img))
        {
            captchaText = result.GetText().Trim();
        }

        Console.WriteLine("Captcha: " + captchaText);

        // Locate the three form inputs by their element ids.
        IWebElement username = driver.FindElement(By.Id("username"));
        IWebElement password = driver.FindElement(By.Id("password_1"));
        IWebElement captcha = driver.FindElement(By.Id("user_ck"));

        username.SendKeys("your_username");
        password.SendKeys("your_password");
        captcha.SendKeys(captchaText);

        // The login button is looked up by its name attribute.
        IWebElement loginButton = driver.FindElement(By.Name("yt0"));
        loginButton.Click();
    }
}
七、关闭浏览器
最后,关闭浏览器:

csharp

using OpenQA.Selenium;
using OpenQA.Selenium.Chrome;
using System;
using System.Drawing;
using System.Drawing.Imaging;
using System.IO;
using Tesseract;

/// <summary>
/// Walkthrough step 7 (complete program): recognizes the captcha, fills in
/// the login form, clicks the login button and closes the browser.
/// </summary>
class Program
{
    /// <summary>
    /// Entry point. Writes the full screenshot to H:\test\01.png and the cropped
    /// captcha to H:\test\02.png, prints the recognized text, submits the login
    /// form and always quits the browser, even when an earlier step throws.
    /// </summary>
    static void Main()
    {
        IWebDriver driver = new ChromeDriver();
        try
        {
            driver.Navigate().GoToUrl("https://www.example.com");
            driver.Manage().Window.Maximize();

            Screenshot screenshot = ((ITakesScreenshot)driver).GetScreenshot();
            screenshot.SaveAsFile("H:\\test\\01.png", ScreenshotImageFormat.Png);

            // Captcha position inside the screenshot, in pixels. These values are
            // only meaningful for the maximized window they were measured in.
            Rectangle captchaArea = new Rectangle(564, 395, 79, 28);

            // Reads back the file written just above; the path must match it
            // exactly, including the drive colon. Both bitmaps wrap native GDI+
            // resources and are disposed once the crop has been written.
            using (Bitmap fullImg = new Bitmap("H:\\test\\01.png"))
            using (Bitmap captchaImg = fullImg.Clone(captchaArea, fullImg.PixelFormat))
            {
                captchaImg.Save("H:\\test\\02.png", ImageFormat.Png);
            }

            // The engine, the loaded image and the OCR page all hold native
            // Tesseract/Leptonica memory, so the text is copied out before disposal.
            string captchaText;
            using (var ocr = new TesseractEngine(@"./tessdata", "eng", EngineMode.Default))
            using (var img = Pix.LoadFromFile("H:\\test\\02.png"))
            using (var result = ocr.Process(img))
            {
                captchaText = result.GetText().Trim();
            }

            Console.WriteLine("Captcha: " + captchaText);

            // Locate the three form inputs by their element ids.
            IWebElement username = driver.FindElement(By.Id("username"));
            IWebElement password = driver.FindElement(By.Id("password_1"));
            IWebElement captcha = driver.FindElement(By.Id("user_ck"));

            username.SendKeys("your_username");
            password.SendKeys("your_password");
            captcha.SendKeys(captchaText);

            // The login button is looked up by its name attribute.
            IWebElement loginButton = driver.FindElement(By.Name("yt0"));
            loginButton.Click();
        }
        finally
        {
            // Runs on success and on failure so the browser session is never
            // left behind when a step above throws.
            driver.Quit();
        }
    }
}
八、问题和解决方案
Tesseract-OCR 报错解决方案
在使用 Tesseract 识别图片时,如果出现与 tesseract-ocr 相关的报错,可以前往 tesseract-ocr 的下载页面下载并安装 tesseract-ocr。

创建 TesseractEngine 实例时设置 datapath(第一个参数),确保其指向存放语言数据文件(例如 eng.traineddata)的 tessdata 目录:

csharp

// The first argument is the datapath: here the relative directory "./tessdata".
// "eng" is the language passed to the engine.
// NOTE(review): presumably that directory must contain eng.traineddata — confirm.
var ocr = new TesseractEngine(@"./tessdata", "eng", EngineMode.Default);


本文转载自: https://blog.csdn.net/asfdsgdf/article/details/140425307
版权归原作者 asfdsgdf 所有, 如有侵权,请联系我们删除。

“使用C#和Selenium实现验证码识别登录详解”的评论:

还没有评论