第一种:selenium进行操作。
- 使用selenium自带的滑动和按键操作进行拖拽,会被检测到;即使加上大部分常见的屏蔽(反检测)方式,也还是过不去。
- 使用selenium中driver的execute_script进行JS注入操作。(无法通过验证,不过这段代码在浏览器中可以正常运行)
# JavaScript injected into the page. It looks up the element with id
# 'nc_1_n1z', reads its bounding rectangle and the width of its parent
# container, then dispatches a synthetic 'mousedown' at the element's top-left
# corner (x0, y0) followed by a synthetic 'mousemove' to (x0 + container
# width, y0). No 'mouseup' event is dispatched.
js1 = "".join((
    # Locate the slider and its container.
    "var slider = document.getElementById('nc_1_n1z'),container = slider.parentNode; ",
    # Start point (x0, y0) and end point (x1, y1) of the drag.
    "var rect = slider.getBoundingClientRect(),x0= rect.x || rect.left,y0= rect.y || rect.top,",
    "w = container.getBoundingClientRect().width,x1= x0 + w,y1= y0; ",
    # Press the mouse button on the slider.
    "var mousedown = document.createEvent('MouseEvents'); ",
    "mousedown.initMouseEvent('mousedown', true, true, window, 0, x0, y0, x0, y0, false, false, false, false, 0, null);",
    "slider.dispatchEvent(mousedown);",
    # Move the pointer to the far end of the container.
    "var mousemove = document.createEvent('MouseEvents'); ",
    "mousemove.initMouseEvent('mousemove', true, true, window, 0, x1, y1, x1, y1, false, false, false, false, 0, null); ",
    "slider.dispatchEvent(mousemove);",
))
# Run the script in the page currently loaded by the selenium driver.
driver.execute_script(js1)
- Windows可以用win32api 方式进行滑动。(可通过,局限比较大,点击代码自取)
- PyAutoGUI 可以在 macOS 和 Windows 上使用。(在服务器上不可用,点击代码自取)
第二种:puppeteer
- 直接使用puppeteer模块进行访问和存储,通过的几率要比selenium大一些,但还是很难通过,成功率大约只有10%-20%。
const puppeteer = require('puppeteer');
const fs = require('fs');

// Location of the locally installed Chrome binary that Puppeteer drives.
const CHROME_EXECUTABLE_PATH =
  '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome';

// Id of the slider element that gets dragged after the page has loaded.
const SLIDER_ELEMENT_ID = 'nc_1_n1z';

/**
 * Resolve after the given delay.
 *
 * Used instead of `page.waitForTimeout`, which is no longer available in
 * current Puppeteer releases.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>} Promise that resolves once the delay has elapsed.
 */
function sleep(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}

/**
 * Register the scripts that run in every new document before the page's own
 * scripts, overriding the browser properties this script wants to disguise.
 *
 * @param {object} page - Puppeteer page to patch.
 * @returns {Promise<void>} Resolves when all scripts are registered.
 */
async function installStealthPatches(page) {
  // Remove the `webdriver` property from the navigator prototype.
  await page.evaluateOnNewDocument(() => {
    const newProto = navigator.__proto__;
    delete newProto.webdriver;
    navigator.__proto__ = newProto;
  });

  // Additionally make `navigator.webdriver` report `false` when it is read.
  await page.evaluateOnNewDocument(() => {
    Object.defineProperty(navigator, 'webdriver', { get: () => false });
  });

  // Report a fixed platform string.
  // NOTE(review): the user agent set in `update` advertises Windows while this
  // reports Linux — confirm that this combination is intended.
  await page.evaluateOnNewDocument(() => {
    Object.defineProperty(navigator, 'platform', { get: () => 'Linux x86_64' });
  });

  // Provide a populated `navigator.plugins` list.
  await page.evaluateOnNewDocument(() => {
    Object.defineProperty(navigator, 'plugins', {
      get: () => [
        {
          0: {
            type: 'application/x-google-chrome-pdf',
            suffixes: 'pdf',
            description: 'Portable Document Format',
            enabledPlugin: Plugin,
          },
          description: 'Portable Document Format',
          filename: 'internal-pdf-viewer',
          length: 1,
          name: 'Chrome PDF Plugin',
        },
        {
          0: {
            type: 'application/pdf',
            suffixes: 'pdf',
            description: '',
            enabledPlugin: Plugin,
          },
          description: '',
          filename: 'mhjfbmdgcfjbbpaeojofohoefgiehjai',
          length: 1,
          name: 'Chrome PDF Viewer',
        },
        {
          0: {
            type: 'application/x-nacl',
            suffixes: '',
            description: 'Native Client Executable',
            enabledPlugin: Plugin,
          },
          1: {
            type: 'application/x-pnacl',
            suffixes: '',
            description: 'Portable Native Client Executable',
            enabledPlugin: Plugin,
          },
          description: '',
          filename: 'internal-nacl-plugin',
          length: 2,
          name: 'Native Client',
        },
      ],
    });
  });

  // Provide a `window.chrome` object so a `!!window.chrome` check succeeds.
  await page.evaluateOnNewDocument(() => {
    window.chrome = {
      runtime: {},
      loadTimes: function () {},
      csi: function () {},
      app: {},
    };
  });
}

/**
 * Dispatch a synthetic `mousedown` on the slider element followed by a
 * synthetic `mousemove` to the right by the width of the slider's container.
 *
 * @param {object} page - Puppeteer page containing the slider.
 * @param {string} sliderId - Id of the slider element.
 * @returns {Promise<boolean>} `true` if the events were dispatched, `false`
 *   if no element with that id exists in the page.
 */
function dragSlider(page, sliderId) {
  return page.evaluate((id) => {
    const slider = document.getElementById(id);
    if (!slider) {
      // Nothing to drag; let the caller carry on and save the page as it is.
      return false;
    }
    const container = slider.parentNode;
    const rect = slider.getBoundingClientRect();
    // `rect.x`/`rect.y` fall back to `left`/`top` when they are missing (or 0,
    // in which case both values are the same anyway).
    const x0 = rect.x || rect.left;
    const y0 = rect.y || rect.top;
    const x1 = x0 + container.getBoundingClientRect().width;
    const y1 = y0;

    const mousedown = document.createEvent('MouseEvents');
    mousedown.initMouseEvent(
      'mousedown', true, true, window, 0,
      x0, y0, x0, y0, false, false, false, false, 0, null
    );
    slider.dispatchEvent(mousedown);

    const mousemove = document.createEvent('MouseEvents');
    mousemove.initMouseEvent(
      'mousemove', true, true, window, 0,
      x1, y1, x1, y1, false, false, false, false, 0, null
    );
    slider.dispatchEvent(mousemove);
    return true;
  }, sliderId);
}

/**
 * Open `url` in Chrome, attempt the slider drag, and save the inner HTML of
 * the page's `<html>` element to `a.html` in the current working directory.
 *
 * The browser is always closed, even when navigation or evaluation fails.
 * A failure to write the file is logged and not rethrown.
 *
 * @param {string} url - Address of the page to load.
 * @returns {Promise<void>} Resolves when the file has been written or the
 *   write failure has been logged; rejects if the browser work fails.
 */
async function update(url) {
  // Pass `headless: false` to watch the browser window while it runs.
  const browser = await puppeteer.launch({
    headless: true,
    slowMo: 150,
    args: ['--window-size=1920,1080'],
    executablePath: CHROME_EXECUTABLE_PATH,
  });

  let html;
  try {
    const page = await browser.newPage();
    await page.setUserAgent(
      'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3477.0 Safari/537.36'
    );
    await page.setExtraHTTPHeaders({
      'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
      'Accept-Language': 'zh-CN,zh;q=0.9',
      'Cache-Control': 'max-age=0',
      'Connection': 'keep-alive',
      'Sec-Fetch-Dest': 'document',
      'Sec-Fetch-Mode': 'navigate',
      'Sec-Fetch-Site': 'same-origin',
      'Sec-Fetch-User': '?1',
      'Upgrade-Insecure-Requests': '1',
    });
    await installStealthPatches(page);

    await page.goto(url);
    // Give the page time to render before looking for the slider.
    await sleep(5000);
    await dragSlider(page, SLIDER_ELEMENT_ID);
    // Give the page time to react to the drag before capturing it.
    await sleep(5000);

    // Read the markup from the main frame (not from any nested iframe).
    const frame = page.mainFrame();
    const bodyHandle = await frame.$('html');
    html = await frame.evaluate((body) => body.innerHTML, bodyHandle);
    await bodyHandle.dispose();
  } finally {
    // Close the browser on success and on failure so no Chrome process leaks.
    await browser.close();
  }

  try {
    await fs.promises.writeFile('a.html', html);
  } catch (error) {
    console.log("写入文件失败,原因是:" + error.message);
    return;
  }
  console.log('写入成功');
}

// Replace the address with the real target site. The original author notes
// that invoking this script from Python raises an error.
update('https://www.123456789.com').catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
本文转载自: https://blog.csdn.net/weixin_45195493/article/details/127448985
版权归原作者 马虎的程序猿 所有, 如有侵权,请联系我们删除。
版权归原作者 马虎的程序猿 所有, 如有侵权,请联系我们删除。