Java使用selenium实现RPA采集机器人
采集机器人主要用于采集部分网站的数据。但需要注意,未经许可私自爬取网站数据可能涉及违法,请谨慎使用。
程序以打包后独立运行的方式部署,因此入口采用 main 方法的形式。
主流程
主流程主要为,参数初始化;启动浏览器登录;开启循环采集;采集结束,发送错误信息并睡眠;
publicclassRpaRobot{publicstaticvoidstart(){// 初始化参数配置RpaRobotConfig.init();LogManager.logInfo("rpa客户端启动中...");// 配置初始化,启动浏览器,登录LogInOutHandler.openBrowserStart();LogManager.logInfo("rpa客户端启动成功...");while(true){// 错误信息列表List<Map<String,String>> errorList =newArrayList<>();// 采集CollectHandler.Collect(errorList);// 将错误信息发送钉钉if(!CollectionUtils.isEmpty(errorList)){DingDingSendMsg.sendMsg(true,Robot.getCxt().getConfig().getDingURL(), JSON
.toJSONString(errorList),null);}// 睡眠CollectHandler.toSleep();}}publicstaticvoidmain(String[] args){start();}}
基础类构建
RPA上下文
@DatapublicclassRpaContext{// 当前浏览器privateWebDriver driver;// 加载的文件配置privateJSONConfig config;}
配置类
@DatapublicclassJSONConfig{/** 登录网址*/privateString openUrl ="";/** 公司名*/privateString companyName ="";/** 手机号*/privateString phoneNumber ="";/** 密码*/privateString password ="";/** 验证是否已采集url*/privateString checkFactoringUrl ="";/** 保理保存url*/privateString saveFactoringUrl ="";/** 发送钉钉消息url*/privateString dingURL ="";/** 保存日志url*/privateString saveLogUrl;/** 发送钉钉-保存日志异常*/privateString logDingURL;}
运行配置(可忽略)
@DatapublicclassRunConfig{privateString invokeTime;privatelong lastTime;privatelong nextTime =0L;privateString runTaskName;privatelong runTaskStartTime;privatelong runTaskEndTime;privateint errorIndex;// 一次任务循环间隔privatelong timeInterva;// 某一批次明细是否采集privateboolean collection;privateboolean down;privateboolean approve;privatelong pushIndex;privatelong rpaStart;publicStringgetLogFileUrl(){try{String fileUrl =FileWriteTools.fileDir +"/"+newSimpleDateFormat("yyyy-MM-dd").format(newDate())+".txt";return fileUrl;}catch(Exception e){return"";}}}
浏览器工具
// 浏览器工具publicclassBrowserTools{// 私有化工具类privateBrowserTools(){}publicstaticLogger logger =LoggerFactory.getLogger(BrowserTools.class);/**
* 生产谷歌浏览器引擎
*/publicstaticWebDriverchromDriver(){WebDriver driver =newChromeDriver();return driver;}publicstaticWebDriverinternetExplorerDriver(boolean wait){DesiredCapabilities ieCapabilities =DesiredCapabilities.internetExplorer();// 启用 {@link #FORCE_CREATE_PROCESS} 时定义使用的 IE CLI 切换的功能
ieCapabilities.setCapability(InternetExplorerDriver.IE_SWITCHES,"-private");// 定义在操作期间使用本机事件还是 JavaScript 事件的能力
ieCapabilities.setCapability(InternetExplorerDriver.NATIVE_EVENTS,false);// 定义在 IEDriverServer 启动期间忽略非浏览器保护模式设置的能力
ieCapabilities.setCapability(InternetExplorerDriver.INTRODUCE_FLAKINESS_BY_IGNORING_SECURITY_DOMAINS,true);// 定义在 IEDriverServer 启动 IE 之前清理或不清理浏览器缓存的能力
ieCapabilities.setCapability(InternetExplorerDriver.IE_ENSURE_CLEAN_SESSION,true);// 启用此功能以默认接受所有 SSL(安全套接字协议) 证书
ieCapabilities.setCapability(CapabilityType.ACCEPT_SSL_CERTS,true);
ieCapabilities.setJavascriptEnabled(true);// 需要窗口焦点
ieCapabilities.setCapability("requireWindowFocus",true);// 不启用持久悬停
ieCapabilities.setCapability("enablePersistentHover",false);if(wait){// 页面加载策略
ieCapabilities.setCapability("pageLoadStrategy","none");}WebDriver driver =newInternetExplorerDriver(ieCapabilities);return driver;}publicstaticWebDriverwebDriver(String name){String[] names = name.replace(",",",").split(",");if("ie".equals(names[0])){if(names.length >1){returninternetExplorerDriver("no".equals(names[1]));}else{returninternetExplorerDriver(false);}}elseif("chrom".equals(names[0])){returnchromDriver();}returnnull;}}
加载系统变量(浏览器等)
publicclassLoadSystemTools{privatestaticfinalString JACOB_DLL_PATH ="C:/Windows/System32/jacob-1.19-x64.dll";privatestaticfinalString SAVE_FILE_URL ="plug/SaveIEFile.exe";publicstaticLogger logger =LoggerFactory.getLogger(LoadSystemTools.class);/**
* @description 获取当前工程的根路径
* @createTime 2020年3月9日下午6:18:45
* @version 1.0.0
* @return
*/publicstaticStringgetRootPath(){String path ="";try{
path =URLDecoder.decode(LoadSystemTools.class.getResource("/").getPath().replaceFirst("/",""),"utf-8");}catch(Exception e){thrownewRuntimeException("解析URL异常");}return path;}publicstaticvoidloadSystem(){// 用于加载系统中的变量System.setProperty(LibraryLoader.JACOB_DLL_PATH, JACOB_DLL_PATH);// 运行// String iePath = getRootPath() + "drivers/IEDriverServer.exe";// 打包String iePath ="drivers/IEDriverServer.exe";System.setProperty("webdriver.ie.driver", iePath);// 运行// String chromPath = getRootPath() + "drivers/chromedriver.exe";// 打包String chromPath ="drivers/chromedriver.exe";System.setProperty("webdriver.chrome.driver", chromPath);}publicstaticvoidloadSystemForTest(){System.setProperty(LibraryLoader.JACOB_DLL_PATH, JACOB_DLL_PATH);System.setProperty("webdriver.ie.driver","src/main/resources/drivers/IEDriverServer.exe");System.setProperty("webdriver.chrome.driver","src/main/resources/drivers/chromedriver.exe");}publicstaticStringgetSaveIEFileUrl(){returngetRootPath()+ SAVE_FILE_URL;}
操作类
publicclassOperateHandler{privatestaticfinalString ID ="id";privatestaticfinalString XPATH ="xpath";privatestaticfinalString click ="click";// 点击privatestaticfinalString sendKeys ="sendKeys";// 赋值privatestaticfinalString getText ="getText";// 获取值privatestaticfinalString getAttribute ="getAttribute";// 获取input值/**
* 机器人无异常操作
* @param methodKey 键
* @param methodVal 值
* @param operation 操作
* @param param 操作参数
* @param errorRetryTime 错误重试次数
* @return void
*/publicstaticvoidrobotOperationNoException(String methodKey,String methodVal,String operation,String param,int errorRetryTime){try{robotOperation(methodKey, methodVal, operation, param, errorRetryTime);}catch(Exception e){LogManager.logError(null,"robotOperationNoException->异常");}}publicstaticStringrobotOperation(String methodKey,String methodVal,String operation,CharSequence param){returnrobotOperation(methodKey, methodVal, operation, param,3);}publicstaticStringrobotOperation(String methodKey,String methodVal,String operation,CharSequence param,int errorRetryTime){RpaContext cxt =RobotFactoring.getCxt();boolean isGoOn =true;int i =0;String res ="";while(isGoOn){try{switch(operation){case click:click(findElement(methodKey, methodVal));
isGoOn =false;break;case sendKeys:findElement(methodKey, methodVal).sendKeys(param);
isGoOn =false;break;case getText:
res =findElement(methodKey, methodVal).getText();
isGoOn =false;break;case getAttribute:
res =findElement(methodKey, methodVal).getAttribute("value");
isGoOn =false;break;default:
isGoOn =false;}ThreadTools.sleepMillis(50);}catch(Exception e){
i++;ThreadTools.sleepMillis(300);if(i > errorRetryTime){LogManager.logError(null,"执行参数:operation="+ operation +",methodKey="+ methodKey
+",methodVal="+ methodVal +",param="+ param +", 执行次数="+(i-1));thrownewRuntimeException(e.getMessage());}}}return res;}publicstaticWebElementfindElement(String methodKey,String methodVal){RpaContext cxt =RobotFactoring.getCxt();WebElement ele = cxt.getDriver().findElement(locator(methodKey, methodVal));return ele;}/**
* 点击事件
* 如果超时忽略报错
*
* @param webElement
*/publicstaticvoidclick(WebElement webElement){try{
webElement.click();}catch(TimeoutException e){
e.printStackTrace();}catch(Exception e){throw e;}}/**
* 查找元素集合
* @param methodKey
* @param methodVal
* @return List<WebElement>
*/publicstaticList<WebElement>findElements(String methodKey,String methodVal)throwsException{RpaContext cxt =RobotFactoring.getCxt();List<WebElement> elements =null;try{
elements = cxt.getDriver().findElements(locator(methodKey, methodVal));}catch(Exception e){LogManager.logError(e,"查询多个元素异常,methodKey:"+ methodKey +", methodVal:"+ methodVal);}return elements;}publicstaticbooleanelementExist(String methodKey,String methodVal){try{RobotFactoring.getCxt().getDriver().findElement(locator(methodKey, methodVal));returntrue;}catch(Exception e){returnfalse;}}/**
* 判断元素集合是否存在
* @param methodKey
* @param methodVal
* @return boolean
*/publicstaticbooleanelementsExist(String methodKey,String methodVal){try{List<WebElement> elements =RobotFactoring.getCxt().getDriver().findElements(locator(methodKey, methodVal));if(CollectionUtils.isEmpty(elements)){returnfalse;}returntrue;}catch(Exception e){returnfalse;}}privatestaticBylocator(String methodKey,String methodVal){if("id".equals(methodKey)){returnBy.id(methodVal);}elseif("name".equals(methodKey)){returnBy.name(methodVal);}elseif("className".equals(methodKey)){returnBy.className(methodVal);}elseif("tagName".equals(methodKey)){returnBy.tagName(methodVal);}elseif("linkText".equals(methodKey)){returnBy.linkText(methodVal);}elseif("partialLinkText".equals(methodKey)){returnBy.partialLinkText(methodVal);}elseif("xpath".equals(methodKey)){returnBy.xpath(methodVal);}elseif("css".equals(methodKey)){returnBy.cssSelector(methodVal);}else{returnnull;}}publicstaticvoidclose(WebDriver driver){
driver.close();// cmd 关闭应用Runtime rt =Runtime.getRuntime();try{// rt.exec("cmd.exe /C start /b taskkill /f /t /im iexplore.exe /im chrome.exe /im IEDriverServer.exe /im chromedriver.exe");
rt.exec("cmd.exe /C start /b taskkill /f /t /im iexplore.exe /im IEDriverServer.exe");Thread.sleep(3000L);}catch(Exception e){
e.printStackTrace();}}}
参数初始化
publicclassRpaRobotConfig{publicstaticvoidinit(){LoadSystemTools.loadSystem();RpaContext cxt =newRpaContext();ExcelConfig config =newExcelConfig();RunConfig runConfig =newRunConfig();
runConfig.setInvokeTime(newSimpleDateFormat("yyyy-MM-dd").format(newDate()));
runConfig.setRunTaskName("RPA启动流程");try{InputStream is =FactoringRpaRobotConfig.class.getResourceAsStream("/config/config.json");BufferedReader br =newBufferedReader(newInputStreamReader(is,StandardCharsets.UTF_8));String s="";String configContentStr ="";try{while((s=br.readLine())!=null){
configContentStr = configContentStr+s;}}catch(IOException e){
e.printStackTrace();}JSONConfig jsonConfig =JSONObject.parseObject(configContentStr,JSONConfig.class);
cxt.setConfig(jsonConfig);
cxt.setRunConfig(runConfig);RobotFactoring.setCxt(cxt);}catch(Exception e){LogManager.logError(e,"rpa初始化异常");}}}
打开浏览器,登录
publicclassLogInOutHandler{/**
* 开启浏览器,并开始登录
* @param
* @return void
* @author yuanyaheng
* @date 2022/4/21 9:59
*/publicstaticvoidopenBrowserStart(){RpaContext cxt =RobotFactoring.getCxt();try{quitDriver(cxt.getDriver());WebDriver driver =BrowserTools.internetExplorerDriver(false);RobotFactoring.getCxt().setDriver(driver);
cxt.setDriver(driver);// 打开浏览器
driver.manage().window().maximize();System.out.println("准备打开页面");// 访问页面
driver.navigate().to(cxt.getConfig().getOpenUrl());}catch(Exception e){LogManager.logError(e,"openBrowserStart登录异常");}int loginTimes =1;// 首次登录,登录等待时间为4秒startLogin(cxt,loginTimes,RobotConstants.LOGIN_WAIT_TIME);}privatestaticvoidquitDriver(WebDriver driver){try{if(driver !=null){LogManager.logInfo("quitDriver->开启清理老版浏览器进程");
driver.close();
driver.quit();Runtime.getRuntime().exec("taskkill /F /IM IEDriverServer.exe");}}catch(Exception e){LogManager.logError(e,"quitDriver->浏览器关闭异常");}// 老版浏览器清理后,等待10秒再次开启新的旅程ThreadTools.sleep(10);}/**
* 开始登录
* @param cxt
* @param loginTimes 登录次数
* @param loginWaitTime 点击登录等待时间
* @return void
* @author yuanyaheng
* @date 2022/4/21 9:58
*/publicstaticvoidstartLogin(RpaContext cxt,int loginTimes,int loginWaitTime){ThreadTools.sleep(2);try{login(cxt,loginWaitTime);// 登录后检查登录状态if(checkLoginStatus(cxt)){// 登录成功打印日志LogManager.getInstance().loginSuccess();}else{// 登录失败则重新访问页面重新开始登录流程// cxt.getDriver().get(cxt.getConfig().getOpenUrl());
loginTimes++;// 登录失败等待时间+1秒
loginWaitTime++;if(loginTimes >=5){// 关闭浏览器OperateHandler.close(cxt.getDriver());LogManager.logInfo("连续五次登录失败,准备重新打开浏览器,尝试登录");// 重新打开浏览器openBrowserStart();}else{LogManager.getInstance().loginFailure("登录失败,尝试重新登录,当前登录次数:"+ loginTimes);ThreadTools.sleep(2);// 重新开始登录流程 ,递归startLogin(cxt,loginTimes,loginWaitTime);}}}catch(Exception e){LogManager.logError(e,"startLogin登录异常");ThreadTools.sleep(2);}}/**
* 检查登录状态
* @param
* @return void
* @author yuanyaheng
* @date 2022/4/1 16:17
*/publicstaticbooleancheckLoginStatus(RpaContext cxt){// 元素 首页 存在,则为trueif(clickFirstPageValidate()){returntrue;}try{// 若不存在,则重新访问页面
cxt.getDriver().get(cxt.getConfig().getOpenUrl());}catch(Exception e){LogManager.logError(e,"重新访问页面失败"+ e.getMessage());// 访问页面发生异常,则关闭浏览器OperateHandler.close(cxt.getDriver());ThreadTools.sleep(2);// 重新打开浏览器FactoringLogInOutHandler.openBrowserStart();}ThreadTools.sleep(3);// 检测是否存在密码框if(OperateHandler.elementExist(RobotConstants.ID,"password")){LogManager.logInfo("checkLoginStatus->检测到登录密码框,需要重新登录");ThreadTools.sleep(2);returnfalse;}// 递归returncheckLoginStatus(cxt);}/**
* 检查元素 首页 是否存在
* @param
* @return boolean
* @author yuanyaheng
* @date 2022/4/1 16:59
*/publicstaticbooleanclickFirstPageValidate(){try{// 首页按钮是否存在boolean elementExist =OperateHandler.elementExist(RobotConstants.XPATH,"//div[@class='header']/div[2]/ul/a[1]");if(elementExist){// 若存在则点击验证OperateHandler.robotOperation(RobotConstants.XPATH,"//div[@class='header']/div[2]/ul/a[1]",RobotConstants.click,null);returntrue;}}catch(Exception e){LogManager.logError(null,"建信保理首页无法点击");}returnfalse;}publicstaticvoidlogin(RpaContext cxt,int loginWaitTime){// 输入公司名OperateHandler.robotOperationNoException(RobotConstants.XPATH,"//input[@name='name']",RobotConstants.sendKeys,
cxt.getConfig().getCompanyName(),1);ThreadTools.sleepMillis(100);// 输入手机号OperateHandler.robotOperationNoException(RobotConstants.XPATH,"//input[@name='mobile']",RobotConstants.sendKeys,
cxt.getConfig().getPhoneNumber(),1);ThreadTools.sleepMillis(100);// 输入密码OperateHandler.robotOperationNoException(RobotConstants.ID,"password",RobotConstants.sendKeys,
cxt.getConfig().getPassword(),1);ThreadTools.sleepMillis(100);// 点击登录OperateHandler.robotOperationNoException(RobotConstants.ID,"loginSubmit",RobotConstants.click,
cxt.getConfig().getPassword(),1);ThreadTools.sleep(loginWaitTime);}/**
* 登出
* @param cxt
* @return void
* @author yuanyaheng
* @date 2022/4/6 16:45
*/publicstaticbooleanlogout(RpaContext cxt){try{// 判断 展示退出登录是否存在if(OperateHandler.elementExist(RobotConstants.XPATH,"//div[@class='logout ivu-dropdown']//div[@class='ivu-dropdown-rel']//i[@class='ivu-icon ivu-icon-ios-arrow-down']")){// 若存在,则将鼠标移动到该位置WebElement element = cxt.getDriver().findElement(By.xpath("//div[@class='logout ivu-dropdown']//div[@class='ivu-dropdown-rel']//i[@class='ivu-icon ivu-icon-ios-arrow-down']"));Actions action =newActions(cxt.getDriver());
action.moveToElement(element);}else{returnfalse;}ThreadTools.sleep(1);// 判断退出登录按钮是否存在if(OperateHandler.elementExist(RobotConstants.XPATH,"//div[@class='ivu-select-dropdown logout-dropdown']//ul[@class='ivu-dropdown-menu']//li[@class='ivu-dropdown-item']")){// 若存在,使用js进行点击操作(因为该元素隐藏,driver.click()无法进行操作)WebElement logout = cxt.getDriver().findElement(By.xpath("//div[@class='ivu-select-dropdown logout-dropdown']//ul[@class='ivu-dropdown-menu']//li[@class='ivu-dropdown-item']"));JavascriptExecutor js =(JavascriptExecutor) cxt.getDriver();
js.executeScript("arguments[0].click()", logout);ThreadTools.sleep(3);}else{returnfalse;}// 判断确认按钮是否存在if(OperateHandler.elementExist(RobotConstants.XPATH,"//div[@class='modal-footer']//button[@class='ivu-btn ivu-btn-primary']")){// 点击确认OperateHandler.click(cxt.getDriver().findElement(By.xpath("//div[@class='modal-footer']//button[@class='ivu-btn ivu-btn-primary']")));}else{returnfalse;}returntrue;}catch(Exception e){LogManager.logInfo("logout->退出登录失败");returnfalse;}}}
采集
采集过程中需要注意:如果采集时跳转到了其他页面(例如进入详情页),页面 DOM 会被重新渲染,之前通过 driver 获取到的 WebElement 引用就会失效(再次使用会抛出 StaleElementReferenceException)。因此流程是:先获取当前页的若干融信编号,再根据融信编号点击对应的详情按钮;详情采集完毕返回列表后,必须重新获取融信编号对应的元素。
若需要持续性的采集,那么在代码中就需要对所有可能出现的异常进行处理,防止运行时因为异常导致中断。
package com.banksteel.finance.rpa.factoring;

import com.alibaba.fastjson.JSONObject;
import com.banksteel.finance.rpa.config.ExcelConfig;
import com.banksteel.finance.rpa.config.RobotFactoring;
import com.banksteel.finance.rpa.config.RpaContext;
import com.banksteel.finance.rpa.constant.RobotConstants;
import com.banksteel.finance.rpa.log.LogManager;
import com.banksteel.finance.rpa.tools.DateUtil;
import com.banksteel.finance.rpa.tools.HttpClientUtil;
import com.banksteel.finance.rpa.tools.StringTools;
import com.banksteel.finance.rpa.tools.ThreadTools;
import com.google.gson.JsonObject;
import java.math.BigDecimal;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.logging.Logger;
import org.openqa.selenium.By;
import org.openqa.selenium.InvalidElementStateException;
import org.openqa.selenium.Keys;
import org.openqa.selenium.NoSuchElementException;
import org.openqa.selenium.StaleElementReferenceException;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.springframework.util.StringUtils;

/**
 * Collects factoring records from the list and detail pages and posts them to the save
 * endpoint; also hosts the sleep step that runs between collection cycles.
 *
 * @version 1.0.0
 */
public class FactoringCollectHandler {

    // Not referenced anywhere in this class.
    private static ExcelConfig excelConfig;

    /** Driver used by the helpers below; refreshed from the shared context on every retry. */
    private static WebDriver driver;

    private static final int RETRY_MAX_TIME = 5;

    /** Rows per list page, used to turn the total row count into a page count. */
    private static final int PAGE_SIZE = 5;

    private static final String ALL_TAB_XPATH = "//div[@class='main-tabs']/div[4]";
    private static final String ACC_NO_SPAN_XPATH = "//span[@class='margin-right-1']";
    private static final String DETAIL_BUTTON_XPATH =
            "./div[@class='list-header']//button[@class='btn btn-middle current ivu-btn ivu-btn-default']";
    private static final String CLOSE_BUTTON_XPATH = "//button[@class='btn current ivu-btn ivu-btn-primary']";
    private static final String PAGE_TOTAL_XPATH = "//ul[@class='page fr ivu-page']//span[@class='ivu-page-total']";
    private static final String PAGE_INPUT_XPATH = "//div[@class='ivu-page-options-elevator']//input";
    private static final String CONFIRM_SPAN_XPATH =
            "//div[@class='modal-footer']//button[@class='ivu-btn ivu-btn-primary']//span[text()='确认']";

    /**
     * Runs one collection cycle with retries.
     *
     * <p>Every retry (after an exception, or because {@code collectData} asked for one) counts
     * towards {@code RETRY_MAX_TIME}, so an empty list or a permanently failing page cannot
     * keep the cycle spinning forever. From the third failed attempt on, the login state is
     * checked and a new login is performed if needed.
     *
     * @param errorList receives error entries produced while collecting
     */
    public static void factoringCollect(List<Map<String, String>> errorList) {
        boolean retryFlag = true;
        int retryTime = 1;
        while (retryFlag) {
            // Re-read the driver: a re-login may have replaced the browser since the last attempt.
            driver = RobotFactoring.getCxt().getDriver();
            // Collection always starts from the first page.
            int page = 1;
            try {
                // Numbers handled in this attempt.
                List<String> usedLetterNum = new ArrayList<>();
                // Numbers that must not be collected.
                List<String> notCollectList = new ArrayList<>();
                getNotCollectList(notCollectList);
                LogManager.logInfo("factoringCollect->不采集集合为:" + notCollectList);
                retryFlag = collectData(errorList, page, usedLetterNum, notCollectList);
            } catch (Exception e) {
                LogManager.logError(e, "factoringCollect->采集异常,当前重试次数" + retryTime);
                if (retryTime >= 3) {
                    LogManager.logInfo("factoringCollect->当前重试次数大于等于3,尝试返回首页,或重新登录");
                    boolean loginStatus = LogInOutHandler.checkLoginStatus(RobotFactoring.getCxt());
                    if (!loginStatus) {
                        int loginTime = 1;
                        LogInOutHandler.startLogin(RobotFactoring.getCxt(), loginTime,
                                RobotConstants.LOGIN_WAIT_TIME);
                        retryTime = 1;
                    }
                }
            }
            if (retryFlag) {
                retryTime++;
                if (retryTime >= RETRY_MAX_TIME) {
                    retryFlag = false;
                    LogManager.logInfo("factoringCollect->当前重试次数超出上线,停止重试");
                }
            }
        }
    }

    /**
     * Fills {@code notCollectList} with the numbers shown on the first three tabs
     * (pending receipt, transfer, pending payment).
     */
    private static void getNotCollectList(List<String> notCollectList) throws InterruptedException {
        String pendingReceipt = "//div[@class='main-tabs']/div[1]";
        getAccNumAllPage(pendingReceipt, driver, notCollectList);
        String transfer = "//div[@class='main-tabs']/div[2]";
        getAccNumAllPage(transfer, driver, notCollectList);
        String pendingPayment = "//div[@class='main-tabs']/div[3]";
        getAccNumAllPage(pendingPayment, driver, notCollectList);
    }

    /**
     * Walks the "all" tab page by page and collects every record that still needs collecting.
     * At most one record is handled per loop pass, because opening a detail page invalidates
     * the list elements.
     *
     * @return {@code true} if the caller should retry from scratch, {@code false} when all pages
     *         have been processed
     */
    private static boolean collectData(List<Map<String, String>> errorList, int page,
            List<String> usedLetterNum, List<String> notCollectList) {
        while (true) {
            try {
                // Open the "all" tab.
                OperateHandler.robotOperation(RobotConstants.XPATH, ALL_TAB_XPATH, RobotConstants.click, null);
                ThreadTools.sleepMillis(5000L);
                // Jump to the current page; on failure ask for a retry.
                if (!inputPage(driver, page)) {
                    return true;
                }
                List<WebElement> spans = driver.findElements(By.xpath(ACC_NO_SPAN_XPATH));
                if (spans.size() == 0) {
                    System.out.println("跳出本次循环检测");
                    LogManager.logInfo("collectData->当前页融信编号个数为0,跳出本次检测,当前页码为:" + page);
                    return true;
                }
                // Stays true only when nothing on this page needs collecting.
                boolean pageFlagPlus = true;
                for (WebElement span : spans) {
                    String accNoText = span.getText();
                    if (!needCollect(usedLetterNum, notCollectList, accNoText)) {
                        continue;
                    }
                    pageFlagPlus = false;
                    usedLetterNum.add(accNoText);
                    // Ancestor three levels up from the number span.
                    WebElement parent = span.findElement(By.xpath("./../../.."));
                    try {
                        OperateHandler.click(parent.findElement(By.xpath(DETAIL_BUTTON_XPATH)));
                    } catch (NoSuchElementException e) {
                        String message = e.getMessage();
                        if (message != null && message.contains("Unable to find element")) {
                            // No detail button for this record: remember it as not collectable.
                            usedLetterNum.remove(accNoText);
                            notCollectList.add(accNoText);
                            LogManager.logInfo("collectData->未找到当前融信编号对应详情按钮,当前页码为:" + page
                                    + ",当前融信编号为:" + accNoText);
                            break;
                        }
                        throw e;
                    }
                    ThreadTools.sleepMillis(3000L);
                    JSONObject jsonObject = new JSONObject();
                    boolean breakFlag = collect(errorList, jsonObject, accNoText, usedLetterNum, notCollectList);
                    if (!breakFlag) {
                        // The page has changed, so the remaining spans are stale: leave the loop.
                        break;
                    }
                    jsonObject.put("openDate", DateUtil.formatDate(new Date(), "yyyy-MM-dd"));
                    // A blank fee counts as zero; it has already been reported through errorList.
                    BigDecimal sum = toAmount(jsonObject.getString("serviceFee"))
                            .add(toAmount(jsonObject.getString("financingFee")));
                    DecimalFormat df = new DecimalFormat("0.00");
                    jsonObject.put("otherFees", df.format(sum));
                    jsonObject.put("infoResource", 1);
                    jsonObject.remove("serviceFee");
                    jsonObject.remove("financingFee");
                    System.out.println(jsonObject);
                    LogManager.logInfo("本次采集数据:", jsonObject);
                    try {
                        httpConnectionSave(jsonObject, errorList);
                    } catch (Exception e) {
                        LogManager.logError(e, "保理信息保存失败");
                    }
                    // Close the detail page and return to the list.
                    OperateHandler.click(driver.findElement(By.xpath(CLOSE_BUTTON_XPATH)));
                    break;
                }
                if (pageFlagPlus) {
                    page++;
                }
                handleUnknownWindow();
                ThreadTools.sleepMillis(5000L);
                if (page > readMaxPage(page)) {
                    return false;
                }
            } catch (Exception e) {
                LogManager.logError(e, "collectData->采集过程发生异常");
                // Try to leave a possibly open detail page, then ask for a retry.
                try {
                    if (OperateHandler.elementExist(RobotConstants.XPATH, CLOSE_BUTTON_XPATH)) {
                        OperateHandler.click(driver.findElement(By.xpath(CLOSE_BUTTON_XPATH)));
                        LogManager.logInfo("collectData->采集过程发生异常,点击关闭按钮返回重试");
                    }
                } catch (Exception ex) {
                    LogManager.logInfo("collectData->采集过程发生异常,点击关闭按钮失败,返回重试");
                }
                return true;
            }
            LogManager.logInfo("已采集集合:" + usedLetterNum + " , page = " + page);
        }
    }

    /**
     * Reads the number of list pages from the pager. If the total element is missing, returns
     * {@code currentPage} so that the caller keeps going.
     */
    private static int readMaxPage(int currentPage) {
        if (!OperateHandler.elementExist(RobotConstants.XPATH, PAGE_TOTAL_XPATH)) {
            return currentPage;
        }
        return totalPages(driver.findElement(By.xpath(PAGE_TOTAL_XPATH)).getText());
    }

    /** Converts the pager text (total row count wrapped in 共 … 条) into a page count. */
    private static int totalPages(String totalText) {
        int totalRows = Integer.parseInt(totalText.replace("共", "").replace("条", "").trim());
        return (totalRows + PAGE_SIZE - 1) / PAGE_SIZE;
    }

    /** Parses an amount; {@code null} or blank text yields zero. */
    private static BigDecimal toAmount(String text) {
        if (text == null || text.trim().isEmpty()) {
            return BigDecimal.ZERO;
        }
        return new BigDecimal(text.trim());
    }

    /**
     * Clicks the confirm button of an unexpected modal dialog if one is showing.
     */
    private static void handleUnknownWindow() {
        boolean elementExist = OperateHandler.elementExist(RobotConstants.XPATH, CONFIRM_SPAN_XPATH);
        try {
            if (elementExist) {
                OperateHandler.click(driver.findElement(By.xpath(CONFIRM_SPAN_XPATH)));
            }
        } catch (Exception e) {
            LogManager.logInfo("点击未知弹窗失败");
        }
    }

    /**
     * Reads the fields of the open detail page into {@code jsonObject}.
     *
     * <p>Blank fields are reported through {@code errorList} but do not stop the collection.
     *
     * @return {@code true} if the record was read; {@code false} if it must be skipped (number
     *         mismatch, record not yet in a collectable state, or an exception)
     */
    private static boolean collect(List<Map<String, String>> errorList, JSONObject jsonObject,
            String accNoText, List<String> usedLetterNum, List<String> notCollectList) {
        StringBuilder errorMsg = new StringBuilder();
        try {
            String accNo = getEleTextByText(driver, "融信编号:", RobotConstants.SPAN, 1);
            if (StringTools.isTrimEmpty(accNo)) {
                errorMsg.append("融信编号详情页为空,");
            }
            if (accNo == null || !accNo.equals(accNoText)) {
                // A different record is showing: go back and let the caller retry.
                usedLetterNum.remove(accNoText);
                OperateHandler.click(driver.findElement(By.xpath(CLOSE_BUTTON_XPATH)));
                return false;
            }
            jsonObject.put("accNo", accNo);

            if (OperateHandler.elementExist(RobotConstants.XPATH,
                    "//div[@class='mt20 bg-F9FAFB content-style ivu-row']//span[text()='预计']")) {
                // The marker 预计 means the record is not yet in a collectable state.
                usedLetterNum.remove(accNoText);
                notCollectList.add(accNoText);
                OperateHandler.click(driver.findElement(By.xpath(CLOSE_BUTTON_XPATH)));
                LogManager.logInfo("collect->当前融信详情未到达采集状态,当前融信编号为:" + accNoText);
                return false;
            }

            readField(jsonObject, "preReceiver", "采购商名称:", null, "采购商名称详情页为空,", errorMsg);
            readField(jsonObject, "factorFinancAmt", "融资金额(元):", ",", "融资金额详情页为空,", errorMsg);
            readField(jsonObject, "factorRate", "融资利率:", "%(年化)", "融资利率详情页为空,", errorMsg);
            readField(jsonObject, "accDate", "承诺付款日期:", null, "承诺付款日期详情页为空,", errorMsg);
            readField(jsonObject, "accountDate", "放款日期:", null, "放款日期详情页为空,", errorMsg);
            readField(jsonObject, "financingCost", "融资利息(元):", ",", "融资利息(元)详情页为空,", errorMsg);
            readField(jsonObject, "financingFee", "融资费用(元):", ",", "融资费用详情页为空,", errorMsg);
            readField(jsonObject, "serviceFee", "服务费用(元):", ",", "服务费用详情页为空,", errorMsg);

            if (errorMsg.length() > 0) {
                // Drop the trailing comma and record the problems for this number.
                errorMsg.setLength(errorMsg.length() - 1);
                Map<String, String> errorMap = new HashMap<>();
                errorMap.put(accNoText, accNoText + "保利信息" + errorMsg);
                errorList.add(errorMap);
            }
            return true;
        } catch (Exception e) {
            LogManager.logError(e, "collect->采集详情页异常,当前融信编号为:" + accNoText);
            return false;
        }
    }

    /**
     * Reads the span next to {@code label}, removes {@code strip} from it (if given), stores it
     * under {@code key} and appends {@code emptyMessage} to {@code errors} when it is blank.
     */
    private static void readField(JSONObject target, String key, String label, String strip,
            String emptyMessage, StringBuilder errors) {
        String value = getEleTextByText(driver, label, RobotConstants.SPAN, 1);
        if (strip != null) {
            value = value.replace(strip, "");
        }
        if (StringTools.isTrimEmpty(value)) {
            errors.append(emptyMessage);
        }
        target.put(key, value);
    }

    /**
     * Returns the text of the element found via a label span.
     *
     * <p>With a sibling type and index the XPath is
     * {@code //span[text()='TEXT']/following-sibling::TYPE[N]}; otherwise it is the label span
     * itself, {@code //span[text()='TEXT']} (the original left this expression unterminated).
     *
     * @param driver          driver to search with
     * @param text            exact label text
     * @param neighborEleType tag name of the following sibling, may be empty
     * @param neighborNum     1-based index of the sibling, may be {@code null}
     * @return the element text
     */
    private static String getEleTextByText(WebDriver driver, String text, String neighborEleType,
            Integer neighborNum) {
        StringBuilder xpath = new StringBuilder("//span[text()='").append(text).append("']");
        if (!StringUtils.isEmpty(neighborEleType) && neighborNum != null) {
            xpath.append("/following-sibling::").append(neighborEleType)
                    .append("[").append(neighborNum).append("]");
        }
        return driver.findElement(By.xpath(xpath.toString())).getText();
    }

    /**
     * Decides whether a number still has to be collected. The two local lists are checked
     * first so the remote lookup only runs when they do not already rule the number out.
     *
     * @return {@code false} if the number is excluded, already handled in this attempt, or
     *         already stored on the server
     */
    private static boolean needCollect(List<String> usedLetterNum, List<String> notCollectList,
            String accNum) {
        if (notCollectList.contains(accNum) || usedLetterNum.contains(accNum)) {
            return false;
        }
        return !httpConnectionSelect(accNum);
    }

    /**
     * Adds every number listed under the given tab, across all of its pages, to
     * {@code notCollectList}.
     *
     * @param clickStr XPath of the tab to open
     */
    private static void getAccNumAllPage(String clickStr, WebDriver driver, List<String> notCollectList)
            throws InterruptedException {
        int page = 1;
        while (true) {
            // Open the tab.
            OperateHandler.robotOperation(RobotConstants.XPATH, clickStr, RobotConstants.click, null);
            ThreadTools.sleep(2);
            // Jump to the page; the result is ignored here, as in the original flow.
            inputPage(driver, page);
            ThreadTools.sleep(2);
            List<WebElement> spans = driver.findElements(By.xpath(ACC_NO_SPAN_XPATH));
            if (spans.size() == 0) {
                System.out.println("跳出本次循环检测");
                break;
            }
            for (WebElement span : spans) {
                notCollectList.add(span.getText());
            }
            page++;
            int pageMax = totalPages(driver.findElement(By.xpath(PAGE_TOTAL_XPATH)).getText());
            if (page > pageMax) {
                break;
            }
        }
    }

    /**
     * Types the page number into the pager's jump box and presses Enter. The input is looked up
     * again before each step.
     *
     * @return {@code false} only when the driver reports "Element is no longer valid";
     *         {@code true} otherwise
     */
    private static boolean inputPage(WebDriver driver, int page) {
        try {
            driver.findElement(By.xpath(PAGE_INPUT_XPATH)).clear();
            ThreadTools.sleepMillis(500L);
            driver.findElement(By.xpath(PAGE_INPUT_XPATH)).sendKeys(page + "");
            ThreadTools.sleepMillis(500L);
            driver.findElement(By.xpath(PAGE_INPUT_XPATH)).sendKeys(Keys.ENTER);
            ThreadTools.sleepMillis(500L);
            System.out.println("------");
            ThreadTools.sleepMillis(5000L);
        } catch (InvalidElementStateException e) {
            String errorMessage = e.getMessage();
            if (errorMessage != null && errorMessage.contains("Element is no longer valid")) {
                // The pager is gone: stop paging.
                return false;
            }
            LogManager.logError(e, "inputPage->输入页码异常");
        }
        return true;
    }

    /**
     * Asks the uniqueness endpoint whether the number has already been stored.
     *
     * @param letterNum number to look up
     * @return the {@code status} flag of the response
     * @throws IllegalStateException if the response is missing or has no {@code status}, so the
     *                               caller retries instead of collecting a possible duplicate
     */
    private static boolean httpConnectionSelect(String letterNum) {
        JSONObject jsonObject = new JSONObject();
        jsonObject.put("accNo", letterNum);
        JSONObject result = HttpClientUtil.httpPost(
                RobotFactoring.getCxt().getConfig().getCheckFactoringUrl(), jsonObject, false);
        Boolean status = result == null ? null : result.getBoolean("status");
        if (status == null) {
            throw new IllegalStateException("唯一性校验接口返回无效,accNo=" + letterNum);
        }
        return status;
    }

    /**
     * Posts a collected record to the save endpoint.
     *
     * @return {@code true} if the endpoint reports success; otherwise an entry is added to
     *         {@code errorList} and {@code false} is returned
     */
    private static boolean httpConnectionSave(JSONObject jsonObject, List<Map<String, String>> errorList) {
        JSONObject result = HttpClientUtil.httpPost(
                RobotFactoring.getCxt().getConfig().getSaveFactoringUrl(), jsonObject, false);
        if (result != null && Boolean.TRUE.equals(result.getBoolean("status"))) {
            return true;
        }
        Map<String, String> errorMap = new HashMap<>();
        errorMap.put(jsonObject.getString("accNo"),
                result == null ? "保存接口无响应" : result.getString("msg"));
        errorList.add(errorMap);
        return false;
    }

    /**
     * Logs out, sleeps for half an hour and then restores a logged-in session: if the logout
     * succeeded, the login page is reloaded and a login is performed; otherwise the browser is
     * restarted.
     */
    public static void toSleep() {
        RpaContext cxt = RobotFactoring.getCxt();
        boolean isLogout = LogInOutHandler.logout(cxt);
        LogManager.logInfo("rpa开始睡眠...");
        // Sleep for half an hour.
        ThreadTools.sleep(30 * 60);
        if (isLogout) {
            LogManager.logInfo("rpa睡眠结束...");
            // Use the driver held by the context: the static field may be unset or point at a
            // browser that has since been replaced.
            cxt.getDriver().navigate().to(cxt.getConfig().getOpenUrl());
            int loginTime = 1;
            LogInOutHandler.startLogin(cxt, loginTime, RobotConstants.LOGIN_WAIT_TIME);
        } else {
            // Logout failed: restart the browser, which also logs in again.
            LogInOutHandler.openBrowserStart();
        }
        ThreadTools.sleep(6);
    }
}
睡眠
睡眠方法在采集类的最下面,睡眠结束后需要检查当前系统状态,是否属于登录状态(检查首页按钮是否存在),若不是,则重新登录
使用总结
主要使用的内容为
WebDriver driver= BrowserTools.internetExplorerDriver(false);
driver.findElement
| 策略 | 语法 | 描述 |
| --- | --- | --- |
| By id | driver.findElement(By.id()) | 通过 id 属性定位元素 |
| By name | driver.findElement(By.name()) | 通过 name 属性定位元素 |
| By class name | driver.findElement(By.className()) | 通过 class 属性定位元素 |
| By tag name | driver.findElement(By.tagName()) | 通过 HTML 标签名定位元素 |
| By link text | driver.findElement(By.linkText()) | 通过链接内容定位元素 |
| By partial link text | driver.findElement(By.partialLinkText()) | 通过部分链接内容定位元素 |
| By css | driver.findElement(By.cssSelector()) | 通过 css 选择器定位元素 |
| By xpath | driver.findElement(By.xpath()) | 通过 xpath 定位元素 |
主要记录一下xpath的语法
| 表达式 | 描述 |
| --- | --- |
| nodename | 选取此节点的所有子节点。 |
| / | 从根节点选取(取子节点)。 |
| // | 从匹配选择的当前节点选择文档中的节点,而不考虑它们的位置(取子孙节点)。 |
| . | 选取当前节点。 |
| .. | 选取当前节点的父节点。 |
| @ | 选取属性。 |

| 路径表达式 | 结果 |
| --- | --- |
| bookstore | 选取 bookstore 元素的所有子节点。 |
| /bookstore | 选取根元素 bookstore。注释:假如路径起始于正斜杠( / ),则此路径始终代表到某元素的绝对路径! |
| bookstore/book | 选取属于 bookstore 的子元素的所有 book 元素。 |
| //book | 选取所有 book 子元素,而不管它们在文档中的位置。 |
| bookstore//book | 选择属于 bookstore 元素的后代的所有 book 元素,而不管它们位于 bookstore 之下的什么位置。 |
| //@lang | 选取名为 lang 的所有属性。 |
谓语
| 路径表达式 | 结果 |
| --- | --- |
| /bookstore/book[1] | 选取属于 bookstore 子元素的第一个 book 元素。 |
| /bookstore/book[last()] | 选取属于 bookstore 子元素的最后一个 book 元素。 |
| /bookstore/book[last()-1] | 选取属于 bookstore 子元素的倒数第二个 book 元素。 |
| /bookstore/book[position()<3] | 选取最前面的两个属于 bookstore 元素的子元素的 book 元素。 |
| //title[@lang] | 选取所有拥有名为 lang 的属性的 title 元素。 |
| //title[@lang='eng'] | 选取所有 title 元素,且这些元素拥有值为 eng 的 lang 属性。 |
| /bookstore/book[price>35.00] | 选取 bookstore 元素的所有 book 元素,且其中的 price 元素的值须大于 35.00。 |
| /bookstore/book[price>35.00]//title | 选取 bookstore 元素中的 book 元素的所有 title 元素,且其中的 price 元素的值须大于 35.00。 |

| 通配符 | 描述 |
| --- | --- |
| * | 匹配任何元素节点。 |
| @* | 匹配任何属性节点。 |
| node() | 匹配任何类型的节点。 |

| 路径表达式 | 结果 |
| --- | --- |
| /bookstore/* | 选取 bookstore 元素的所有子元素。 |
| //* | 选取文档中的所有元素。 |
| //title[@*] | 选取所有带有属性的 title 元素。 |
文本选择:按文本选取看似方便,但有一定概率选取失败。常见原因是 text()='…' 要求文本完全相等,元素文本前后带有空白、换行,或文本被拆分到多个子节点时就无法匹配;此时可以改用 normalize-space() 或 contains() 进行匹配。
//span[text()='预计']
使用 // 进行选取时,若选取不到,可将更多上级写出,以便选取,例如上方例子可写成
driver.findElement(
By.xpath("//div[@class='mt20 bg-F9FAFB content-style ivu-row']//span[text()='预计']"));
记录(登出按钮是隐藏状态,鼠标悬停才会展示登出,如何登出)
当按钮处于隐藏状态时,使用 click 无法对其进行操作,需要使用 JavascriptExecutor 执行点击操作。
// Locate the element that has to be hovered to reveal the menu.
WebElement element = driver.findElement(By.xpath());
// Move the mouse onto it. perform() is required: without it the action is only built
// and never executed.
Actions action = new Actions(driver);
action.moveToElement(element).perform();
// Locate the (hidden) logout element.
WebElement logout = driver.findElement(By.xpath());
// Click it through JavascriptExecutor, since click() cannot operate on a hidden element.
JavascriptExecutor js = (JavascriptExecutor) driver;
js.executeScript("arguments[0].click()", logout);
版权归原作者 java小恒 所有, 如有侵权,请联系我们删除。