0


Java使用selenium实现RPA采集机器人

Java使用selenium实现RPA采集机器人

​ 采集机器人主要应用采集部分网站数据,但是目前私自爬取部分网站数据可能涉及违法,请谨慎使用。

​ 主要使用的是打包运行的方式,因此使用main方法的形式

主流程

主流程主要为,参数初始化;启动浏览器登录;开启循环采集;采集结束,发送错误信息并睡眠;

publicclassRpaRobot{publicstaticvoidstart(){// 初始化参数配置RpaRobotConfig.init();LogManager.logInfo("rpa客户端启动中...");// 配置初始化,启动浏览器,登录LogInOutHandler.openBrowserStart();LogManager.logInfo("rpa客户端启动成功...");while(true){// 错误信息列表List<Map<String,String>> errorList =newArrayList<>();// 采集CollectHandler.Collect(errorList);// 将错误信息发送钉钉if(!CollectionUtils.isEmpty(errorList)){DingDingSendMsg.sendMsg(true,Robot.getCxt().getConfig().getDingURL(), JSON
                    .toJSONString(errorList),null);}// 睡眠CollectHandler.toSleep();}}publicstaticvoidmain(String[] args){start();}}

基础类构建

RPA上下文

@DatapublicclassRpaContext{// 当前浏览器privateWebDriver driver;// 加载的文件配置privateJSONConfig config;}

配置类

@DatapublicclassJSONConfig{/** 登录网址*/privateString openUrl ="";/** 公司名*/privateString companyName ="";/** 手机号*/privateString phoneNumber ="";/** 密码*/privateString password ="";/** 验证是否已采集url*/privateString checkFactoringUrl ="";/** 保理保存url*/privateString saveFactoringUrl ="";/** 发送钉钉消息url*/privateString dingURL ="";/** 保存日志url*/privateString saveLogUrl;/** 发送钉钉-保存日志异常*/privateString logDingURL;}

运行配置(可忽略)

@DatapublicclassRunConfig{privateString invokeTime;privatelong lastTime;privatelong nextTime =0L;privateString runTaskName;privatelong runTaskStartTime;privatelong runTaskEndTime;privateint errorIndex;// 一次任务循环间隔privatelong timeInterva;// 某一批次明细是否采集privateboolean collection;privateboolean down;privateboolean approve;privatelong pushIndex;privatelong rpaStart;publicStringgetLogFileUrl(){try{String fileUrl =FileWriteTools.fileDir +"/"+newSimpleDateFormat("yyyy-MM-dd").format(newDate())+".txt";return fileUrl;}catch(Exception e){return"";}}}

浏览器工具

// 浏览器工具publicclassBrowserTools{// 私有化工具类privateBrowserTools(){}publicstaticLogger logger =LoggerFactory.getLogger(BrowserTools.class);/**
     * 生产谷歌浏览器引擎
     */publicstaticWebDriverchromDriver(){WebDriver driver =newChromeDriver();return driver;}publicstaticWebDriverinternetExplorerDriver(boolean wait){DesiredCapabilities ieCapabilities =DesiredCapabilities.internetExplorer();// 启用 {@link #FORCE_CREATE_PROCESS} 时定义使用的 IE CLI 切换的功能
        ieCapabilities.setCapability(InternetExplorerDriver.IE_SWITCHES,"-private");// 定义在操作期间使用本机事件还是 JavaScript 事件的能力
        ieCapabilities.setCapability(InternetExplorerDriver.NATIVE_EVENTS,false);// 定义在 IEDriverServer 启动期间忽略非浏览器保护模式设置的能力
        ieCapabilities.setCapability(InternetExplorerDriver.INTRODUCE_FLAKINESS_BY_IGNORING_SECURITY_DOMAINS,true);// 定义在 IEDriverServer 启动 IE 之前清理或不清理浏览器缓存的能力
        ieCapabilities.setCapability(InternetExplorerDriver.IE_ENSURE_CLEAN_SESSION,true);// 启用此功能以默认接受所有 SSL(安全套接字协议) 证书
        ieCapabilities.setCapability(CapabilityType.ACCEPT_SSL_CERTS,true);
        ieCapabilities.setJavascriptEnabled(true);// 需要窗口焦点
        ieCapabilities.setCapability("requireWindowFocus",true);// 不启用持久悬停
        ieCapabilities.setCapability("enablePersistentHover",false);if(wait){// 页面加载策略
            ieCapabilities.setCapability("pageLoadStrategy","none");}WebDriver driver =newInternetExplorerDriver(ieCapabilities);return driver;}publicstaticWebDriverwebDriver(String name){String[] names = name.replace(",",",").split(",");if("ie".equals(names[0])){if(names.length >1){returninternetExplorerDriver("no".equals(names[1]));}else{returninternetExplorerDriver(false);}}elseif("chrom".equals(names[0])){returnchromDriver();}returnnull;}}

加载系统变量(浏览器等)

publicclassLoadSystemTools{privatestaticfinalString JACOB_DLL_PATH ="C:/Windows/System32/jacob-1.19-x64.dll";privatestaticfinalString SAVE_FILE_URL ="plug/SaveIEFile.exe";publicstaticLogger logger =LoggerFactory.getLogger(LoadSystemTools.class);/**
     * @description 获取当前工程的根路径
     * @createTime 2020年3月9日下午6:18:45
     * @version 1.0.0
     * @return
     */publicstaticStringgetRootPath(){String path ="";try{
            path =URLDecoder.decode(LoadSystemTools.class.getResource("/").getPath().replaceFirst("/",""),"utf-8");}catch(Exception e){thrownewRuntimeException("解析URL异常");}return path;}publicstaticvoidloadSystem(){// 用于加载系统中的变量System.setProperty(LibraryLoader.JACOB_DLL_PATH, JACOB_DLL_PATH);// 运行//        String iePath = getRootPath() + "drivers/IEDriverServer.exe";// 打包String iePath ="drivers/IEDriverServer.exe";System.setProperty("webdriver.ie.driver", iePath);// 运行//        String chromPath = getRootPath() + "drivers/chromedriver.exe";// 打包String chromPath ="drivers/chromedriver.exe";System.setProperty("webdriver.chrome.driver", chromPath);}publicstaticvoidloadSystemForTest(){System.setProperty(LibraryLoader.JACOB_DLL_PATH, JACOB_DLL_PATH);System.setProperty("webdriver.ie.driver","src/main/resources/drivers/IEDriverServer.exe");System.setProperty("webdriver.chrome.driver","src/main/resources/drivers/chromedriver.exe");}publicstaticStringgetSaveIEFileUrl(){returngetRootPath()+ SAVE_FILE_URL;}

操作类

publicclassOperateHandler{privatestaticfinalString ID ="id";privatestaticfinalString XPATH ="xpath";privatestaticfinalString click ="click";// 点击privatestaticfinalString sendKeys ="sendKeys";// 赋值privatestaticfinalString getText ="getText";// 获取值privatestaticfinalString getAttribute ="getAttribute";// 获取input值/**
     * 机器人无异常操作
     * @param    methodKey    键
     * @param    methodVal    值
     * @param    operation    操作
     * @param    param        操作参数
     * @param    errorRetryTime    错误重试次数
     * @return  void
     */publicstaticvoidrobotOperationNoException(String methodKey,String methodVal,String operation,String param,int errorRetryTime){try{robotOperation(methodKey, methodVal, operation, param, errorRetryTime);}catch(Exception e){LogManager.logError(null,"robotOperationNoException->异常");}}publicstaticStringrobotOperation(String methodKey,String methodVal,String operation,CharSequence param){returnrobotOperation(methodKey, methodVal, operation, param,3);}publicstaticStringrobotOperation(String methodKey,String methodVal,String operation,CharSequence param,int errorRetryTime){RpaContext cxt =RobotFactoring.getCxt();boolean isGoOn =true;int i =0;String res ="";while(isGoOn){try{switch(operation){case click:click(findElement(methodKey, methodVal));
                        isGoOn =false;break;case sendKeys:findElement(methodKey, methodVal).sendKeys(param);
                        isGoOn =false;break;case getText:
                        res =findElement(methodKey, methodVal).getText();
                        isGoOn =false;break;case getAttribute:
                        res =findElement(methodKey, methodVal).getAttribute("value");
                        isGoOn =false;break;default:
                        isGoOn =false;}ThreadTools.sleepMillis(50);}catch(Exception e){
                i++;ThreadTools.sleepMillis(300);if(i > errorRetryTime){LogManager.logError(null,"执行参数:operation="+ operation +",methodKey="+ methodKey
                            +",methodVal="+ methodVal +",param="+ param +", 执行次数="+(i-1));thrownewRuntimeException(e.getMessage());}}}return res;}publicstaticWebElementfindElement(String methodKey,String methodVal){RpaContext cxt =RobotFactoring.getCxt();WebElement ele = cxt.getDriver().findElement(locator(methodKey, methodVal));return ele;}/**
     * 点击事件
     * 如果超时忽略报错
     *
     * @param webElement
     */publicstaticvoidclick(WebElement webElement){try{
            webElement.click();}catch(TimeoutException e){
            e.printStackTrace();}catch(Exception e){throw e;}}/**
     * 查找元素集合
     * @param    methodKey
     * @param    methodVal
     * @return  List<WebElement>
     */publicstaticList<WebElement>findElements(String methodKey,String methodVal)throwsException{RpaContext cxt =RobotFactoring.getCxt();List<WebElement> elements =null;try{
            elements = cxt.getDriver().findElements(locator(methodKey, methodVal));}catch(Exception e){LogManager.logError(e,"查询多个元素异常,methodKey:"+ methodKey +", methodVal:"+ methodVal);}return elements;}publicstaticbooleanelementExist(String methodKey,String methodVal){try{RobotFactoring.getCxt().getDriver().findElement(locator(methodKey, methodVal));returntrue;}catch(Exception e){returnfalse;}}/**
     * 判断元素集合是否存在
     * @param    methodKey
     * @param    methodVal
     * @return  boolean
     */publicstaticbooleanelementsExist(String methodKey,String methodVal){try{List<WebElement> elements =RobotFactoring.getCxt().getDriver().findElements(locator(methodKey, methodVal));if(CollectionUtils.isEmpty(elements)){returnfalse;}returntrue;}catch(Exception e){returnfalse;}}privatestaticBylocator(String methodKey,String methodVal){if("id".equals(methodKey)){returnBy.id(methodVal);}elseif("name".equals(methodKey)){returnBy.name(methodVal);}elseif("className".equals(methodKey)){returnBy.className(methodVal);}elseif("tagName".equals(methodKey)){returnBy.tagName(methodVal);}elseif("linkText".equals(methodKey)){returnBy.linkText(methodVal);}elseif("partialLinkText".equals(methodKey)){returnBy.partialLinkText(methodVal);}elseif("xpath".equals(methodKey)){returnBy.xpath(methodVal);}elseif("css".equals(methodKey)){returnBy.cssSelector(methodVal);}else{returnnull;}}publicstaticvoidclose(WebDriver driver){
        driver.close();// cmd 关闭应用Runtime rt =Runtime.getRuntime();try{//            rt.exec("cmd.exe /C start /b taskkill /f /t /im iexplore.exe /im chrome.exe /im IEDriverServer.exe /im chromedriver.exe");
            rt.exec("cmd.exe /C start /b taskkill /f /t /im iexplore.exe  /im IEDriverServer.exe");Thread.sleep(3000L);}catch(Exception e){
            e.printStackTrace();}}}

参数初始化

publicclassRpaRobotConfig{publicstaticvoidinit(){LoadSystemTools.loadSystem();RpaContext cxt =newRpaContext();ExcelConfig config =newExcelConfig();RunConfig runConfig =newRunConfig();
        runConfig.setInvokeTime(newSimpleDateFormat("yyyy-MM-dd").format(newDate()));
        runConfig.setRunTaskName("RPA启动流程");try{InputStream is =FactoringRpaRobotConfig.class.getResourceAsStream("/config/config.json");BufferedReader br =newBufferedReader(newInputStreamReader(is,StandardCharsets.UTF_8));String s="";String configContentStr ="";try{while((s=br.readLine())!=null){
                configContentStr = configContentStr+s;}}catch(IOException e){

            e.printStackTrace();}JSONConfig jsonConfig =JSONObject.parseObject(configContentStr,JSONConfig.class);
            cxt.setConfig(jsonConfig);
            cxt.setRunConfig(runConfig);RobotFactoring.setCxt(cxt);}catch(Exception e){LogManager.logError(e,"rpa初始化异常");}}}

打开浏览器,登录

publicclassLogInOutHandler{/**
     * 开启浏览器,并开始登录
     * @param
     * @return  void
     * @author  yuanyaheng
     * @date    2022/4/21 9:59
     */publicstaticvoidopenBrowserStart(){RpaContext cxt =RobotFactoring.getCxt();try{quitDriver(cxt.getDriver());WebDriver driver =BrowserTools.internetExplorerDriver(false);RobotFactoring.getCxt().setDriver(driver);
            cxt.setDriver(driver);// 打开浏览器
            driver.manage().window().maximize();System.out.println("准备打开页面");// 访问页面
            driver.navigate().to(cxt.getConfig().getOpenUrl());}catch(Exception e){LogManager.logError(e,"openBrowserStart登录异常");}int loginTimes =1;// 首次登录,登录等待时间为4秒startLogin(cxt,loginTimes,RobotConstants.LOGIN_WAIT_TIME);}privatestaticvoidquitDriver(WebDriver driver){try{if(driver !=null){LogManager.logInfo("quitDriver->开启清理老版浏览器进程");
                driver.close();
                driver.quit();Runtime.getRuntime().exec("taskkill /F /IM IEDriverServer.exe");}}catch(Exception e){LogManager.logError(e,"quitDriver->浏览器关闭异常");}// 老版浏览器清理后,等待10秒再次开启新的旅程ThreadTools.sleep(10);}/**
     * 开始登录
     * @param    cxt
     * @param    loginTimes    登录次数
     * @param    loginWaitTime    点击登录等待时间
     * @return  void
     * @author  yuanyaheng
     * @date    2022/4/21 9:58
     */publicstaticvoidstartLogin(RpaContext cxt,int loginTimes,int loginWaitTime){ThreadTools.sleep(2);try{login(cxt,loginWaitTime);// 登录后检查登录状态if(checkLoginStatus(cxt)){// 登录成功打印日志LogManager.getInstance().loginSuccess();}else{// 登录失败则重新访问页面重新开始登录流程//                cxt.getDriver().get(cxt.getConfig().getOpenUrl());
                loginTimes++;// 登录失败等待时间+1秒
                loginWaitTime++;if(loginTimes >=5){// 关闭浏览器OperateHandler.close(cxt.getDriver());LogManager.logInfo("连续五次登录失败,准备重新打开浏览器,尝试登录");// 重新打开浏览器openBrowserStart();}else{LogManager.getInstance().loginFailure("登录失败,尝试重新登录,当前登录次数:"+ loginTimes);ThreadTools.sleep(2);// 重新开始登录流程 ,递归startLogin(cxt,loginTimes,loginWaitTime);}}}catch(Exception e){LogManager.logError(e,"startLogin登录异常");ThreadTools.sleep(2);}}/**
     * 检查登录状态
     * @param
     * @return  void
     * @author  yuanyaheng
     * @date    2022/4/1 16:17
     */publicstaticbooleancheckLoginStatus(RpaContext cxt){// 元素 首页 存在,则为trueif(clickFirstPageValidate()){returntrue;}try{// 若不存在,则重新访问页面
            cxt.getDriver().get(cxt.getConfig().getOpenUrl());}catch(Exception e){LogManager.logError(e,"重新访问页面失败"+ e.getMessage());// 访问页面发生异常,则关闭浏览器OperateHandler.close(cxt.getDriver());ThreadTools.sleep(2);// 重新打开浏览器FactoringLogInOutHandler.openBrowserStart();}ThreadTools.sleep(3);// 检测是否存在密码框if(OperateHandler.elementExist(RobotConstants.ID,"password")){LogManager.logInfo("checkLoginStatus->检测到登录密码框,需要重新登录");ThreadTools.sleep(2);returnfalse;}// 递归returncheckLoginStatus(cxt);}/**
     * 检查元素 首页 是否存在
     * @param
     * @return  boolean
     * @author  yuanyaheng
     * @date    2022/4/1 16:59
     */publicstaticbooleanclickFirstPageValidate(){try{// 首页按钮是否存在boolean elementExist =OperateHandler.elementExist(RobotConstants.XPATH,"//div[@class='header']/div[2]/ul/a[1]");if(elementExist){// 若存在则点击验证OperateHandler.robotOperation(RobotConstants.XPATH,"//div[@class='header']/div[2]/ul/a[1]",RobotConstants.click,null);returntrue;}}catch(Exception e){LogManager.logError(null,"建信保理首页无法点击");}returnfalse;}publicstaticvoidlogin(RpaContext cxt,int loginWaitTime){// 输入公司名OperateHandler.robotOperationNoException(RobotConstants.XPATH,"//input[@name='name']",RobotConstants.sendKeys,
            cxt.getConfig().getCompanyName(),1);ThreadTools.sleepMillis(100);// 输入手机号OperateHandler.robotOperationNoException(RobotConstants.XPATH,"//input[@name='mobile']",RobotConstants.sendKeys,
            cxt.getConfig().getPhoneNumber(),1);ThreadTools.sleepMillis(100);// 输入密码OperateHandler.robotOperationNoException(RobotConstants.ID,"password",RobotConstants.sendKeys,
            cxt.getConfig().getPassword(),1);ThreadTools.sleepMillis(100);// 点击登录OperateHandler.robotOperationNoException(RobotConstants.ID,"loginSubmit",RobotConstants.click,
            cxt.getConfig().getPassword(),1);ThreadTools.sleep(loginWaitTime);}/**
     * 登出
     * @param    cxt
     * @return  void
     * @author  yuanyaheng
     * @date    2022/4/6 16:45
     */publicstaticbooleanlogout(RpaContext cxt){try{// 判断 展示退出登录是否存在if(OperateHandler.elementExist(RobotConstants.XPATH,"//div[@class='logout ivu-dropdown']//div[@class='ivu-dropdown-rel']//i[@class='ivu-icon ivu-icon-ios-arrow-down']")){// 若存在,则将鼠标移动到该位置WebElement element = cxt.getDriver().findElement(By.xpath("//div[@class='logout ivu-dropdown']//div[@class='ivu-dropdown-rel']//i[@class='ivu-icon ivu-icon-ios-arrow-down']"));Actions action =newActions(cxt.getDriver());
                action.moveToElement(element);}else{returnfalse;}ThreadTools.sleep(1);// 判断退出登录按钮是否存在if(OperateHandler.elementExist(RobotConstants.XPATH,"//div[@class='ivu-select-dropdown logout-dropdown']//ul[@class='ivu-dropdown-menu']//li[@class='ivu-dropdown-item']")){// 若存在,使用js进行点击操作(因为该元素隐藏,driver.click()无法进行操作)WebElement logout = cxt.getDriver().findElement(By.xpath("//div[@class='ivu-select-dropdown logout-dropdown']//ul[@class='ivu-dropdown-menu']//li[@class='ivu-dropdown-item']"));JavascriptExecutor js =(JavascriptExecutor) cxt.getDriver();
                js.executeScript("arguments[0].click()", logout);ThreadTools.sleep(3);}else{returnfalse;}// 判断确认按钮是否存在if(OperateHandler.elementExist(RobotConstants.XPATH,"//div[@class='modal-footer']//button[@class='ivu-btn ivu-btn-primary']")){// 点击确认OperateHandler.click(cxt.getDriver().findElement(By.xpath("//div[@class='modal-footer']//button[@class='ivu-btn ivu-btn-primary']")));}else{returnfalse;}returntrue;}catch(Exception e){LogManager.logInfo("logout->退出登录失败");returnfalse;}}}

采集

采集过程要注意的是,每次采集后,如果发生了跳转到其他页面的情况,那么之前通过 driver 获取到的页面元素(WebElement)就会失效(再次使用会抛出 StaleElementReferenceException)。因此当详情页数据采集完、返回列表页后,不能继续使用之前获取的元素。做法是:先获取当前页的数个融信编号,然后根据融信编号点击对应的详情按钮,采集完毕返回后,需要重新获取融信编号。

​ 若需要持续性的采集,那么在代码中就需要对所有可能出现的异常进行处理,防止运行时因为异常导致中断。

package com.banksteel.finance.rpa.factoring;

import com.alibaba.fastjson.JSONObject;
import com.banksteel.finance.rpa.config.ExcelConfig;
import com.banksteel.finance.rpa.config.RobotFactoring;
import com.banksteel.finance.rpa.config.RpaContext;
import com.banksteel.finance.rpa.constant.RobotConstants;
import com.banksteel.finance.rpa.log.LogManager;
import com.banksteel.finance.rpa.tools.DateUtil;
import com.banksteel.finance.rpa.tools.HttpClientUtil;
import com.banksteel.finance.rpa.tools.StringTools;
import com.banksteel.finance.rpa.tools.ThreadTools;
import com.google.gson.JsonObject;
import java.math.BigDecimal;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.logging.Logger;
import org.openqa.selenium.By;
import org.openqa.selenium.InvalidElementStateException;
import org.openqa.selenium.Keys;
import org.openqa.selenium.NoSuchElementException;
import org.openqa.selenium.StaleElementReferenceException;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.springframework.util.StringUtils;

/**
 * Collects factoring records from the list and detail pages and posts them to the backend.
 *
 * @version 1.0.0
 */
public class FactoringCollectHandler {

    private static ExcelConfig excelConfig;
    /** Driver used by the helpers below; refreshed from the shared context before each round. */
    private static WebDriver driver;
    private static final int RETRY_MAX_TIME = 5;
    /** Rows per list page, used to turn the total row count into a page count. */
    private static final int PAGE_SIZE = 5;

    private static final String ALL_TAB_XPATH = "//div[@class='main-tabs']/div[4]";
    private static final String ACC_NO_SPAN_XPATH = "//span[@class='margin-right-1']";
    private static final String CLOSE_BUTTON_XPATH = "//button[@class='btn current ivu-btn ivu-btn-primary']";
    private static final String TOTAL_XPATH = "//ul[@class='page fr ivu-page']//span[@class='ivu-page-total']";
    private static final String PAGE_INPUT_XPATH = "//div[@class='ivu-page-options-elevator']//input";
    private static final String UNKNOWN_CONFIRM_XPATH =
            "//div[@class='modal-footer']//button[@class='ivu-btn ivu-btn-primary']//span[text()='确认']";
    private static final String ESTIMATE_XPATH =
            "//div[@class='mt20 bg-F9FAFB content-style ivu-row']//span[text()='预计']";

    /**
     * Runs one collection round, retrying while {@code collectData} asks for it.
     *
     * <p>NOTE(review): only exceptions count towards {@code RETRY_MAX_TIME}; a {@code true}
     * result from {@code collectData} restarts the round without any limit.
     *
     * @param errorList receives error messages to be reported after the round
     */
    public static void factoringCollect(List<Map<String, String>> errorList) {
        boolean retryFlag = true;
        int retryTime = 1;
        while (retryFlag) {
            // A re-login may have replaced the browser, so never reuse the driver of an earlier round.
            driver = RobotFactoring.getCxt().getDriver();
            int page = 1;
            try {
                // Numbers already handled in this round.
                List<String> usedLetterNum = new ArrayList<>();
                // Numbers that must not be collected.
                List<String> notCollectList = new ArrayList<>();
                getNotCollectList(notCollectList);
                LogManager.logInfo("factoringCollect->不采集集合为:" + notCollectList);
                retryFlag = collectData(errorList, page, usedLetterNum, notCollectList);
            } catch (Exception e) {
                LogManager.logError(e, "factoringCollect->采集异常,当前重试次数" + retryTime);
                // From the third failure on, go back to the home page or log in again.
                if (retryTime >= 3) {
                    LogManager.logInfo("factoringCollect->当前重试次数大于等于3,尝试返回首页,或重新登录");
                    boolean loginStatus = LogInOutHandler.checkLoginStatus(RobotFactoring.getCxt());
                    if (!loginStatus) {
                        int loginTime = 1;
                        LogInOutHandler.startLogin(RobotFactoring.getCxt(), loginTime,
                                RobotConstants.LOGIN_WAIT_TIME);
                        retryTime = 1;
                    }
                }
                retryTime++;
                if (retryTime >= RETRY_MAX_TIME) {
                    retryFlag = false;
                    LogManager.logInfo("factoringCollect->当前重试次数超出上线,停止重试");
                }
            }
        }
    }

    /**
     * Fills {@code notCollectList} with the numbers listed under the first three tabs
     * (pending receipt, transfer, pending payment).
     */
    private static void getNotCollectList(List<String> notCollectList) throws InterruptedException {
        String pendingReceipt = "//div[@class='main-tabs']/div[1]";
        getAccNumAllPage(pendingReceipt, driver, notCollectList);
        String transfer = "//div[@class='main-tabs']/div[2]";
        getAccNumAllPage(transfer, driver, notCollectList);
        String pendingPayment = "//div[@class='main-tabs']/div[3]";
        getAccNumAllPage(pendingPayment, driver, notCollectList);
    }

    /**
     * Walks the "all" tab page by page and collects every record that needs collecting.
     * At most one record is processed per list refresh, because opening a detail page
     * invalidates the list elements.
     *
     * @return {@code true} if the caller should start over, {@code false} when the last
     *         page has been passed
     */
    private static boolean collectData(List<Map<String, String>> errorList, int page,
            List<String> usedLetterNum, List<String> notCollectList) {
        while (true) {
            try {
                OperateHandler.robotOperation(RobotConstants.XPATH, ALL_TAB_XPATH, RobotConstants.click, null);
                ThreadTools.sleepMillis(5000L);
                // Jump to the page; if the pager is gone, ask for a retry.
                if (!inputPage(driver, page)) {
                    return true;
                }
                List<WebElement> spans = driver.findElements(By.xpath(ACC_NO_SPAN_XPATH));
                if (spans.size() == 0) {
                    System.out.println("跳出本次循环检测");
                    // Log the page that was empty (the original reset it to 1 before logging).
                    LogManager.logInfo("collectData->当前页融信编号个数为0,跳出本次检测,当前页码为:" + page);
                    return true;
                }
                boolean pageFlagPlus = true;
                for (WebElement span : spans) {
                    String accNoText = span.getText();
                    if (!needCollect(usedLetterNum, notCollectList, accNoText)) {
                        continue;
                    }
                    pageFlagPlus = false;
                    usedLetterNum.add(accNoText);
                    // Row container: three levels above the number span.
                    WebElement parent = span.findElement(By.xpath("./../../.."));
                    try {
                        OperateHandler.click(parent.findElement(By.xpath(
                                "./div[@class='list-header']//button[@class='btn btn-middle current ivu-btn ivu-btn-default']")));
                    } catch (NoSuchElementException e) {
                        String message = e.getMessage();
                        if (message != null && message.contains("Unable to find element")) {
                            // No detail button for this record: never collect it.
                            usedLetterNum.remove(accNoText);
                            notCollectList.add(accNoText);
                            LogManager.logInfo("collectData->未找到当前融信编号对应详情按钮,当前页码为:" + page
                                    + ",当前融信编号为:" + accNoText);
                            break;
                        }
                        throw e;
                    }
                    ThreadTools.sleepMillis(3000L);
                    JSONObject jsonObject = new JSONObject();
                    boolean breakFlag = collect(errorList, jsonObject, accNoText, usedLetterNum, notCollectList);
                    if (!breakFlag) {
                        // The page has changed, so the remaining spans are stale: refresh the list.
                        break;
                    }

                    jsonObject.put("openDate", DateUtil.formatDate(new Date(), "yyyy-MM-dd"));
                    // NOTE(review): a blank fee makes BigDecimal throw; the outer catch then
                    // restarts the round and the same record is hit again.
                    BigDecimal sum = new BigDecimal(jsonObject.getString("serviceFee"))
                            .add(new BigDecimal(jsonObject.getString("financingFee")));
                    DecimalFormat df = new DecimalFormat("0.00");
                    jsonObject.put("otherFees", df.format(sum));
                    jsonObject.put("infoResource", 1);
                    jsonObject.remove("serviceFee");
                    jsonObject.remove("financingFee");
                    System.out.println(jsonObject);
                    LogManager.logInfo("本次采集数据:", jsonObject);
                    try {
                        httpConnectionSave(jsonObject, errorList);
                    } catch (Exception e) {
                        LogManager.logError(e, "保理信息保存失败");
                    }
                    // Close the detail page.
                    OperateHandler.click(driver.findElement(By.xpath(CLOSE_BUTTON_XPATH)));
                    break;
                }
                // Nothing on this page needed collecting: move on.
                if (pageFlagPlus) {
                    page++;
                }
                handleUnknownWindow();
                ThreadTools.sleepMillis(5000L);
                int pageMax;
                if (OperateHandler.elementExist(RobotConstants.XPATH, TOTAL_XPATH)) {
                    pageMax = readPageCount();
                } else {
                    // Total not shown: treat the current page as the last known one and keep going.
                    pageMax = page;
                }
                if (page > pageMax) {
                    return false;
                }
            } catch (Exception e) {
                LogManager.logError(e, "collectData->采集过程发生异常");
                // Try to close a detail page that may still be open, then ask for a retry.
                try {
                    if (OperateHandler.elementExist(RobotConstants.XPATH, CLOSE_BUTTON_XPATH)) {
                        OperateHandler.click(driver.findElement(By.xpath(CLOSE_BUTTON_XPATH)));
                        LogManager.logInfo("collectData->采集过程发生异常,点击关闭按钮返回重试");
                    }
                } catch (Exception ex) {
                    LogManager.logInfo("collectData->采集过程发生异常,点击关闭按钮失败,返回重试");
                }
                return true;
            }
            LogManager.logInfo("已采集集合:" + usedLetterNum + " , page = " + page);
        }
    }

    /** Reads the total row count from the pager and converts it to a page count (rounded up). */
    private static int readPageCount() {
        String totalText = driver.findElement(By.xpath(TOTAL_XPATH)).getText()
                .replace("共", "").replace("条", "");
        int total = Integer.parseInt(totalText.trim());
        int pages = total / PAGE_SIZE;
        if (total % PAGE_SIZE != 0) {
            pages++;
        }
        return pages;
    }

    /** Clicks the confirm button of an unexpected dialog if one is showing; failures are logged. */
    private static void handleUnknownWindow() {
        boolean elementExist = OperateHandler.elementExist(RobotConstants.XPATH, UNKNOWN_CONFIRM_XPATH);
        try {
            if (elementExist) {
                OperateHandler.click(driver.findElement(By.xpath(UNKNOWN_CONFIRM_XPATH)));
            }
        } catch (Exception e) {
            LogManager.logInfo("点击未知弹窗失败");
        }
    }

    /**
     * Reads the detail page into {@code jsonObject}.
     *
     * <p>Empty fields are reported through {@code errorList} but do not stop the collection.
     *
     * @return {@code true} if the record was read; {@code false} if it was skipped (number
     *         mismatch, record still marked "预计", or an exception)
     */
    private static boolean collect(List<Map<String, String>> errorList, JSONObject jsonObject,
            String accNoText, List<String> usedLetterNum, List<String> notCollectList) {
        String errorMsg = "";
        try {
            String accNo = getEleTextByText(driver, "融信编号:", RobotConstants.SPAN, 1);
            if (StringTools.isTrimEmpty(accNo)) {
                errorMsg += "融信编号详情页为空,";
            }
            if (!accNo.equals(accNoText)) {
                // Wrong detail page: release the number and go back so it is retried.
                usedLetterNum.remove(accNoText);
                OperateHandler.click(driver.findElement(By.xpath(CLOSE_BUTTON_XPATH)));
                return false;
            }
            jsonObject.put("accNo", accNo);

            if (OperateHandler.elementExist(RobotConstants.XPATH, ESTIMATE_XPATH)) {
                // "预计" present: the record is not ready to be collected.
                usedLetterNum.remove(accNoText);
                notCollectList.add(accNoText);
                OperateHandler.click(driver.findElement(By.xpath(CLOSE_BUTTON_XPATH)));
                LogManager.logInfo("collect->当前融信详情未到达采集状态,当前融信编号为:" + accNoText);
                return false;
            }

            String preReceiver = getEleTextByText(driver, "采购商名称:", RobotConstants.SPAN, 1);
            if (StringTools.isTrimEmpty(preReceiver)) {
                errorMsg += "采购商名称详情页为空,";
            }
            jsonObject.put("preReceiver", preReceiver);

            String factorFinancAmt =
                    getEleTextByText(driver, "融资金额(元):", RobotConstants.SPAN, 1).replace(",", "");
            if (StringTools.isTrimEmpty(factorFinancAmt)) {
                errorMsg += "融资金额详情页为空,";
            }
            jsonObject.put("factorFinancAmt", factorFinancAmt);

            String factorRate =
                    getEleTextByText(driver, "融资利率:", RobotConstants.SPAN, 1).replace("%(年化)", "");
            if (StringTools.isTrimEmpty(factorRate)) {
                errorMsg += "融资利率详情页为空,";
            }
            jsonObject.put("factorRate", factorRate);

            String accDate = getEleTextByText(driver, "承诺付款日期:", RobotConstants.SPAN, 1);
            if (StringTools.isTrimEmpty(accDate)) {
                errorMsg += "承诺付款日期详情页为空,";
            }
            jsonObject.put("accDate", accDate);

            String accountDate = getEleTextByText(driver, "放款日期:", RobotConstants.SPAN, 1);
            if (StringTools.isTrimEmpty(accountDate)) {
                errorMsg += "放款日期详情页为空,";
            }
            jsonObject.put("accountDate", accountDate);

            String financingCost =
                    getEleTextByText(driver, "融资利息(元):", RobotConstants.SPAN, 1).replace(",", "");
            if (StringTools.isTrimEmpty(financingCost)) {
                errorMsg += "融资利息(元)详情页为空,";
            }
            jsonObject.put("financingCost", financingCost);

            String financingFee =
                    getEleTextByText(driver, "融资费用(元):", RobotConstants.SPAN, 1).replace(",", "");
            if (StringTools.isTrimEmpty(financingFee)) {
                errorMsg += "融资费用详情页为空,";
            }
            jsonObject.put("financingFee", financingFee);

            String serviceFee = getEleTextByText(driver, "服务费用(元):", RobotConstants.SPAN, 1);
            if (StringTools.isTrimEmpty(serviceFee)) {
                errorMsg += "服务费用详情页为空,";
            }
            jsonObject.put("serviceFee", serviceFee.replace(",", ""));

            if (StringTools.isNotTrimEmpty(errorMsg)) {
                // Drop the trailing comma and report the missing fields.
                errorMsg = errorMsg.substring(0, errorMsg.length() - 1);
                Map<String, String> errorMap = new HashMap<>();
                errorMap.put(accNoText, accNoText + "保利信息" + errorMsg);
                errorList.add(errorMap);
            }
            return true;
        } catch (Exception e) {
            LogManager.logError(e, "collect->采集详情页异常,当前融信编号为:" + accNoText);
            return false;
        }
    }

    /**
     * Returns the text of the element found from a label span.
     *
     * <p>With a sibling type and index the XPath is
     * {@code //span[text()='<text>']/following-sibling::<type>[<n>]}; without them it is the
     * label span itself, {@code //span[text()='<text>']}.
     *
     * @param driver          driver to query
     * @param text            exact label text
     * @param neighborEleType tag name of the following sibling, or empty
     * @param neighborNum     1-based sibling index, or {@code null}
     * @return the element text
     */
    private static String getEleTextByText(WebDriver driver, String text, String neighborEleType,
            Integer neighborNum) {
        StringBuilder xpath = new StringBuilder();
        xpath.append("//span[text()='").append(text).append("']");
        if (!StringUtils.isEmpty(neighborEleType) && neighborNum != null) {
            xpath.append("/following-sibling::").append(neighborEleType)
                    .append("[").append(neighborNum).append("]");
        }
        return driver.findElement(By.xpath(xpath.toString())).getText();
    }

    /**
     * Decides whether a number still has to be collected. Local lists are checked first so
     * the backend is only queried when necessary.
     *
     * @return {@code false} if the number is excluded, already handled in this round, or
     *         already stored by the backend; otherwise {@code true}
     */
    private static boolean needCollect(List<String> usedLetterNum, List<String> notCollectList,
            String accNum) {
        if (notCollectList.contains(accNum) || usedLetterNum.contains(accNum)) {
            return false;
        }
        return !httpConnectionSelect(accNum);
    }

    /**
     * Adds every number shown under one tab, across all its pages, to {@code notCollectList}.
     *
     * @param clickStr       XPath of the tab to click
     * @param driver         driver to query
     * @param notCollectList receives the numbers
     */
    private static void getAccNumAllPage(String clickStr, WebDriver driver, List<String> notCollectList)
            throws InterruptedException {
        int page = 1;
        while (true) {
            OperateHandler.robotOperation(RobotConstants.XPATH, clickStr, RobotConstants.click, null);
            ThreadTools.sleep(2);
            inputPage(driver, page);
            ThreadTools.sleep(2);
            List<WebElement> spans = driver.findElements(By.xpath(ACC_NO_SPAN_XPATH));
            if (spans.size() == 0) {
                System.out.println("跳出本次循环检测");
                break;
            }
            for (WebElement span : spans) {
                notCollectList.add(span.getText());
            }
            page++;
            String totalMax = driver.findElement(By.xpath(TOTAL_XPATH)).getText()
                    .replace("共", "").replace("条", "");
            int total = Integer.parseInt(totalMax.trim());
            int pageMax = total / PAGE_SIZE;
            if (total % PAGE_SIZE != 0) {
                pageMax++;
            }
            if (page > pageMax) {
                break;
            }
        }
    }

    /**
     * Types a page number into the pager's jump box and presses Enter.
     *
     * @return {@code false} only when the pager element is reported as no longer valid;
     *         {@code true} otherwise
     */
    private static boolean inputPage(WebDriver driver, int page) {
        try {
            // The element is looked up again before each step.
            driver.findElement(By.xpath(PAGE_INPUT_XPATH)).clear();
            ThreadTools.sleepMillis(500L);
            driver.findElement(By.xpath(PAGE_INPUT_XPATH)).sendKeys(page + "");
            ThreadTools.sleepMillis(500L);
            driver.findElement(By.xpath(PAGE_INPUT_XPATH)).sendKeys(Keys.ENTER);
            ThreadTools.sleepMillis(500L);
            System.out.println("------");
            ThreadTools.sleepMillis(5000L);
        } catch (InvalidElementStateException e) {
            String errorMessage = e.getMessage();
            if (errorMessage != null && errorMessage.contains("Element is no longer valid")) {
                // Pager element is gone: stop looping.
                return false;
            }
        }
        return true;
    }

    /**
     * Asks the backend whether a number has already been stored.
     *
     * @param letterNum number to check
     * @return the {@code status} flag of the response
     */
    private static boolean httpConnectionSelect(String letterNum) {
        JSONObject jsonObject = new JSONObject();
        jsonObject.put("accNo", letterNum);
        // e.g. https://mgt.banksteel.com/finance-funds/api/rpaFactoringSelect.htm
        JSONObject result = HttpClientUtil.httpPost(
                RobotFactoring.getCxt().getConfig().getCheckFactoringUrl(), jsonObject, false);
        return result.getBoolean("status");
    }

    /**
     * Posts a collected record to the backend; on a negative status the response message
     * is added to {@code errorList}.
     *
     * @return {@code true} if the backend reported success
     */
    private static boolean httpConnectionSave(JSONObject jsonObject, List<Map<String, String>> errorList) {
        // e.g. https://mgt.banksteel.com/finance-funds/api/getRpaFactoringCollect.htm
        JSONObject result = HttpClientUtil.httpPost(
                RobotFactoring.getCxt().getConfig().getSaveFactoringUrl(), jsonObject, false);
        if (result.getBoolean("status")) {
            return true;
        }
        Map<String, String> errorMap = new HashMap<>();
        errorMap.put(jsonObject.getString("accNo"), result.getString("msg"));
        errorList.add(errorMap);
        return false;
    }

    /**
     * Logs out, sleeps for half an hour, then logs in again. If the logout failed, the
     * browser is restarted instead.
     */
    public static void toSleep() {
        RpaContext cxt = RobotFactoring.getCxt();
        boolean isLogout = LogInOutHandler.logout(cxt);
        LogManager.logInfo("rpa开始睡眠...");
        ThreadTools.sleep(30 * 60);
        if (isLogout) {
            LogManager.logInfo("rpa睡眠结束...");
            // Use the context's driver: the cached field may be unset or refer to a closed browser.
            cxt.getDriver().navigate().to(cxt.getConfig().getOpenUrl());
            int loginTime = 1;
            LogInOutHandler.startLogin(cxt, loginTime, RobotConstants.LOGIN_WAIT_TIME);
        } else {
            LogInOutHandler.openBrowserStart();
        }
        ThreadTools.sleep(6);
    }
}

睡眠

睡眠方法在采集类的最下面,睡眠结束后需要检查当前系统状态,是否属于登录状态(检查首页按钮是否存在),若不是,则重新登录

使用总结

主要使用的内容为

WebDriver driver= BrowserTools.internetExplorerDriver(false);

driver.findElement

| 策略 | 语法 | 描述 |
| --- | --- | --- |
| By id | `driver.findElement(By.id())` | 通过 id 属性定位元素 |
| By name | `driver.findElement(By.name())` | 通过 name 属性定位元素 |
| By class name | `driver.findElement(By.className())` | 通过 class 属性定位元素 |
| By tag name | `driver.findElement(By.tagName())` | 通过 HTML 标签名定位元素 |
| By link text | `driver.findElement(By.linkText())` | 通过链接内容定位元素 |
| By partial link text | `driver.findElement(By.partialLinkText())` | 通过部分链接内容定位元素 |
| By css | `driver.findElement(By.cssSelector())` | 通过 css 选择器定位元素 |
| By xpath | `driver.findElement(By.xpath())` | 通过 xpath 定位元素 |

主要记录一下xpath的语法

| 表达式 | 描述 |
| --- | --- |
| `nodename` | 选取此节点的所有子节点。 |
| `/` | 从根节点选取(取子节点)。 |
| `//` | 从匹配选择的当前节点选择文档中的节点,而不考虑它们的位置(取子孙节点)。 |
| `.` | 选取当前节点。 |
| `..` | 选取当前节点的父节点。 |
| `@` | 选取属性。 |

| 路径表达式 | 结果 |
| --- | --- |
| `bookstore` | 选取 bookstore 元素的所有子节点。 |
| `/bookstore` | 选取根元素 bookstore。注释:假如路径起始于正斜杠( / ),则此路径始终代表到某元素的绝对路径! |
| `bookstore/book` | 选取属于 bookstore 的子元素的所有 book 元素。 |
| `//book` | 选取所有 book 子元素,而不管它们在文档中的位置。 |
| `bookstore//book` | 选择属于 bookstore 元素的后代的所有 book 元素,而不管它们位于 bookstore 之下的什么位置。 |
| `//@lang` | 选取名为 lang 的所有属性。 |

谓语

| 路径表达式 | 结果 |
| --- | --- |
| `/bookstore/book[1]` | 选取属于 bookstore 子元素的第一个 book 元素。 |
| `/bookstore/book[last()]` | 选取属于 bookstore 子元素的最后一个 book 元素。 |
| `/bookstore/book[last()-1]` | 选取属于 bookstore 子元素的倒数第二个 book 元素。 |
| `/bookstore/book[position()<3]` | 选取最前面的两个属于 bookstore 元素的子元素的 book 元素。 |
| `//title[@lang]` | 选取所有拥有名为 lang 的属性的 title 元素。 |
| `//title[@lang='eng']` | 选取所有 title 元素,且这些元素拥有值为 eng 的 lang 属性。 |
| `/bookstore/book[price>35.00]` | 选取 bookstore 元素的所有 book 元素,且其中的 price 元素的值须大于 35.00。 |
| `/bookstore/book[price>35.00]//title` | 选取 bookstore 元素中的 book 元素的所有 title 元素,且其中的 price 元素的值须大于 35.00。 |

| 通配符 | 描述 |
| --- | --- |
| `*` | 匹配任何元素节点。 |
| `@*` | 匹配任何属性节点。 |
| `node()` | 匹配任何类型的节点。 |

| 路径表达式 | 结果 |
| --- | --- |
| `/bookstore/*` | 选取 bookstore 元素的所有子元素。 |
| `//*` | 选取文档中的所有元素。 |
| `//title[@*]` | 选取所有带有属性的 title 元素。 |

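文本选择,文本选择看似方便,其实有一定可能选取失败,具体原因不是很清楚。一个常见的原因是 text()='…' 要求文本完全相等:如果页面上的文本前后带有空格、换行,或者文本被拆分在多个子节点中,就会匹配不到;这种情况可以尝试 //span[normalize-space(text())='预计'] 或 //span[contains(text(),'预计')]。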

//span[text()='预计']

使用 // 进行选取时,若选取不到,可将更多上级写出,以便选取,例如上方例子可写成

driver.findElement(
    By.xpath("//div[@class='mt20 bg-F9FAFB content-style ivu-row']//span[text()='预计']"));

记录(登出按钮是隐藏状态,鼠标悬停才会展示登出,如何登出)

当按钮是隐藏状态的时候,使用click无法对其进行操作,需要使用JavascriptExecutor进行点击操作

// Locate the element that must be hovered.
WebElement element = driver.findElement(By.xpath());
// Move the mouse onto it; an Actions chain does nothing until perform() is called.
Actions action = new Actions(driver);
action.moveToElement(element).perform();

// Locate the (hidden) logout element.
WebElement logout = driver.findElement(By.xpath());
// Click it through JavascriptExecutor.
JavascriptExecutor js = (JavascriptExecutor) driver;
js.executeScript("arguments[0].click()", logout);
标签: java selenium

本文转载自: https://blog.csdn.net/yyh695809001/article/details/124349633
版权归原作者 java小恒 所有, 如有侵权,请联系我们删除。

“Java使用selenium实现RPA采集机器人”的评论:

还没有评论