0


Java HttpClient爬虫请求

本项目采用 Spring Boot 构建，为 Maven 工程。

添加依赖

pom文件

<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <!-- Maven build descriptor for the HttpClient request examples. -->
    <modelVersion>4.0.0</modelVersion>

    <groupId>org.test</groupId>
    <artifactId>testDome</artifactId>
    <version>0.0.1-SNAPSHOT</version>

    <dependencies>
        <!-- Apache HttpClient 4.x: provides CloseableHttpClient, HttpGet, HttpPost, EntityUtils. -->
        <dependency>
            <groupId>org.apache.httpcomponents</groupId>
            <artifactId>httpclient</artifactId>
            <version>4.5.5</version>
        </dependency>
        <!-- NOTE(review): fastjson is not referenced by any of the example classes shown
             alongside this POM; confirm it is still needed and review the pinned version. -->
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.47</version>
        </dependency>
    </dependencies>
</project>

GET 无参形式

packagetestDemo;importorg.apache.http.Header;importorg.apache.http.HttpHost;importorg.apache.http.client.config.RequestConfig;importorg.apache.http.client.methods.CloseableHttpResponse;importorg.apache.http.client.methods.HttpGet;importorg.apache.http.impl.client.CloseableHttpClient;importorg.apache.http.impl.client.HttpClients;importorg.apache.http.util.EntityUtils;publicclassDoGET{publicstaticvoidmain(String[] args)throwsException{//        RequestConfig config = RequestConfig.custom().setRedirectsEnabled(false).build();//不允许重定向//        CloseableHttpClient httpClient = HttpClients.custom().setDefaultRequestConfig(config).build();//        proxyHost -- 代理ip; proxyPort -- 端口号//        int proxyPort = 8000;//        String proxyHost = "192.10.2.125";//        HttpHost proxy = new HttpHost(proxyHost, proxyPort, "HTTP");//创建Httpclient对象CloseableHttpClient httpclient =HttpClients.createDefault();//get请求(忽略SSL证书),获取结果// TODO: 2020/4/27  忽略SSL证书//创建http GET请求HttpGet get =newHttpGet("http://www.baidu.com");//        CloseableHttpResponse response = httpclient.execute(proxy, get);CloseableHttpResponse response = httpclient.execute(get);try{// 执行请求
            response = httpclient.execute(get);// 判断返回状态是否为200if(response.getStatusLine().getStatusCode()==200){//请求体内容String content =EntityUtils.toString(response.getEntity(),"UTF-8");//内容System.out.println("<<"+ content +">>");System.out.println("内容长度:"+ content.length());//                Header[] cookie =  response.getHeaders("Set-Cookie");}}finally{if(response !=null){
                response.close();}//相当于关闭浏览器
            httpclient.close();}}}

GET带参请求

package testDemo;

import java.io.File;
import java.net.URI;

import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.utils.URIBuilder;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;

/**
 * Example: HTTP GET request with query parameters.
 *
 * <p>There are two ways to pass parameters:
 * <ol>
 *   <li>append them directly to the URL, e.g. {@code ?wd=java};</li>
 *   <li>set them through {@link URIBuilder}, e.g. {@code setParameter("wd", "java")}
 *       (the approach used here).</li>
 * </ol>
 */
public class DoGETParam {

    /**
     * Requests {@code http://www.baidu.com/s} with the query parameter {@code wd=java} and,
     * if the status code is 200, prints the body length followed by the body itself.
     *
     * @param args unused
     * @throws Exception if the URI cannot be built, the request fails, or a resource cannot
     *                   be closed
     */
    public static void main(String[] args) throws Exception {
        // Build the URI before any closeable resource exists, so a failure here leaks nothing.
        URI uri = new URIBuilder("http://www.baidu.com/s")
                .setParameter("wd", "java")
                .build();
        HttpGet httpGet = new HttpGet(uri);

        // Response is closed first, then the client; both are closed even on exceptions.
        try (CloseableHttpClient httpclient = HttpClients.createDefault();
             CloseableHttpResponse response = httpclient.execute(httpGet)) {
            if (response.getStatusLine().getStatusCode() == 200) {
                String content = EntityUtils.toString(response.getEntity(), "UTF-8");
                System.out.println("内容长度:" + content.length());
                System.out.println("内容<<:" + content);
            }
        }
    }
}

POST无参请求

packagetestDemo;importorg.apache.http.client.methods.CloseableHttpResponse;importorg.apache.http.client.methods.HttpPost;importorg.apache.http.impl.client.CloseableHttpClient;importorg.apache.http.impl.client.HttpClients;importorg.apache.http.util.EntityUtils;publicclassDoPOST{publicstaticvoidmain(String[] args)throwsException{// 创建Httpclient对象CloseableHttpClient httpclient =HttpClients.createDefault();// 创建http POST请求HttpPost httpPost =newHttpPost("http://www.oschina.net/");//伪装浏览器请求
        httpPost.setHeader("User-Agent","Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36");CloseableHttpResponse response =null;try{// 执行请求
            response = httpclient.execute(httpPost);// 判断返回状态是否为200if(response.getStatusLine().getStatusCode()==200){String content =EntityUtils.toString(response.getEntity(),"UTF-8");//内容写入文件System.out.println(">>"+ content);System.out.println("内容长度:"+ content.length());}else{System.out.println(response.getStatusLine().getStatusCode());String content =EntityUtils.toString(response.getEntity(),"UTF-8");System.out.println(">>"+ content);}}finally{if(response !=null){
                response.close();}
            httpclient.close();}}}

POST带参请求

package testDemo;

import java.io.File;
import java.util.ArrayList;
import java.util.List;

import org.apache.http.NameValuePair;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.util.EntityUtils;

/**
 * Example: HTTP POST request carrying form parameters.
 * Mengtao
 */
public class DoPOSTParam {

    /**
     * Posts the form fields {@code lang=cn} and {@code pageNum=1} to
     * {@code http://www.bcia.com.cn/bcia/FAQ/search} with browser-like headers.
     *
     * <p>On status 200 the body and its length are printed; otherwise only the status code
     * is printed.
     *
     * @param args unused
     * @throws Exception if the form entity cannot be built, the request fails, or a resource
     *                   cannot be closed
     */
    public static void main(String[] args) throws Exception {
        HttpPost httpPost = new HttpPost("http://www.bcia.com.cn/bcia/FAQ/search");

        // The two POST parameters, sent as an application/x-www-form-urlencoded body.
        List<NameValuePair> parameters = new ArrayList<NameValuePair>(2);
        parameters.add(new BasicNameValuePair("lang", "cn"));
        parameters.add(new BasicNameValuePair("pageNum", "1"));

        // NOTE(review): this constructor relies on the library's default charset; pass an
        // explicit charset if non-ASCII parameter values are ever added.
        UrlEncodedFormEntity formEntity = new UrlEncodedFormEntity(parameters);
        httpPost.setEntity(formEntity);

        // Present the request as coming from a browser on the target site.
        httpPost.setHeader("Referer", "http://www.bcia.com.cn/cjwt.html");
        httpPost.setHeader("Host", "www.bcia.com.cn");
        httpPost.setHeader(
                "User-Agent",
                "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36");

        // try-with-resources guarantees the client is closed even when execute() or the body
        // read throws; the response is closed first, then the client.
        try (CloseableHttpClient httpclient = HttpClients.createDefault();
             CloseableHttpResponse response = httpclient.execute(httpPost)) {
            int statusCode = response.getStatusLine().getStatusCode();
            if (statusCode == 200) {
                String content = EntityUtils.toString(response.getEntity(), "UTF-8");
                System.out.println("内容" + content);
                System.out.println("内容长度:" + content.length());
            } else {
                System.out.println("内容111" + statusCode);
            }
        }
    }
}

本文转载自: https://blog.csdn.net/qq_41369057/article/details/131222505
版权归原作者 Codeoooo 所有, 如有侵权,请联系我们删除。

“Java HttpClient爬虫请求”的评论:

还没有评论