查询企业信息范例Coding

时间: 2024-11-10 admin IT培训

查询企业信息范例Coding

查询企业信息范例Coding

一、HttpClient工具类

package com.hanzhigu.utils;import com.alibaba.fastjson.JSON;
import com.hanzhigu.smartframe.utils.ValueUtils;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.http.HttpEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.conn.ssl.NoopHostnameVerifier;
import org.apache.http.conn.ssl.SSLConnectionSocketFactory;
import org.apache.http.conn.ssl.TrustSelfSignedStrategy;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.ssl.SSLContexts;
import org.apache.http.util.EntityUtils;import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;/*** HttpClient工具类* @author Obito* @version 2022-09-19*/
@Slf4j
public class HttpClientUtils {@Data@Builder@NoArgsConstructor@AllArgsConstructorstatic class HttpClientConfig{private String url;private String uri;private Object param;}public static final String HTTP_GET="GET";public static final String HTTP_POST="POST";/*** 创建httpPost请求* @param config* @return*/private static HttpPost crateHttpPost(HttpClientConfig config) {// 创建Post请求HttpPost httpPost;String url = config.getUrl()+config.getUri();httpPost = new HttpPost(url);String jsonString = JSON.toJSONString(config.getParam());//{user[]}StringEntity entity = new StringEntity(jsonString, "UTF-8");httpPost.setEntity(entity);// 设置ContentType(注:如果只是传普通参数的话,ContentType不一定非要用application/json)httpPost.setHeader("Content-Type", "application/json;charset=utf8");httpPost.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3");httpPost.setHeader("Accept-Language", "zh-CN,zh;q=0.9");httpPost.setHeader("Connection", "keep-alive");httpPost.setHeader("Cookie","jsid=SEO-BAIDU-ALL-SY-000001; TYCID=82cfdd80b69011ed9869e902bd16ded9; ssuid=1672639749; _ga=GA1.2.756005888.1677757778; tyc-user-info=%7B%22state%22%3A%220%22%2C%22vipManager%22%3A%220%22%2C%22mobile%22%3A%2213975099232%22%7D; tyc-user-info-save-time=1677761899034; auth_token=eyJhbGciOiJIUzUxMiJ9.eyJzdWIiOiIxMzk3NTA5OTIzMiIsImlhdCI6MTY3Nzc2MTg5OCwiZXhwIjoxNjgwMzUzODk4fQ.IDdFfrxNArt1Zb0MDc37SskoP50KSQ6fRia-1QitSrYmtbBeFgOV_nR6DXf_AUFqDbtMB0GhEwJDY2_BPcq9-w; HWWAFSESID=f14fc1021cfb6dbb7b3a; HWWAFSESTIME=1678437808496; csrfToken=Cch1T41amdgu8er2_cIH1bw1; 
sensorsdata2015jssdkcross=%7B%22distinct_id%22%3A%22289172060%22%2C%22first_id%22%3A%221869298c4d74dc-0aba302e12ae998-16525635-1296000-1869298c4d89be%22%2C%22props%22%3A%7B%22%24latest_traffic_source_type%22%3A%22%E8%87%AA%E7%84%B6%E6%90%9C%E7%B4%A2%E6%B5%81%E9%87%8F%22%2C%22%24latest_search_keyword%22%3A%22%E6%9C%AA%E5%8F%96%E5%88%B0%E5%80%BC%22%2C%22%24latest_referrer%22%3A%22https%3A%2F%2Fwww.baidu%2Flink%22%7D%2C%22identities%22%3A%22eyIkaWRlbnRpdHlfY29va2llX2lkIjoiMTg2OTI5OGM0ZDc0ZGMtMGFiYTMwMmUxMmFlOTk4LTE2NTI1NjM1LTEyOTYwMDAtMTg2OTI5OGM0ZDg5YmUiLCIkaWRlbnRpdHlfbG9naW5faWQiOiIyODkxNzIwNjAifQ%3D%3D%22%2C%22history_login_id%22%3A%7B%22name%22%3A%22%24identity_login_id%22%2C%22value%22%3A%22289172060%22%7D%2C%22%24device_id%22%3A%221869298c4d74dc-0aba302e12ae998-16525635-1296000-1869298c4d89be%22%7D; bannerFlag=true; Hm_lvt_e92c8d65d92d534b0fc290df538b4758=1677496731,1677756314,1678437815; bdHomeCount=3; cloud_token=69af3ada4a434957a1497d99ea558704; searchSessionId=1679556189.09849499; Hm_lpvt_e92c8d65d92d534b0fc290df538b4758=1679556191");return httpPost;}/*** 创建httpPost请求* @param config* @return*/private static HttpGet crateHttpGet(HttpClientConfig config) {if (config.getParam()!=null){Map<String,Object> params = (Map<String, Object>) config.getParam();StringBuilder url = new StringBuilder(config.getUrl());List<String> keys = params.keySet().stream().collect(Collectors.toList());for (int i = 0; i < keys.size(); i++) {url.append(i==0?"?":"&").append(keys.get(i)).append("=").append(params.get(keys.get(i)));}config.setUrl(url.toString());}HttpGet httpGet = new HttpGet(config.getUrl());// 设置类型 "application/x-www-form-urlencoded" "application/json"httpGet.setHeader("Content-Type", "application/x-www-form-urlencoded");httpGet.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3");httpGet.setHeader("Accept-Language", "zh-CN,zh;q=0.9");httpGet.setHeader("Connection", 
"keep-alive");httpGet.setHeader("Cookie","jsid=SEO-BAIDU-ALL-SY-000001; TYCID=82cfdd80b69011ed9869e902bd16ded9; ssuid=1672639749; _ga=GA1.2.756005888.1677757778; tyc-user-info=%7B%22state%22%3A%220%22%2C%22vipManager%22%3A%220%22%2C%22mobile%22%3A%2213975099232%22%7D; tyc-user-info-save-time=1677761899034; auth_token=eyJhbGciOiJIUzUxMiJ9.eyJzdWIiOiIxMzk3NTA5OTIzMiIsImlhdCI6MTY3Nzc2MTg5OCwiZXhwIjoxNjgwMzUzODk4fQ.IDdFfrxNArt1Zb0MDc37SskoP50KSQ6fRia-1QitSrYmtbBeFgOV_nR6DXf_AUFqDbtMB0GhEwJDY2_BPcq9-w; HWWAFSESID=f14fc1021cfb6dbb7b3a; HWWAFSESTIME=1678437808496; csrfToken=Cch1T41amdgu8er2_cIH1bw1; sensorsdata2015jssdkcross=%7B%22distinct_id%22%3A%22289172060%22%2C%22first_id%22%3A%221869298c4d74dc-0aba302e12ae998-16525635-1296000-1869298c4d89be%22%2C%22props%22%3A%7B%22%24latest_traffic_source_type%22%3A%22%E8%87%AA%E7%84%B6%E6%90%9C%E7%B4%A2%E6%B5%81%E9%87%8F%22%2C%22%24latest_search_keyword%22%3A%22%E6%9C%AA%E5%8F%96%E5%88%B0%E5%80%BC%22%2C%22%24latest_referrer%22%3A%22https%3A%2F%2Fwww.baidu%2Flink%22%7D%2C%22identities%22%3A%22eyIkaWRlbnRpdHlfY29va2llX2lkIjoiMTg2OTI5OGM0ZDc0ZGMtMGFiYTMwMmUxMmFlOTk4LTE2NTI1NjM1LTEyOTYwMDAtMTg2OTI5OGM0ZDg5YmUiLCIkaWRlbnRpdHlfbG9naW5faWQiOiIyODkxNzIwNjAifQ%3D%3D%22%2C%22history_login_id%22%3A%7B%22name%22%3A%22%24identity_login_id%22%2C%22value%22%3A%22289172060%22%7D%2C%22%24device_id%22%3A%221869298c4d74dc-0aba302e12ae998-16525635-1296000-1869298c4d89be%22%7D; bannerFlag=true; Hm_lvt_e92c8d65d92d534b0fc290df538b4758=1677496731,1677756314,1678437815; bdHomeCount=3; cloud_token=69af3ada4a434957a1497d99ea558704; searchSessionId=1679556189.09849499; Hm_lpvt_e92c8d65d92d534b0fc290df538b4758=1679556191");// 创建Post请求return httpGet;}/*** 发送httpPost请求* @param config* @return*/public static String sendHttpClient(HttpClientConfig config,String requestType)  {CloseableHttpClient httpClient = HttpClientBuilder.create().build();// 由客户端执行(发送)Post请求CloseableHttpResponse response = null;String result = null;try {response = 
httpClient.execute(HTTP_POST.equals(requestType)?crateHttpPost(config):crateHttpGet(config));// 从响应模型中获取响应实体HttpEntity responseEntity = response.getEntity();if (responseEntity != null) {result = EntityUtils.toString(responseEntity);}} catch (IOException e) {log.error("[{}]系统异常信息:", config.getUrl(), e);e.printStackTrace();}finally {log.info("sendHttpClient, config:{},size:{}",config, ValueUtils.isNotEmpty(result) ? result.length() : 0);try {httpClient.close();return result;} catch (IOException e) {log.debug("释放资源:", e);}}return result;}public static CloseableHttpClient get(boolean httpsFlag) throws Exception{CloseableHttpClient httpClient;if(httpsFlag) {SSLConnectionSocketFactory scsf = new SSLConnectionSocketFactory(SSLContexts.custom().loadTrustMaterial(null, new TrustSelfSignedStrategy()).build(),NoopHostnameVerifier.INSTANCE);httpClient = HttpClients.custom().setSSLSocketFactory(scsf).build();}else {httpClient = HttpClientBuilder.create().build();}return httpClient;}public static void close(CloseableHttpClient httpClient){try {if (httpClient != null) {httpClient.close();}} catch (IOException e) {log.debug("释放资源:", e);}}
}

二、查询工具类

package com.hanzhigu.utils;import org.apache.commons.lang3.StringUtils;
import org.json.JSONArray;
import org.json.JSONObject;import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.util.*;/*** 爬取天眼查公司信息的工具类** @author Obito* @date 2023-03-14*/
public class CrawlerCompanyUtil {/** 根据关键字查询企业URL*/private static final String SEARCH_URL = "=";/** 根据企业主键查询企业详情信息URL*/private static final String COMPANY_DETAILS_URL = "/";/** 根据企业名称模糊查询URL*/private static final String SEARCH_LIKE_URL = "=";public static JSONObject send(String url,String param) throws UnsupportedEncodingException {String sendUrl = url + URLEncoder.encode(param, "UTF-8");sendUrl = url.equals(SEARCH_URL) ? sendUrl+"&sessionNo=1677210245.06585246" : sendUrl;JSONObject ans = new JSONObject(StringUtils.substringBetween(HttpClientUtils.sendHttpClient(HttpClientUtils.HttpClientConfig.builder().url(sendUrl).build(),HttpClientUtils.HTTP_GET),"<script id=\"__NEXT_DATA__\" type=\"application/json\">", "</script></body></html>")).getJSONObject("props").getJSONObject("pageProps").getJSONObject("dehydratedState").getJSONArray("queries").getJSONObject(0).getJSONObject("state").getJSONObject("data").getJSONObject("data");return ans;}/*** 爬取天眼查公司信息** @param entName 企业名称* @throws Exception*/public static JSONObject getEndData(String entName) throws Exception {JSONObject data = send(SEARCH_URL,entName);JSONArray companyList = data.getJSONArray("companyList");if(companyList!=null){JSONObject company = companyList.getJSONObject(0);Object companyId = company.get("id");Object estiblishTime = company.get("estiblishTime");if (companyId!=null && StringUtils.isNotBlank(companyId.toString())){JSONObject detailData = send(COMPANY_DETAILS_URL,companyId.toString());if (estiblishTime!=null && StringUtils.isNotBlank(estiblishTime.toString())){detailData.put("estiblishTime",estiblishTime.toString().split(" ")[0]);}return detailData;}}return null;}/*** 根据关键字查天眼查公司信息列表** @param keyword 企业名称* @throws Exception*/public static List<String> getEntNames(String keyword){Map<String,String> param = new HashMap<>();param.put("keyword",keyword);String ans = HttpClientUtils.sendHttpClient(HttpClientUtils.HttpClientConfig.builder().url(SEARCH_LIKE_URL+new Date().getTime()).param(param).build(), 
HttpClientUtils.HTTP_POST);JSONObject ansObj = new JSONObject(ans);List<String> ansList = new ArrayList<>();Optional.ofNullable(ansObj.getJSONArray("data")).ifPresent(arr->arr.forEach(obj->Optional.ofNullable(((JSONObject) obj).get("name")).ifPresent(name->ansList.add(name.toString()))));return ansList;}
}

就是这么一回事