liuyuqi-dellpc 2 years ago
commit
4150a0758b

+ 26 - 0
.classpath

@@ -0,0 +1,26 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<classpath>
+	<classpathentry kind="src" output="target/classes" path="src/main/java">
+		<attributes>
+			<attribute name="optional" value="true"/>
+			<attribute name="maven.pomderived" value="true"/>
+		</attributes>
+	</classpathentry>
+	<classpathentry kind="src" output="target/test-classes" path="src/test/java">
+		<attributes>
+			<attribute name="optional" value="true"/>
+			<attribute name="maven.pomderived" value="true"/>
+		</attributes>
+	</classpathentry>
+	<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.8">
+		<attributes>
+			<attribute name="maven.pomderived" value="true"/>
+		</attributes>
+	</classpathentry>
+	<classpathentry kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER">
+		<attributes>
+			<attribute name="maven.pomderived" value="true"/>
+		</attributes>
+	</classpathentry>
+	<classpathentry kind="output" path="target/classes"/>
+</classpath>

+ 2 - 0
.gitignore

@@ -0,0 +1,2 @@
+/.settings
+/target/

+ 23 - 0
.project

@@ -0,0 +1,23 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<projectDescription>
+	<name>crawlbid</name>
+	<comment></comment>
+	<projects>
+	</projects>
+	<buildSpec>
+		<buildCommand>
+			<name>org.eclipse.jdt.core.javabuilder</name>
+			<arguments>
+			</arguments>
+		</buildCommand>
+		<buildCommand>
+			<name>org.eclipse.m2e.core.maven2Builder</name>
+			<arguments>
+			</arguments>
+		</buildCommand>
+	</buildSpec>
+	<natures>
+		<nature>org.eclipse.jdt.core.javanature</nature>
+		<nature>org.eclipse.m2e.core.maven2Nature</nature>
+	</natures>
+</projectDescription>

+ 5 - 0
README.md

@@ -0,0 +1,5 @@
+## crawl
+
+招投标爬虫
+
+

+ 3 - 0
config.properties

@@ -0,0 +1,3 @@
+App.chromeDriverPath=D:\\Program-Files\\browser-driver\\chromedriver.exe
+App.patten=.*/api/ctpsp-public/user-certificate/endpoint/publicity-pager.*
+App.startUrl=http://tpp.ctba.org.cn/cmsNavDetail/open/certificate

+ 31 - 0
pom.xml

@@ -0,0 +1,31 @@
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+	<modelVersion>4.0.0</modelVersion>
+
+	<groupId>me.yoqi</groupId>
+	<artifactId>crawlbid</artifactId>
+	<version>1.0.1-SNAPSHOT</version>
+	<packaging>jar</packaging>
+
+	<name>crawlbid</name>
+	<url>http://blog.yoqi.me</url>
+
+	<properties>
+		<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+	</properties>
+
+	<dependencies>
+		<dependency>
+			<groupId>org.seleniumhq.selenium</groupId>
+			<artifactId>selenium-java</artifactId>
+			<version>4.0.0-alpha-6</version>
+		</dependency>
+
+		<dependency>
+			<groupId>junit</groupId>
+			<artifactId>junit</artifactId>
+			<version>3.8.1</version>
+			<scope>test</scope>
+		</dependency>
+	</dependencies>
+</project>

+ 124 - 0
src/main/java/me/yoqi/crawlbid/App.java

@@ -0,0 +1,124 @@
+package me.yoqi.crawlbid;
+
+import java.awt.Image;
+import java.util.function.Consumer;
+import java.util.regex.Pattern;
+
+import org.openqa.selenium.OutputType;
+import org.openqa.selenium.chrome.ChromeDriver;
+import org.openqa.selenium.chrome.ChromeOptions;
+import org.openqa.selenium.devtools.DevTools;
+import org.openqa.selenium.devtools.network.Network;
+import org.openqa.selenium.devtools.network.model.ResourceType;
+import org.openqa.selenium.devtools.network.model.ResponseReceived;
+
+import me.yoqi.crawlbid.model.Config;
+
+/**
+ * Crawler entry point. Starts Chrome through Selenium, enables DevTools
+ * network events and prints the bodies of XHR responses whose URL matches the
+ * configured pattern while paging through the start page.
+ */
+public class App {
+	/** Pause after a click that triggers a data request, in milliseconds. */
+	private static final long PAGE_LOAD_WAIT_MS = 1000L;
+	/** Pause after opening the page-size dropdown, in milliseconds. */
+	private static final long DROPDOWN_WAIT_MS = 500L;
+
+	static DevTools devTools;
+	static ChromeDriver driver;
+
+	/**
+	 * Reads the driver path, start URL and URL pattern from {@link Config},
+	 * launches Chrome, opens the start URL and runs the crawl.
+	 *
+	 * @param args unused
+	 */
+	public static void main(String[] args) {
+
+		String chromeDriverPath = Config.getString("App.chromeDriverPath");
+		System.setProperty("webdriver.chrome.driver", chromeDriverPath);
+		ChromeOptions options = new ChromeOptions();
+		options.addArguments("lang=zh_CN.UTF-8",
+				"user-agent=\"Mozilla/5.0 (iPod; U; CPU iPhone OS 2_1 like Mac OS X; ja-jp) AppleWebKit/525.18.1 (KHTML, like Gecko) Version/3.1.1 Mobile/5F137 Safari/525.20\"",
+				"cookies=");
+
+		driver = new ChromeDriver(options);
+
+		// Open a DevTools session and turn on network events.
+		devTools = driver.getDevTools();
+		devTools.createSession();
+		devTools.send(
+				Network.enable(java.util.Optional.empty(), java.util.Optional.empty(), java.util.Optional.empty()));
+
+		String startUrl = Config.getString("App.startUrl");
+		driver.get(startUrl);
+
+		String pattern = Config.getString("App.patten");
+		try {
+			crawl(pattern);
+		} catch (InterruptedException e) {
+			// Restore the interrupt flag instead of swallowing it, then stop crawling.
+			Thread.currentThread().interrupt();
+			System.err.println("interrupted while waiting for page data: " + e.getMessage());
+		}
+		// NOTE(review): the browser is never closed here; call driver.quit()
+		// once the window is no longer needed.
+	}
+
+	/**
+	 * Selects the page-size option, then clicks "next page" twice, printing
+	 * the intercepted response body after each step.
+	 *
+	 * @param pattern regular expression a response URL must fully match
+	 * @throws InterruptedException if the thread is interrupted while waiting
+	 */
+	private static void crawl(String pattern) throws InterruptedException {
+		final StringBuilder firstPage = new StringBuilder();
+		interceptResponse4XHR(pattern, res -> firstPage.append(res));
+
+		// Open the page-size dropdown, let it render, then pick 100 rows per page.
+		driver.findElementByCssSelector("#app div.el-input.el-input--mini.el-input--suffix").click();
+		Thread.sleep(DROPDOWN_WAIT_MS);
+		driver.findElementByXPath("/html/body/div[2]/div[1]/div[1]/ul/li[6]").click();
+		// Give the request triggered by the click time to finish before reading the buffer.
+		Thread.sleep(PAGE_LOAD_WAIT_MS);
+		System.out.println("res:" + firstPage);
+
+		for (int i = 2; i < 4; i++) {
+			final StringBuilder page = new StringBuilder();
+			// Swap the listener BEFORE clicking: a listener added after the
+			// click and the wait would miss the response it is meant to capture.
+			devTools.clearListeners();
+			interceptResponse4XHR(pattern, res -> page.append(res));
+
+			// Go to the next page and wait for its data.
+			driver.findElementByCssSelector("#app button.btn-next").click();
+			Thread.sleep(PAGE_LOAD_WAIT_MS);
+			System.out.println("res:" + page);
+		}
+	}
+
+	/**
+	 * Registers a DevTools listener that passes the body of every XHR response
+	 * whose URL fully matches {@code pattern} to {@code callback}.
+	 *
+	 * @param pattern  regular expression matched against the whole response URL
+	 * @param callback receiver of each matching response body
+	 */
+	public static void interceptResponse4XHR(String pattern, InterceptCallback<String> callback) {
+		// Compile once here rather than on every network event.
+		final Pattern urlPattern = Pattern.compile(pattern);
+		devTools.addListener(Network.responseReceived(), new Consumer<ResponseReceived>() {
+			@Override
+			public void accept(ResponseReceived responseReceived) {
+				if (ResourceType.XHR != responseReceived.getType()) {
+					return;
+				}
+				// Only the targeted ajax endpoint is of interest.
+				String url = responseReceived.getResponse().getUrl();
+				if (urlPattern.matcher(url).matches()) {
+					String body = devTools.send(Network.getResponseBody(responseReceived.getRequestId())).getBody();
+					callback.emit(body);
+				}
+			}
+		});
+	}
+
+	/**
+	 * Requests a screenshot from the driver. Both converters of the supplied
+	 * {@link OutputType} are stubs that return {@code null}, and the result is
+	 * discarded, so no image is produced yet.
+	 */
+	public void screenShot() {
+		OutputType<Image> outputType = new OutputType<Image>() {
+
+			@Override
+			public Image convertFromBase64Png(String base64Png) {
+				// TODO convert the base64 PNG into an Image
+				return null;
+			}
+
+			@Override
+			public Image convertFromPngBytes(byte[] png) {
+				// TODO convert the PNG bytes into an Image
+				return null;
+			}
+		};
+		driver.getScreenshotAs(outputType);
+	}
+}

+ 12 - 0
src/main/java/me/yoqi/crawlbid/InterceptCallback.java

@@ -0,0 +1,12 @@
+package me.yoqi.crawlbid;
+
+/**
+ * Callback interface.
+ *
+ * @author liuyuqi
+ *
+ * @param <T> type of the value handed to {@link #emit(Object)}
+ */
+@FunctionalInterface
+public interface InterceptCallback<T> {
+	/**
+	 * Receives one value.
+	 *
+	 * @param res the value being delivered
+	 */
+	void emit(T res);
+}

+ 33 - 0
src/main/java/me/yoqi/crawlbid/model/Config.java

@@ -0,0 +1,33 @@
+package me.yoqi.crawlbid.model;
+
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.util.Properties;
+
+/**
+ * Configuration lookup backed by the {@code config.properties} file, resolved
+ * against the working directory.
+ *
+ * Date: 2022-04-23 (Saturday)
+ *
+ * @author liuyuqi.gov@msn.
+ */
+public class Config {
+
+	private static final String path = "config.properties";
+	private static final Properties prop = new Properties();
+	/** True once the file has been read successfully; later lookups skip the disk. */
+	private static boolean loaded = false;
+
+	/**
+	 * Loads the properties file on first use. A failed load is reported on
+	 * standard output and retried on the next lookup.
+	 */
+	private static void init() {
+		if (loaded) {
+			return;
+		}
+		// try-with-resources closes the stream even when load() throws.
+		try (FileInputStream in = new FileInputStream(path)) {
+			prop.load(in);
+			loaded = true;
+		} catch (IOException e) {
+			System.out.println("加载配置错误" + e.getMessage());
+		}
+	}
+
+	/**
+	 * Returns the value stored under {@code key}.
+	 *
+	 * @param key property name
+	 * @return the property value, or {@code null} when the key is absent or
+	 *         the file could not be loaded
+	 */
+	public static String getString(String key) {
+		init();
+		return prop.getProperty(key);
+	}
+}

+ 38 - 0
src/test/java/me/yoqi/crawlbid/AppTest.java

@@ -0,0 +1,38 @@
+package me.yoqi.crawlbid;
+
+import junit.framework.Test;
+import junit.framework.TestCase;
+import junit.framework.TestSuite;
+
+/**
+ * Placeholder JUnit 3 test case for App.
+ */
+public class AppTest extends TestCase {
+
+    /**
+     * Builds the test case.
+     *
+     * @param testName name of the test case
+     */
+    public AppTest(String testName) {
+        super(testName);
+    }
+
+    /**
+     * Builds the suite from this class.
+     *
+     * @return the suite of tests being tested
+     */
+    public static Test suite() {
+        final Test allTests = new TestSuite(AppTest.class);
+        return allTests;
+    }
+
+    /**
+     * Trivial test that always passes.
+     */
+    public void testApp() {
+        final boolean alwaysTrue = true;
+        assertTrue(alwaysTrue);
+    }
+}

+ 12 - 0
src/test/java/me/yoqi/crawlbid/TString.java

@@ -0,0 +1,12 @@
+package me.yoqi.crawlbid;
+
+import java.util.regex.Pattern;
+
+/**
+ * Manual check that a regular expression fully matches a sample URL.
+ */
+public class TString {
+
+	/**
+	 * Prints whether the sample URL fully matches the pattern.
+	 *
+	 * @param args unused
+	 */
+	public static void main(String[] args) {
+		final String regex = ".*/api/ctpsp-public/user-certificate/endpoint/publicity-pager.*";
+		final String sampleUrl = "http://tpp.ctba.org.cn/api/ctpsp-public/user-certificate/endpoint/publicity-pager?current=3&size=10&level=0&status=3&keyword=";
+		// Same result as Pattern.matches(regex, input), written out step by step.
+		final boolean matched = Pattern.compile(regex).matcher(sampleUrl).matches();
+		System.out.println(matched);
+	}
+}