Browse Source

完事，接口限制。多次执行会出错。

liuyuqi-dellpc 4 years ago
parent
commit
21b2edaa1b

BIN
screenshot/BaiduHi_2020-2-18_11-13-33.png


BIN
screenshot/BaiduHi_2020-2-18_11-14-28.png


BIN
screenshot/BaiduHi_2020-2-18_11-16-10.png


+ 12 - 10
tools-ocr/src/main/java/com/luooqi/ocr/MainFm.java

@@ -17,27 +17,27 @@ import java.util.Collection;
  * @author liuyuqi
  */
 public class MainFm {
-   private static String outputDirPath;
+    private static String outputDirPath;
+
     public static void main(String[] args) {
         String srcDirPath = "D:/liuyuqi/fishsource/python/video-subtitle-extract/videos/大象解说《血战钢锯岭》";
-//
-        String outputDirPath = "D:/liuyuqi/fishsource/python/video-subtitle-extract/";
+        outputDirPath = "D:/liuyuqi/fishsource/python/video-subtitle-extract/res";
 
         // 获取所有png文件
         Collection<File> javaGbkFileCol = FileUtils.listFiles(new File(srcDirPath), new String[]{"png"}, true);
         String res = "";
         for (File img : javaGbkFileCol) {
 //            对每个图片识别
-            res += imgOCR(img);
+            res += imgOCR(img) + "\r\n";
         }
-
         saveRes(res);
         System.out.println(res);
     }
-    public static void saveRes(String res){
-        File file = new File(outputDirPath+Math.abs(UUID.randomUUID().hashCode())+".txt");
+
+    public static void saveRes(String res) {
+        File file = new File(outputDirPath + Math.abs(UUID.randomUUID().hashCode()) + ".txt");
         try {
-            FileUtils.writeStringToFile(file,res);
+            FileUtils.writeStringToFile(file, res);
         } catch (IOException e) {
             e.printStackTrace();
         }
@@ -53,8 +53,10 @@ public class MainFm {
         }
         byte[] bytes = CommUtils.imageToBytes(image);
         int ocrType = 0;
-        ocrType = Math.abs(UUID.randomUUID().hashCode()) % 4;
-        String text = OcrUtils.ocrImg(bytes, ocrType);
+        String text = OcrUtils.ocrImg(bytes, Math.abs(UUID.randomUUID().hashCode()) % 4);
+        if (text == "") {
+            text = OcrUtils.ocrImg(bytes, Math.abs(UUID.randomUUID().hashCode()) % 4);
+        }
         return text;
     }
 }

+ 17 - 5
tools-ocr/src/main/java/com/luooqi/ocr/utils/OcrUtils.java

@@ -13,6 +13,7 @@ import cn.hutool.json.JSONArray;
 import cn.hutool.json.JSONObject;
 import cn.hutool.json.JSONUtil;
 import com.luooqi.ocr.model.TextBlock;
+import com.sun.deploy.util.StringUtils;
 
 import java.awt.*;
 import java.util.*;
@@ -25,7 +26,7 @@ import java.util.List;
 public class OcrUtils {
 
     public static String ocrImg(byte[] imgData, int ocrType) {
-        switch (ocrType){
+        switch (ocrType) {
             case 0:
                 return bdGeneralOcr(imgData);
             case 1:
@@ -37,15 +38,15 @@ public class OcrUtils {
         }
     }
 
-    private static String bdGeneralOcr(byte[] imgData){
+    private static String bdGeneralOcr(byte[] imgData) {
         return bdBaseOcr(imgData, "general_location");
     }
 
-    private static String bdAccurateOcr(byte[] imgData){
+    private static String bdAccurateOcr(byte[] imgData) {
         return bdBaseOcr(imgData, "https://aip.baidubce.com/rest/2.0/ocr/v1/accurate");
     }
 
-    private static String bdBaseOcr(byte[] imgData, String type){
+    private static String bdBaseOcr(byte[] imgData, String type) {
         String[] urlArr = new String[]{"http://ai.baidu.com/tech/ocr/general", "http://ai.baidu.com/index/seccode?action=show"};
         StringBuilder cookie = new StringBuilder();
         for (String url : urlArr) {
@@ -58,7 +59,7 @@ public class OcrUtils {
         HashMap<String, String> header = new HashMap<>();
         header.put("Referer", "http://ai.baidu.com/tech/ocr/general");
         header.put("Cookie", cookie.toString());
-        String data = "type="+URLUtil.encodeQuery(type)+"&detect_direction=false&image_url&image=" + URLUtil.encodeQuery("data:image/jpeg;base64," + Base64.encode(imgData)) + "&language_type=CHN_ENG";
+        String data = "type=" + URLUtil.encodeQuery(type) + "&detect_direction=false&image_url&image=" + URLUtil.encodeQuery("data:image/jpeg;base64," + Base64.encode(imgData)) + "&language_type=CHN_ENG";
         HttpResponse response = WebUtils.postRaw("http://ai.baidu.com/aidemo", data, 0, header);
         return extractBdResult(WebUtils.getSafeHtml(response));
     }
@@ -72,6 +73,11 @@ public class OcrUtils {
         return extractSogouResult(CommUtils.postMultiData(url, postData, boundary.substring(2)));
     }
 
+    /**
+     * @param imgData
+     * @return
+     *  error 签名错误
+     */
     public static String sogouWebOcr(byte[] imgData) {
         String url = "https://deepi.sogou.com/api/sogouService";
         String referer = "https://deepi.sogou.com/?from=picsearch&tdsourcetag=s_pctim_aiomsg";
@@ -96,6 +102,9 @@ public class OcrUtils {
         if (StrUtil.isBlank(html)) {
             return "";
         }
+        if (JSONUtil.isJson(html) == false) {
+            return "";
+        }
         JSONObject jsonObject = JSONUtil.parseObj(html);
         if (jsonObject.getInt("success", 0) != 1) {
             return "";
@@ -123,6 +132,9 @@ public class OcrUtils {
         if (StrUtil.isBlank(html)) {
             return "";
         }
+        if (JSONUtil.isJson(html) == false) {
+            return "";
+        }
         JSONObject jsonObject = JSONUtil.parseObj(html);
         if (jsonObject.getInt("errno", 0) != 0) {
             return "";