Browse Source

处理一个 gb18030 编码报错。这说明 f.read() 最好以二进制方式读取;按指定编码读取仍有问题:文件的实际编码未知,而默认的 gbk 无法打开部分文件。

liuyuqi-dellpc 7 years ago
parent
commit
5078754503
1 changed file with 32 additions and 21 deletions
  1. 32 21
      python/convert.py

+ 32 - 21
python/convert.py

@@ -1,39 +1,50 @@
 # Python3 将GBK转换成utf-8编码,明天继续实现,把*.java文件 *.properties文件都转成utf-8
-import os, chardet, codecs
+import os
+import chardet
+import codecs
 
-def ReadFile(filePath, encoding="gbk"):
+
+def ReadFile(filePath, dst, encoding="gbk"):
     with codecs.open(filePath, "r", encoding) as f:
-        return f.read()
- 
+        try:
+            WriteFile(dst, f.read(), encoding="utf-8")
+            try:
+                print(filePath + "  " + encoding + " to utf-8  converted!")
+            except Exception:
+                print("print error")
+        except Exception:
+            print(filePath +"  "+ encoding+ "  read error")
+
+
 def WriteFile(filePath, u, encoding="utf-8"):
     with codecs.open(filePath, "w", encoding) as f:
         f.write(u)
- 
+
+
 def GBK_2_UTF8(src, dst):
-#     检测编码
+    #     检测编码
     f = open(src, "rb")
     coding = chardet.detect(f.read())["encoding"]
     f.close()
     if coding != "utf-8":
-        content = ReadFile(src, encoding=coding)
-        WriteFile(dst, content, encoding="utf-8")
-        try:
-            print(src + "  " + coding + " to utf-8  converted!")
-        except Exception:
-            print("print error")
+        ReadFile(src, dst, encoding=coding)
+
 
 # 递归遍历rootdir目录,把目录中的*.java编码由gbk转换为utf-8
 def ReadDirectoryFile(rootdir):
     for parent, dirnames, filenames in os.walk(rootdir):
             # case 1:
-            for dirname in dirnames:
-#                     print("folder is:" + os.path.join(parent, dirname))
-                    ReadDirectoryFile(dirname)
-            # case 2
-            for filename in filenames:    
-#                     print("filename with full path:" + os.path.join(parent, filename))
-                    if filename.endswith(".java"):
-                            GBK_2_UTF8(os.path.join(parent, filename), os.path.join(parent, filename))
+        for dirname in dirnames:
+            #                     print("folder is:" + os.path.join(parent, dirname))
+            ReadDirectoryFile(dirname)
+        # case 2
+        for filename in filenames:
+            #                     print("filename with full path:" + os.path.join(parent, filename))
+            if filename.endswith(".java"):
+                GBK_2_UTF8(os.path.join(parent, filename),
+                           os.path.join(parent, filename))
+
+
 if __name__ == "__main__":
-    src_path = "D:/liuyuqi/twtech/github/q 前端/java-note/notepadpp"
+    src_path = "D:/liuyuqi/twtech/github/q 前端/java-note/src"
     ReadDirectoryFile(src_path)