liuyuqi-dellpc 6 years ago
parent
commit
a7470172f4
2 changed files with 55 additions and 0 deletions
  1. 53 0
      main.py
  2. 2 0
      requirements.txt

+ 53 - 0
main.py

@@ -0,0 +1,53 @@
+#coding=utf-8
+'''
+Created on 2017-06-26
+@version: python3.6
+@author: liuyuqi
+'''
+from nt import chdir, listdir
+
+import unicodecsv as csv
+from bs4 import BeautifulSoup
+
+dataPath="D:\\t"
+resultFile="D:\\result.csv"
+
+  
+def do_list_dir():
+    for lists in listdir(dataPath):
+        saveData(dataPath+"\\"+lists)
+
+def saveData(dataFile):
+    #打开dataFile文件
+    with open(dataFile, 'r',encoding='UTF-8') as f:
+        xml_doc =f.read()   #读取xml文本内容
+    #去除空格和换行
+    xml_doc=xml_doc.replace("\n", "")
+    xml_doc=xml_doc.replace(" ", "")
+    #xml形式读取
+    soup = BeautifulSoup(xml_doc,"xml")
+    
+    #打开resultFile写文件
+    csvfile = open(resultFile, 'ab')# r只读,w可写,a追加 b二进制读写
+    writer = csv.writer(csvfile, dialect='excel', encoding='gb18030',errors="ignore")
+    needData=soup.findChild("ss")
+    for i in range(len(needData.contents)):
+        data=[]
+        flag=True
+        for j in range(len(needData.contents[i].contents)):
+            try:
+                data.append(needData.contents[i].contents[j].contents[0])
+            except Exception as e:
+                print(e)
+                flag=False
+        if flag==True:
+            writer.writerow(data)    
+    csvfile.close()    
+            
+def main():
+    """Print "start!", chdir into dataPath, call do_list_dir, print "finish!"."""
+    print("start!")
+    chdir(dataPath)
+    do_list_dir()
+    print("finish!")
+
+# Called unconditionally at module level, so importing this file also runs it.
+main()

+ 2 - 0
requirements.txt

@@ -0,0 +1,2 @@
+beautifulsoup4==4.5.3
+unicodecsv==0.14.1