|
@@ -0,0 +1,53 @@
|
|
|
+#coding=utf-8
|
|
|
+'''
|
|
|
+Created on 2017年6月26日
|
|
|
+@version: python3.6
|
|
|
+@author: liuyuqi
|
|
|
+'''
|
|
|
+from nt import chdir, listdir
|
|
|
+
|
|
|
+import unicodecsv as csv
|
|
|
+from bs4 import BeautifulSoup
|
|
|
+
|
|
|
# Directory whose entries are read as XML input files.
dataPath = "D:\\t"
# CSV file that receives the extracted rows; it is opened in append mode.
resultFile = "D:\\result.csv"
|
|
|
+
|
|
|
+
|
|
|
def do_list_dir():
    """Convert every regular file found directly inside ``dataPath``.

    Each file is handed to ``saveData`` using its full path. Sub-directories
    are skipped, because ``saveData`` opens its argument as a text file and
    would fail on a directory.
    """
    import os  # local import so this function does not depend on file-level imports

    for entry in os.listdir(dataPath):
        # os.path.join supplies the platform separator instead of a
        # hard-coded "\\".
        full_path = os.path.join(dataPath, entry)
        if os.path.isfile(full_path):
            saveData(full_path)
|
|
|
+
|
|
|
def saveData(dataFile):
    """Append the records of one XML file to the CSV file ``resultFile``.

    The file is read as UTF-8, every newline and space character is removed,
    and the remaining text is parsed with BeautifulSoup's "xml" parser. Each
    child of the ``<ss>`` element returned by ``findChild`` is treated as one
    record, and each child of a record as one field; the first content node
    of a field becomes one CSV cell. A record is written only when all of
    its fields could be read; otherwise the errors are printed and the
    record is dropped.

    Args:
        dataFile: Path of the XML file to convert.
    """
    # Read the whole XML document.
    with open(dataFile, 'r', encoding='UTF-8') as f:
        xml_doc = f.read()

    # Strip newlines and spaces before parsing.
    # NOTE(review): this removes *every* space, including spaces inside text
    # values and between a tag name and its attributes. Kept as-is to
    # preserve the existing output; confirm the input never relies on them.
    xml_doc = xml_doc.replace("\n", "").replace(" ", "")

    # Parse as XML.
    soup = BeautifulSoup(xml_doc, "xml")
    needData = soup.findChild("ss")
    if needData is None:
        # Without this guard a file lacking <ss> raised AttributeError and
        # aborted the whole run.
        print("skip {}: no <ss> element found".format(dataFile))
        return

    # 'ab': append, binary mode. The with-block guarantees the handle is
    # closed even if writing a row fails.
    with open(resultFile, 'ab') as csvfile:
        writer = csv.writer(csvfile, dialect='excel', encoding='gb18030', errors="ignore")
        for record in needData.contents:
            row = _extract_row(record)
            if row is not None:
                writer.writerow(row)


def _extract_row(record):
    """Return the cell values of one record, or None if it cannot be read.

    Every failure is printed. All fields are still visited after a failure,
    so each problem in the record is reported.
    """
    try:
        fields = record.contents
    except AttributeError as e:
        # A bare text node has no children, so it is not a record.
        print(e)
        return None

    row = []
    complete = True
    for field in fields:
        try:
            row.append(field.contents[0])
        except (AttributeError, IndexError) as e:
            # AttributeError: the field is a text node.
            # IndexError: the field is an empty tag.
            print(e)
            complete = False
    return row if complete else None
|
|
|
+
|
|
|
def main():
    """Run the conversion from start to finish.

    Prints "start!", makes ``dataPath`` the working directory, processes the
    directory through ``do_list_dir`` and then prints "finish!".
    """
    print("start!")
    source_dir = dataPath
    chdir(source_dir)
    do_list_dir()
    print("finish!")
|
|
|
+
|
|
|
# Run the conversion only when this file is executed as a script, so that
# importing the module does not start processing files.
if __name__ == "__main__":
    main()
|