123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354 |
- #coding=utf-8
- '''
- Requires: unicodecsv, bs4, lxml
- Created on 2017-06-26
- @version: python3.6
- @author: liuyuqi
- '''
- from nt import chdir, listdir
- import unicodecsv as csv
- from bs4 import BeautifulSoup
- dataPath="D:\\t"
- resultFile="D:\\result.csv"
-
def do_list_dir():
    """Convert every regular file located directly inside ``dataPath``.

    The full path of each file is handed to ``saveData``. Entries that are
    not regular files (for example sub-directories) are skipped, because
    ``saveData`` opens its argument as a text file and would fail on them.
    """
    # Function-scope import: the file's top-level imports do not provide os.path.
    import os

    for entry in os.listdir(dataPath):
        # os.path.join picks the platform separator instead of a literal "\\".
        full_path = os.path.join(dataPath, entry)
        if os.path.isfile(full_path):
            saveData(full_path)
def saveData(dataFile):
    """Append the rows found under the ``box1`` element of one XML file to ``resultFile``.

    The file is read as UTF-8 (undecodable bytes are replaced), stripped of
    every newline and space character, and parsed with BeautifulSoup's
    ``"xml"`` parser. Each child of the first ``box1`` element is treated as
    one CSV row, and the first content item of each of that child's own
    children becomes one cell. A row is written only if every one of its
    cells could be extracted; otherwise the error is printed and the row is
    dropped.

    Args:
        dataFile: Path of the XML file to read.

    The CSV output is opened in binary append mode and encoded as gb18030,
    with unencodable characters ignored. If the document has no ``box1``
    element, a message is printed and nothing is written.
    """
    # Read the whole XML document.
    with open(dataFile, 'r', encoding='utf8', errors='replace') as f:
        xml_doc = f.read()

    # Remove newlines and spaces so no whitespace-only text nodes sit between tags.
    # NOTE(review): this removes *every* space, including ones inside text
    # values and between a tag name and its attributes - confirm the input
    # files never rely on those.
    xml_doc = xml_doc.replace("\n", "").replace(" ", "")

    # Parse as XML.
    soup = BeautifulSoup(xml_doc, "xml")

    needData = soup.findChild("box1")
    if needData is None:
        # Without this guard the .contents access below raises AttributeError.
        print("no box1 element found in " + dataFile)
        return

    # 'a' appends, 'b' is binary mode; the writer does the encoding itself.
    # The with-block guarantees the file is closed even if a row fails.
    with open(resultFile, 'ab') as csvfile:
        writer = csv.writer(csvfile, dialect='excel', encoding='gb18030', errors="ignore")
        for row in needData.contents:
            cells = getattr(row, "contents", None)
            if cells is None:
                # A bare text node directly under box1 has no children to export.
                continue

            data = []
            complete = True
            for cell in cells:
                try:
                    data.append(cell.contents[0])
                except (AttributeError, IndexError) as e:
                    # AttributeError: the cell is a text node, not a tag.
                    # IndexError: the cell is an empty tag.
                    print(e)
                    complete = False

            if complete:
                writer.writerow(data)
-
def main():
    """Run the whole conversion: switch to ``dataPath`` and process its files.

    Prints "start!" before and "finish!" after the work is done.
    """
    print("start!")
    chdir(dataPath)
    do_list_dir()
    print("finish!")


# Run the conversion only when executed as a script, so that importing this
# module does not start the batch job as a side effect.
if __name__ == "__main__":
    main()
|