#coding=utf-8
'''
Append the records found in a directory of XML files to a single CSV file.

Dependencies: unicodecsv, bs4, lxml

Created on 2017-06-26
@version: python3.6
@author: liuyuqi
'''
import os
from os import chdir, listdir

import unicodecsv as csv
from bs4 import BeautifulSoup

# Directory whose entries are read as XML input files.
dataPath = "D:\\t"
# CSV file that every processed input appends its rows to.
resultFile = "D:\\result.csv"


def do_list_dir():
    """Run saveData() on every regular file directly inside dataPath."""
    for lists in listdir(dataPath):
        path = os.path.join(dataPath, lists)
        # listdir() also returns sub-directories, which open() cannot read.
        if os.path.isfile(path):
            saveData(path)


def _iter_complete_rows(box):
    """Yield one list of values per child of *box* whose cells are all usable.

    A cell contributes its first child node. If any cell of a row has no
    first child, the error is printed and the whole row is dropped.
    """
    for row in box.contents:
        cells = getattr(row, "contents", None)
        if cells is None:
            # Node without a child list (presumably stray text between
            # elements); it cannot be a record, so skip it.
            continue
        data = []
        row_ok = True
        for cell in cells:
            try:
                data.append(cell.contents[0])
            except (AttributeError, IndexError) as e:
                # Keep scanning so every bad cell of the row is reported.
                print(e)
                row_ok = False
        if row_ok:
            yield data


def saveData(dataFile):
    """Parse one XML file and append its <box1> records to resultFile.

    Each child of the first <box1> element becomes one CSV row, and each
    grandchild supplies one cell (its first child node). Rows with an
    unusable cell are skipped. Output is written with the 'excel' dialect,
    encoded as gb18030.

    Args:
        dataFile: path of the XML file to read (decoded as UTF-8, with
            undecodable bytes replaced).
    """
    with open(dataFile, 'r', encoding='utf8', errors='replace') as f:
        xml_doc = f.read()

    # Strip newlines and spaces so the parsed tree has no whitespace-only
    # text nodes between elements.
    # NOTE(review): this also removes spaces inside text values and between
    # a tag name and its attributes - confirm the inputs never rely on them.
    xml_doc = xml_doc.replace("\n", "")
    xml_doc = xml_doc.replace(" ", "")

    soup = BeautifulSoup(xml_doc, "xml")
    needData = soup.findChild("box1")
    if needData is None:
        # Nothing to export from this file; report it instead of crashing.
        print("no <box1> element found in " + str(dataFile))
        return

    # 'ab': append in binary mode; unicodecsv does the encoding itself.
    # The with-block guarantees the handle is closed even if a write fails.
    with open(resultFile, 'ab') as csvfile:
        writer = csv.writer(csvfile, dialect='excel', encoding='gb18030',
                            errors="ignore")
        for data in _iter_complete_rows(needData):
            writer.writerow(data)


def main():
    """Change into dataPath, convert every file there, and report progress."""
    print("start!")
    chdir(dataPath)
    do_list_dir()
    print("finish!")


if __name__ == "__main__":
    main()