main.py 1.4 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253
  1. #coding=utf-8
  2. '''
  3. Created on 2017-06-26
  4. @version: python3.6
  5. @author: liuyuqi
  6. '''
  7. from nt import chdir, listdir
  8. import unicodecsv as csv
  9. from bs4 import BeautifulSoup
  10. dataPath="D:\\t"
  11. resultFile="D:\\result.csv"
  12. def do_list_dir():
  13. for lists in listdir(dataPath):
  14. saveData(dataPath+"\\"+lists)
  15. def saveData(dataFile):
  16. #打开dataFile文件
  17. with open(dataFile, 'r',encoding='UTF-8') as f:
  18. xml_doc =f.read() #读取xml文本内容
  19. #去除空格和换行
  20. xml_doc=xml_doc.replace("\n", "")
  21. xml_doc=xml_doc.replace(" ", "")
  22. #xml形式读取
  23. soup = BeautifulSoup(xml_doc,"xml")
  24. #打开resultFile写文件
  25. csvfile = open(resultFile, 'ab')# r只读,w可写,a追加 b二进制读写
  26. writer = csv.writer(csvfile, dialect='excel', encoding='gb18030',errors="ignore")
  27. needData=soup.findChild("ss")
  28. for i in range(len(needData.contents)):
  29. data=[]
  30. flag=True
  31. for j in range(len(needData.contents[i].contents)):
  32. try:
  33. data.append(needData.contents[i].contents[j].contents[0])
  34. except Exception as e:
  35. print(e)
  36. flag=False
  37. if flag==True:
  38. writer.writerow(data)
  39. csvfile.close()
  40. def main():
  41. print("start!")
  42. chdir(dataPath)
  43. do_list_dir()
  44. print("finish!")
  45. main()