|
@@ -20,7 +20,7 @@ def do_list_dir():
|
|
|
|
|
|
def saveData(dataFile):
|
|
def saveData(dataFile):
|
|
#打开dataFile文件
|
|
#打开dataFile文件
|
|
- with open(dataFile, 'r',encoding='UTF-8') as f:
|
|
|
|
|
|
+ with open(dataFile, 'r',encoding='utf8', errors = 'replace') as f:
|
|
xml_doc =f.read() #读取xml文本内容
|
|
xml_doc =f.read() #读取xml文本内容
|
|
#去除空格和换行
|
|
#去除空格和换行
|
|
xml_doc=xml_doc.replace("\n", "")
|
|
xml_doc=xml_doc.replace("\n", "")
|
|
@@ -31,7 +31,7 @@ def saveData(dataFile):
|
|
#打开resultFile写文件
|
|
#打开resultFile写文件
|
|
csvfile = open(resultFile, 'ab')# r只读,w可写,a追加 b二进制读写
|
|
csvfile = open(resultFile, 'ab')# r只读,w可写,a追加 b二进制读写
|
|
writer = csv.writer(csvfile, dialect='excel', encoding='gb18030',errors="ignore")
|
|
writer = csv.writer(csvfile, dialect='excel', encoding='gb18030',errors="ignore")
|
|
- needData=soup.findChild("ss")
|
|
|
|
|
|
+ needData=soup.findChild("box1")
|
|
for i in range(len(needData.contents)):
|
|
for i in range(len(needData.contents)):
|
|
data=[]
|
|
data=[]
|
|
flag=True
|
|
flag=True
|