|
@@ -7,7 +7,7 @@
|
|
|
@Desc :
|
|
|
'''
|
|
|
|
|
|
-import os
|
|
|
+import os,datetime
|
|
|
import unicodecsv as csv
|
|
|
from bs4 import BeautifulSoup
|
|
|
import pandas as pd
|
|
@@ -47,12 +47,15 @@ class GooExport(object):
|
|
|
def removeDuplication(self):
    '''Rebind ``self.job`` to the result of its own ``drop_duplicates()`` call.

    ``self.job`` is presumably a pandas DataFrame (the file imports pandas
    and reads ``self.job.columns`` elsewhere) -- TODO confirm against the
    constructor. If so, rows that are identical in every column collapse
    to a single row. Returns None; the only effect is the reassignment.
    '''
    deduplicated = self.job.drop_duplicates()
    self.job = deduplicated
|
|
|
+
|
|
|
+
|
|
|
def run(self):
|
|
|
print("start!")
|
|
|
os.chdir(self.inputDataPath)
|
|
|
for lists in os.listdir(self.inputDataPath):
|
|
|
if os.path.isfile(os.path.join(self.inputDataPath, lists)) and lists.endswith(".xml"):
|
|
|
self.parseXml(os.path.join(self.inputDataPath, lists))
|
|
|
+
|
|
|
with open(self.outResultFile, 'wb') as f:
|
|
|
writer = csv.writer(f, dialect='excel', encoding='gb18030',errors="ignore")
|
|
|
writer.writerow(self.job.columns)
|
|
@@ -64,12 +67,14 @@ if __name__=='__main__':
|
|
|
|
|
|
parser = ArgumentParser()
|
|
|
parser.add_argument("-i", "--inputDataPath", help="inputDataPath", default=".", type=str)
|
|
|
- parser.add_argument("-o", "--outResultFile", help="outResultFile", default="/workspace/result.csv", type=str)
|
|
|
+ parser.add_argument("-o", "--outResultFile", help="outResultFile", default="", type=str)
|
|
|
|
|
|
args = parser.parse_args()
|
|
|
inputDataPath=args.inputDataPath
|
|
|
outResultFile=args.outResultFile
|
|
|
-
|
|
|
+ if outResultFile == "":
|
|
|
+ outResultFile= os.path.join(inputDataPath, "result%s.csv" % datetime.datetime.now().strftime("%Y%m%d%H%M%S"))
|
|
|
+
|
|
|
if inputDataPath=="" or outResultFile=="":
|
|
|
print("please input inputDataPath and outResultFile")
|
|
|
exit()
|