Browse Source

增加 csv2excel.py

liuyuqi-dellpc 3 years ago
parent
commit
86a2a2c900
3 changed files with 20 additions and 2 deletions
  1. 8 0
      README.md
  2. 10 0
      csv2excel.py
  3. 2 2
      main.py

+ 8 - 0
README.md

@@ -6,4 +6,12 @@
 1. 安装python
 2. pip install -r requirements.txt
 3. main.py中,配置*.xml文件位置
+
+dataPath=xx
+resultFile=xx
+
+ needData=soup.findChild("box1")   # 请将 box1 替换为你在 jisouke 中定义的整理箱名称(此处以 box1 为例)
+
+ 爬虫数据放到 data 目录中。
+
 4. python main.py

+ 10 - 0
csv2excel.py

@@ -0,0 +1,10 @@
+import pandas as pd
+
+
+def csv_to_xlsx():
+    csv = pd.read_csv(r"result.csv", encoding="gb2312")
+    csv.to_excel(r"result.xlsx", sheet_name="data")
+
+
+if __name__ == "__main__":
+    csv_to_xlsx()

+ 2 - 2
main.py

@@ -20,7 +20,7 @@ def do_list_dir():
 
 def saveData(dataFile):
     #打开dataFile文件
-    with open(dataFile, 'r',encoding='UTF-8') as f:
+    with open(dataFile, 'r',encoding='utf8', errors = 'replace') as f:
         xml_doc =f.read()   #读取xml文本内容
     #去除空格和换行
     xml_doc=xml_doc.replace("\n", "")
@@ -31,7 +31,7 @@ def saveData(dataFile):
     #打开resultFile写文件
     csvfile = open(resultFile, 'ab')# r只读,w可写,a追加 b二进制读写
     writer = csv.writer(csvfile, dialect='excel', encoding='gb18030',errors="ignore")
-    needData=soup.findChild("ss")
+    needData=soup.findChild("box1")
     for i in range(len(needData.contents)):
         data=[]
         flag=True