Browse Source

优化变量的使用,爬虫获取的.txt不再含标题信息

剑断了 2 years ago
parent
commit
f1f44e5a67
1 changed file with 4 additions and 4 deletions
  1. 4 4
      TitleSpider.py

+ 4 - 4
TitleSpider.py

@@ -18,10 +18,9 @@ def FinData(url):
     dataList = []
     dataList = []
     getUrl = requests.get(url=url)
     getUrl = requests.get(url=url)
     bsHtml = BeautifulSoup(getUrl.text, "html.parser")
     bsHtml = BeautifulSoup(getUrl.text, "html.parser")
-    global urlTitle
     urlTitleList = bsHtml.get_text().title().split('\n', 1)
     urlTitleList = bsHtml.get_text().title().split('\n', 1)
     urlTitle = urlTitleList[0][:-30].lstrip()
     urlTitle = urlTitleList[0][:-30].lstrip()
-    dataList.append(str(urlTitle))
+    # dataList.append(str(urlTitle))
     bsFinData = bsHtml.select('script')
     bsFinData = bsHtml.select('script')
     bsData = ''
     bsData = ''
 
 
@@ -38,7 +37,7 @@ def FinData(url):
     return dataList, urlTitle
     return dataList, urlTitle
 
 
 
 
-def saveAsTxt(video_list):
+def saveAsTxt(video_list, urlTitle):
     fileTitle = urlTitle + ".txt"  # 合成.txt格式 文件名
     fileTitle = urlTitle + ".txt"  # 合成.txt格式 文件名
 
 
     # 去除标题中的Windows不兼容的的命名字
     # 去除标题中的Windows不兼容的的命名字
@@ -63,4 +62,5 @@ def GetTxt(bid):
     url = urlPart + bv
     url = urlPart + bv
 
 
     dataList, urlTile = FinData(url)
     dataList, urlTile = FinData(url)
-    fileName = saveAsTxt(dataList)
+
+    fileName = saveAsTxt(dataList, urlTile)