|
@@ -39,20 +39,25 @@ class CrawlMrdx():
|
|
|
:param url: news url
|
|
|
:param fileName: saved file name
|
|
|
'''
|
|
|
- with closing(requests.get(url=url, headers=user_agent.getheaders(), stream=True)) as response:
|
|
|
- chunkSize = 1024
|
|
|
- contentSize = int(response.headers["content-length"])
|
|
|
- if(os.path.exists(fileName) and os.path.getsize(fileName) == contentSize):
|
|
|
- print("跳过" + fileName)
|
|
|
- else:
|
|
|
- progress = DownloadProgress.DownloadProgress(fileName, total=contentSize, unit="KB",
|
|
|
- chunk_size=chunkSize, run_status="downloading", fin_status="downloaded")
|
|
|
- if not os.path.exists(os.path.dirname(fileName)):
|
|
|
- os.makedirs(os.path.dirname(fileName))
|
|
|
- with open(fileName, "wb") as file:
|
|
|
- for data in response.iter_content(chunk_size=chunkSize):
|
|
|
- file.write(data)
|
|
|
- progress.refresh(count=len(data))
|
|
|
+ try:
|
|
|
+ with closing(requests.get(url=url, headers=user_agent.getheaders(), stream=True)) as response:
|
|
|
+ chunkSize = 1024
|
|
|
+ contentSize = int(response.headers["content-length"])
|
|
|
+ if(os.path.exists(fileName) and os.path.getsize(fileName) == contentSize):
|
|
|
+ print("跳过" + fileName)
|
|
|
+ else:
|
|
|
+ progress = DownloadProgress.DownloadProgress(fileName, total=contentSize, unit="KB",
|
|
|
+ chunk_size=chunkSize, run_status="downloading", fin_status="downloaded")
|
|
|
+ if not os.path.exists(os.path.dirname(fileName)):
|
|
|
+ os.makedirs(os.path.dirname(fileName))
|
|
|
+ with open(fileName, "wb") as file:
|
|
|
+ for data in response.iter_content(chunk_size=chunkSize):
|
|
|
+ file.write(data)
|
|
|
+ progress.refresh(count=len(data))
|
|
|
+ except Exception as e:
|
|
|
+ print(e)
|
|
|
+ finally:
|
|
|
+ pass
|
|
|
|
|
|
def crawl(self, start: str, end: str):
|
|
|
'''crawl news
|