|
@@ -6,44 +6,13 @@
|
|
@License : Copyright © 2017-2022 liuyuqi. All Rights Reserved.
|
|
@License : Copyright © 2017-2022 liuyuqi. All Rights Reserved.
|
|
@Desc : main function
|
|
@Desc : main function
|
|
'''
|
|
'''
|
|
-import time
|
|
|
|
-import sys
|
|
|
|
-import re
|
|
|
|
-import os
|
|
|
|
-from crawl_xiaohua.crawl_xiaohua import CrawlXiaohua
|
|
|
|
-from crawl_xiaohua.extractor.mzsock import Mzsock
|
|
|
|
-from crawl_xiaohua.extractor.xiaohuar import Xiaohuar
|
|
|
|
-from flask import Flask
|
|
|
|
-
|
|
|
|
|
|
|
|
-def server(config: str, argv=None):
|
|
|
|
- ''' web server mode '''
|
|
|
|
- if argv is None:
|
|
|
|
- argv = sys.argv
|
|
|
|
- else:
|
|
|
|
- sys.argv.extend(argv)
|
|
|
|
- app = Flask(__name__)
|
|
|
|
- app.run()
|
|
|
|
|
|
+from crawl_xiaohua.crawl_xiaohua import CrawlXiaohua
|
|
|
|
|
|
|
|
+def server(config:str,argv=None):
|
|
|
|
+ crawl_xiaohua = CrawlXiaohua(config)
|
|
|
|
+ crawl_xiaohua.server(config=config)
|
|
|
|
|
|
-def run(extractor: str, cmd: str, argv=None):
|
|
|
|
- ''' shell mode '''
|
|
|
|
- if argv is None:
|
|
|
|
- argv = sys.argv
|
|
|
|
- if extractor == 'xiaohua':
|
|
|
|
- crawl = CrawlXiaohua()
|
|
|
|
- if cmd == 'duanzi':
|
|
|
|
- crawl.crawlDuanzi()
|
|
|
|
- else:
|
|
|
|
- crawl.crawl()
|
|
|
|
- elif extractor == 'xiaohuar':
|
|
|
|
- crawl = Xiaohuar()
|
|
|
|
- crawl.run()
|
|
|
|
- elif extractor == 'mzsock':
|
|
|
|
- crawl = Mzsock()
|
|
|
|
- categroy_urls = crawl.get_categroy_url()
|
|
|
|
- urllist = crawl.get_urllist(categroy_urls)
|
|
|
|
- contentlist = crawl.get_contentlist(urllist)
|
|
|
|
- crawl.get_content(contentlist)
|
|
|
|
- else:
|
|
|
|
- print('unknown extractor: %s' % extractor)
|
|
|
|
|
|
+def run(extractor:str, cmd:str,argv=None):
|
|
|
|
+ crawl_xiaohua = CrawlXiaohua()
|
|
|
|
+ crawl_xiaohua.run(extractor, cmd)
|