|
@@ -0,0 +1,78 @@
|
|
|
+#!/usr/bin/env python
|
|
|
+# -*- encoding: utf-8 -*-
|
|
|
+'''
|
|
|
+@Contact : liuyuqi.gov@msn.cn
|
|
|
+@Time : 2023/04/02 12:49:59
|
|
|
+@License : Copyright © 2017-2022 liuyuqi. All Rights Reserved.
|
|
|
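+@Desc : Skeleton crawler for second-hand goods: parses CLI options, configures logging, and prepares the conf/ and data/ directories.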
|
|
|
+'''
|
|
|
+import requests
|
|
|
+import os,sys,re,time
|
|
|
+import json
|
|
|
+import logging
|
|
|
+import logging.config
|
|
|
+import argparse
|
|
|
+
|
|
|
class CrawlSecondhand(object):
    """Crawler for second-hand goods listings.

    Constructing an instance parses the command-line options, configures
    logging (console plus a rotating log file), makes sure the ``conf`` and
    ``data`` working directories exist, and records the parsed options in
    ``conf/args.json``.
    """

    # Fallback used when ``--log`` is omitted (or empty), so the rotating
    # file handler always receives a usable filename instead of ``None``.
    DEFAULT_LOG_FILE = 'crawl_secondhand.log'

    def __init__(self, argv=None):
        """Parse command-line options and initialise the runtime config.

        Args:
            argv: Optional list of argument strings to parse. ``None`` (the
                default) makes argparse read ``sys.argv[1:]``, which is the
                original behaviour; passing a list allows the class to be
                built from other code or from tests.
        """
        argparser = argparse.ArgumentParser(description='Crawl Secondhand')
        argparser.add_argument('-l', '--log', help='log file path')
        self.args = argparser.parse_args(argv)
        self.init_config()

    def init_config(self):
        """Initialise logging and the on-disk working directories.

        Sets ``self.log_file``, ``self.log_config`` and ``self.logger``,
        creates ``conf/`` and ``data/`` in the current working directory when
        they are missing, and dumps the parsed options to ``conf/args.json``.

        Raises:
            OSError: If a directory or file cannot be created.
            ValueError: If ``logging.config.dictConfig`` rejects the config.
        """
        # ``--log`` is optional; without a fallback the file handler would be
        # given ``None`` as its filename and dictConfig would fail.
        self.log_file = self.args.log or self.DEFAULT_LOG_FILE

        # The file handler opens the log file while it is being constructed,
        # so the parent directory has to exist before dictConfig runs.
        log_dir = os.path.dirname(self.log_file)
        if log_dir:
            os.makedirs(log_dir, exist_ok=True)

        self.log_config = {
            'version': 1,
            'disable_existing_loggers': False,
            'formatters': {
                'standard': {
                    'format': '%(asctime)s [%(levelname)s] %(name)s: %(message)s'
                },
            },
            'handlers': {
                'default': {
                    'level': 'DEBUG',
                    'class': 'logging.StreamHandler',
                },
                'file': {
                    'level': 'DEBUG',
                    'class': 'logging.handlers.RotatingFileHandler',
                    'filename': self.log_file,
                    'maxBytes': 1024 * 1024 * 5,  # 5 MB
                    'backupCount': 5,
                    'formatter': 'standard',
                    # Explicit encoding so non-ASCII messages are written the
                    # same way regardless of the platform's default locale.
                    'encoding': 'utf-8',
                },
            },
            'loggers': {
                '': {
                    'handlers': ['default', 'file'],
                    'level': 'DEBUG',
                    'propagate': True
                }
            }
        }
        logging.config.dictConfig(self.log_config)
        self.logger = logging.getLogger(__name__)

        # NOTE: no explicit "create the log file" step is needed here. The
        # rotating file handler has already opened (and therefore created)
        # the file, and re-opening it in 'w' mode would truncate the log.
        self._ensure_dir("conf", 'conf file not exists, create it')
        self._ensure_dir("data", 'data file not exists, create it')

        # save args to config file "conf/args.json"
        with open("conf/args.json", 'w', encoding='utf-8') as f:
            json.dump(vars(self.args), f)

    def _ensure_dir(self, path, message):
        """Create directory ``path`` if it is missing, logging ``message``."""
        if not os.path.exists(path):
            self.logger.info(message)
            # exist_ok guards against another process creating the directory
            # between the existence check and the creation call.
            os.makedirs(path, exist_ok=True)

    def crawl(self):
        """Run the crawl. Not implemented yet; currently does nothing."""
        pass
|