liuyuqi-dellpc 2 years ago
commit
03bf9744ab

+ 3 - 0
README.md

@@ -0,0 +1,3 @@
+# crawl_secondhand
+
+二手商品监控

+ 12 - 0
crawl_secondhand/__init__.py

@@ -0,0 +1,12 @@
+#!/usr/bin/env python
+# -*- encoding: utf-8 -*-
+'''
+@Contact :   liuyuqi.gov@msn.cn
+@Time    :   2023/04/02 12:50:47
+@License :   Copyright © 2017-2022 liuyuqi. All Rights Reserved.
+@Desc    :   
+'''
+from .crawl_secondhand import CrawlSecondhand
+
+def main():
+    '''Build the crawler and run one crawl.'''
+    crawler = CrawlSecondhand()
+    crawler.crawl()

+ 21 - 0
crawl_secondhand/api.py

@@ -0,0 +1,21 @@
+
+# Site URLs, one module-level constant per second-hand marketplace.
+# NOTE(review): every value below is an "about us" / contact page rather
+# than a listing or search endpoint -- presumably placeholders; confirm.
+
+# Paipai
+paipai="https://www.paipai.com/contact.html"
+
+# 58.com (58 Tongcheng)
+tongcheng="https://www.58.com/about/aboutus.html"
+
+# Used cars
+ershouche="https://www.che168.com/about/aboutus.html"
+
+# Second-hand housing
+ershoufang="https://www.anjuke.com/about/aboutus.html"
+
+# Second-hand books
+ershoushu="https://www.dangdang.com/aboutus/aboutus.html"
+
+# Second-hand mobile phones
+# NOTE(review): the constant is named ``zhuanzhuan`` but the URL is on
+# gome.com.cn -- confirm which site is intended.
+zhuanzhuan="https://www.gome.com.cn/aboutus/aboutus.html"
+
+# Xianyu
+xianyu="https://www.xianyu.com/about/aboutus.html"

+ 78 - 0
crawl_secondhand/crawl_secondhand.py

@@ -0,0 +1,78 @@
+#!/usr/bin/env python
+# -*- encoding: utf-8 -*-
+'''
+@Contact :   liuyuqi.gov@msn.cn
+@Time    :   2023/04/02 12:49:59
+@License :   Copyright © 2017-2022 liuyuqi. All Rights Reserved.
+@Desc    :   
+'''
+import requests
+import os,sys,re,time
+import json
+import logging
+import logging.config
+import argparse
+
+class CrawlSecondhand(object):
+    '''Crawler for second-hand goods.
+
+    Constructing an instance parses the command line, configures logging,
+    creates the ``conf`` and ``data`` directories under the current working
+    directory and records the parsed arguments in ``conf/args.json``.
+    '''
+
+    def __init__(self, argv=None):
+        '''Parse command-line options and initialise the runtime environment.
+
+        :param argv: list of argument strings to parse. ``None`` (the
+            default) makes argparse read ``sys.argv[1:]``, which is what the
+            constructor always did before this parameter existed.
+        '''
+        argparser = argparse.ArgumentParser(description='Crawl Secondhand')
+        argparser.add_argument('-l', '--log', help='log file path')
+        self.args = argparser.parse_args(argv)
+        self.init_config()
+
+    def init_config(self):
+        '''Configure logging and prepare the working directories.
+
+        Reads ``self.args.log`` and sets ``self.log_file``,
+        ``self.log_config`` and ``self.logger``. When no log path was given
+        only the console handler is installed; previously a missing ``-l``
+        option made the logging setup raise.
+
+        :raises OSError: if a directory or ``conf/args.json`` cannot be
+            created or written.
+        '''
+        self.log_file = self.args.log
+
+        handlers = {
+            'default': {
+                'level': 'DEBUG',
+                'class': 'logging.StreamHandler',
+            },
+        }
+        if self.log_file:
+            # RotatingFileHandler opens (and thereby creates) the file while
+            # dictConfig runs, so the parent directory must exist beforehand.
+            log_dir = os.path.dirname(os.path.abspath(self.log_file))
+            os.makedirs(log_dir, exist_ok=True)
+            handlers['file'] = {
+                'level': 'DEBUG',
+                'class': 'logging.handlers.RotatingFileHandler',
+                'filename': self.log_file,
+                'maxBytes': 1024 * 1024 * 5,  # 5 MB
+                'backupCount': 5,
+                'formatter': 'standard',
+            }
+
+        self.log_config = {
+            'version': 1,
+            'disable_existing_loggers': False,
+            'formatters': {
+                'standard': {
+                    'format': '%(asctime)s [%(levelname)s] %(name)s: %(message)s'
+                },
+            },
+            'handlers': handlers,
+            'loggers': {
+                '': {
+                    # Attach exactly the handlers that were configured above.
+                    'handlers': list(handlers),
+                    'level': 'DEBUG',
+                    'propagate': True
+                }
+            }
+        }
+        logging.config.dictConfig(self.log_config)
+        self.logger = logging.getLogger(__name__)
+
+        for directory in ("conf", "data"):
+            if not os.path.isdir(directory):
+                self.logger.info('%s file not exists, create it', directory)
+                # exist_ok avoids failing if the directory appears between
+                # the check above and this call.
+                os.makedirs(directory, exist_ok=True)
+
+        # Save the parsed arguments to the config file "conf/args.json".
+        with open("conf/args.json", 'w', encoding='utf-8') as f:
+            json.dump(vars(self.args), f)
+
+    def crawl(self):
+        '''Run the crawl. Not implemented yet; currently does nothing.'''
+        pass

+ 8 - 0
crawl_secondhand/paipai.py

@@ -0,0 +1,8 @@
+from .secondhand import Secondhand
+class Paipai(Secondhand):
+    '''JD Paipai second-hand goods.'''
+
+    def __init__(self):
+        '''Initialise the Paipai source.'''
+        # Run the base-class initialiser so state added to Secondhand later
+        # is also set up for this subclass.
+        super(Paipai, self).__init__()
+
+# Placeholder script guard: nothing is executed when the module is run directly.
+if __name__ == "__main__":
+    pass

+ 17 - 0
crawl_secondhand/secondhand.py

@@ -0,0 +1,17 @@
+#!/usr/bin/env python
+# -*- encoding: utf-8 -*-
+'''
+@Contact :   liuyuqi.gov@msn.cn
+@Time    :   2023/04/02 12:46:58
+@License :   Copyright © 2017-2022 liuyuqi. All Rights Reserved.
+@Desc    :   
+'''
+
+class Secondhand(object):
+    '''Second-hand goods.'''
+
+    def __init__(self):
+        '''Create the object; no attributes are set.'''
+        return None
+
+# Placeholder script guard: nothing is executed when the module is run directly.
+if __name__ == "__main__":
+    pass

+ 11 - 0
main.py

@@ -0,0 +1,11 @@
+#!/usr/bin/env python
+# -*- encoding: utf-8 -*-
+'''
+@Contact :   liuyuqi.gov@msn.cn
+@Time    :   2023/04/02 12:50:36
+@License :   Copyright © 2017-2022 liuyuqi. All Rights Reserved.
+@Desc    :   entry point
+'''
+from crawl_secondhand import main
+if __name__ == '__main__':
+    # Start the crawler only when this file is executed as a script.
+    main()

+ 2 - 0
requirements.txt

@@ -0,0 +1,2 @@
+requests
+flask