liuyuqi-dellpc 1 year ago
parent
commit
92571b2236
5 changed files with 46 additions and 4 deletions
  1. 2 0
      README.md
  2. 40 1
      crawl_secondhand/extractor/xianyu.py
  3. 2 0
      docs/xianyu.http
  4. 0 2
      main.py
  5. 2 1
      requirements.txt

+ 2 - 0
README.md

@@ -11,6 +11,8 @@
 
 
 ## Usage
 ## Usage
 
 
+
+
 **Web版本**
 **Web版本**
 
 
 ```
 ```

+ 40 - 1
crawl_secondhand/extractor/xianyu.py

@@ -6,11 +6,50 @@
 @License :   Copyright © 2017-2022 liuyuqi. All Rights Reserved.
 @License :   Copyright © 2017-2022 liuyuqi. All Rights Reserved.
 @Desc    :   闲鱼
 @Desc    :   闲鱼
 '''
 '''
+import os,sys,re,csv
+import requests
+from bs4 import BeautifulSoup
+from lxml import etree
 
 
 class Xianyu(object):
 class Xianyu(object):
     
     
     def __init__(self):
     def __init__(self):
+        # One requests session is kept on the instance; crawl() sends its
+        # HTTP request through it.
+        self.sess = requests.Session()
+
+    def crawl(self, keyword, page=1):
+        """Fetch one page of search results for ``keyword``.
+
+        Args:
+            keyword: Search term, sent as the ``q`` query parameter.
+            page: Value sent as the ``page`` query parameter (default 1).
+
+        Returns:
+            The response body as text, or ``False`` if the request failed.
+        """
+        # Build the query from the arguments: __init__ only sets ``self.sess``,
+        # so reading ``self.keyword`` / ``self.page`` raised AttributeError.
+        payload = {
+            # Key previously had a trailing space ("st_edtime "), which would
+            # have been sent as a different parameter name.
+            "st_edtime": 1,  # most recently published first
+            "_input_charset": "utf8",
+            "search_type": "item",
+            "q": keyword,
+            "page": page,
+            # "start": minPrice,  # price range
+            # "end": maxPrice,
+        }
+        try:
+            # A timeout keeps a stalled connection from blocking forever.
+            rep = self.sess.get(url="https://s.2.taobao.com/list/?",
+                                params=payload, timeout=10)
+            # Decode the body with the encoding detected from its content.
+            rep.encoding = rep.apparent_encoding
+            return rep.text
+        except requests.RequestException as e:
+            print('error' * 22)
+            # Report the cause instead of discarding it.
+            print(e)
+            return False
         pass
         pass
 
 
+    def save_to_csv(self, filename, data):
+        """Write ``data`` to ``filename`` as CSV, preceded by a header row.
+
+        Args:
+            filename: Path of the CSV file; opened in 'w' mode, so any
+                existing content is replaced.
+            data: Iterable of dicts keyed by ``title``, ``price``,
+                ``location`` and ``url`` (the columns written).
+        """
+        # exist_ok avoids the check-then-create race of exists() + mkdir().
+        os.makedirs('data', exist_ok=True)
+        csv_header = ['title', 'price', 'location', 'url']
+        # newline='' leaves line endings to the csv module; an explicit
+        # encoding avoids depending on the platform default for non-ASCII text.
+        with open(filename, 'w', newline='', encoding='utf-8') as f:
+            # DictWriter requires ``fieldnames``; writeheader() emits them as
+            # the first row (writerow() on a plain list does not work here).
+            writer = csv.DictWriter(f, fieldnames=csv_header, delimiter=',',
+                                    quotechar='"', quoting=csv.QUOTE_MINIMAL)
+            writer.writeheader()
+            writer.writerows(data)
+
 if __name__ == "__main__":
 if __name__ == "__main__":
-    pass
+    # Script entry: make sure the 'data' directory exists, then run one
+    # crawl per sample keyword, each on a fresh Xianyu instance.
+    data_dir = 'data'
+    if not os.path.exists(data_dir):
+        os.mkdir(data_dir)
+    for keyword in ['羽毛球拍', 'iphone']:
+        Xianyu().crawl(keyword)

File diff suppressed because it is too large
+ 2 - 0
docs/xianyu.http


+ 0 - 2
main.py

@@ -13,8 +13,6 @@ parser = argparse.ArgumentParser(description='Crawl Secondhand')
 parser.add_argument('command', help='command to run')
 parser.add_argument('command', help='command to run')
 parser.add_argument('-l', '--log', help='log file path')
 parser.add_argument('-l', '--log', help='log file path')
 
 
-
-
 if __name__=='__main__':
 if __name__=='__main__':
     args = parser.parse_args()
     args = parser.parse_args()
     if args.command == 'server':
     if args.command == 'server':

+ 2 - 1
requirements.txt

@@ -1,3 +1,4 @@
 requests
 requests
 flask
 flask
-dotenv
+dotenv
+pandas

Some files were not shown because too many files changed in this diff