|
@@ -0,0 +1,296 @@
|
|
|
+#!/usr/bin/env python
|
|
|
+# -*- encoding: utf-8 -*-
|
|
|
+'''
|
|
|
+@Contact : liuyuqi.gov@msn.cn
|
|
|
+@Time : 2023/05/17 12:38:45
|
|
|
+@License : Copyright © 2017-2022 liuyuqi. All Rights Reserved.
|
|
|
+@Desc : order bot
|
|
|
+'''
|
|
|
+from selenium.common import exceptions
|
|
|
+from selenium import webdriver
|
|
|
+from selenium.webdriver import ActionChains
|
|
|
+import time
|
|
|
+import re
|
|
|
+import requests
|
|
|
+import json
|
|
|
+import sys
|
|
|
+from bs4 import BeautifulSoup
|
|
|
+
|
|
|
+class TaobaoClimber:
|
|
|
+ '''
|
|
|
+ 淘宝爬虫
|
|
|
+ '''
|
|
|
+ def __init__(self, username, password):
|
|
|
+ ''' 初始化 '''
|
|
|
+ self.__session = requests.Session()
|
|
|
+ self.__username = username
|
|
|
+ self.__password = password
|
|
|
+
|
|
|
+ driver = None
|
|
|
+ action = None
|
|
|
+
|
|
|
+ # 是否登录
|
|
|
+ __is_logined = False
|
|
|
+
|
|
|
+ # 淘宝账户
|
|
|
+ __username = ""
|
|
|
+ # 登录密码
|
|
|
+ __password = ""
|
|
|
+ # 登陆URL
|
|
|
+ __login_url = "https://login.taobao.com/member/login.jhtml"
|
|
|
+ # 卖家待发货订单URL
|
|
|
+ __orders_url = "https://trade.taobao.com/trade/itemlist/list_sold_items.htm?action=itemlist/SoldQueryAction&event_submit_do_query=1&auctionStatus=PAID&tabCode=waitSend"
|
|
|
+ # 卖家正出售宝贝URL
|
|
|
+ __auction_url = "https://sell.taobao.com/auction/merchandise/auction_list.htm"
|
|
|
+ # 卖家仓库中宝贝URL
|
|
|
+ __repository_url = "https://sell.taobao.com/auction/merchandise/auction_list.htm?type=1"
|
|
|
+ # 卖家确认发货URL
|
|
|
+ __deliver_url = "https://wuliu.taobao.com/user/consign.htm?trade_id="
|
|
|
+ # 卖家退款URL
|
|
|
+ __refunding_url = "https://trade.taobao.com/trade/itemlist/list_sold_items.htm?action=itemlist/SoldQueryAction&event_submit_do_query=1&auctionStatus=REFUNDING&tabCode=refunding"
|
|
|
+ # 请求留言URL
|
|
|
+ __message_url = "https://trade.taobao.com/trade/json/getMessage.htm?archive=false&biz_order_id="
|
|
|
+ # requests会话
|
|
|
+ __session = None
|
|
|
+
|
|
|
+ def __login(self):
|
|
|
+ # 1.登陆
|
|
|
+ try:
|
|
|
+ self.driver.get(self.__login_url)
|
|
|
+ except exceptions.TimeoutException: # 当页面加载时间超过设定时间,JS来停止加载
|
|
|
+ self.driver.execute_script('window.stop()')
|
|
|
+
|
|
|
+ count = 0
|
|
|
+ while count < 5: # 重试5次
|
|
|
+ count += 1
|
|
|
+ if self.__login_one() is True:
|
|
|
+ break
|
|
|
+ if count == 5:
|
|
|
+ return False
|
|
|
+
|
|
|
+ # 2.保存cookies
|
|
|
+ # driver.switch_to_default_content() #需要返回主页面,不然获取的cookies不是登陆后cookies
|
|
|
+ list_cookies = self.driver.get_cookies()
|
|
|
+ cookies = {}
|
|
|
+ for s in list_cookies:
|
|
|
+ cookies[s['name']] = s['value']
|
|
|
+ requests.utils.add_dict_to_cookiejar(self.__session.cookies, cookies) # 将获取的cookies设置到session
|
|
|
+ return True
|
|
|
+
|
|
|
+ def __login_one(self):
|
|
|
+ try:
|
|
|
+ # 1.点击密码登录,切换到密码登录模式 默认是二维码登录
|
|
|
+ username_login_btn = self.driver.find_element_by_xpath("//a[@class='forget-pwd J_Quick2Static']")
|
|
|
+ if username_login_btn.is_displayed() is True:
|
|
|
+ username_login_btn.click()
|
|
|
+ except exceptions.ElementNotInteractableException:
|
|
|
+ pass
|
|
|
+
|
|
|
+ # 2.获取账户、密码输入框
|
|
|
+ username_input = self.driver.find_element_by_id("TPL_username_1")
|
|
|
+ password_input = self.driver.find_element_by_id("TPL_password_1")
|
|
|
+ # 3.为账户、密码赋值
|
|
|
+ username_input.clear()
|
|
|
+ username_input.send_keys(self.__username)
|
|
|
+ password_input.send_keys(self.__password)
|
|
|
+
|
|
|
+ # 4.滑块判断
|
|
|
+ self.__slide_login()
|
|
|
+
|
|
|
+ # 5.获取登陆按钮,并点击登录
|
|
|
+ submit_btn = self.driver.find_element_by_id("J_SubmitStatic")
|
|
|
+ submit_btn.click()
|
|
|
+ # 6.根据提示判断是否登录成功
|
|
|
+ try:
|
|
|
+ message = self.driver.find_element_by_id("J_Message").find_element_by_class_name("error")
|
|
|
+ if message.text == u"为了你的账户安全,请拖动滑块完成验证":
|
|
|
+ self.driver.execute_script(
|
|
|
+ "document.getElementById('J_Message').children[1].innerText='发货机器人:请滑动滑块,协助完成验证!';")
|
|
|
+ return False
|
|
|
+ except exceptions.NoSuchElementException:
|
|
|
+ pass
|
|
|
+
|
|
|
+ # 7.有时检测当前环境是否异常,此时休眠一段时间让它检测
|
|
|
+ while True:
|
|
|
+ try:
|
|
|
+ self.driver.find_element_by_id("J_SiteNav")
|
|
|
+ break
|
|
|
+ except exceptions.NoSuchElementException:
|
|
|
+ time.sleep(1)
|
|
|
+
|
|
|
+ return True
|
|
|
+
|
|
|
+ def __slide_login(self):
|
|
|
+ # 取得滑块所在div,判断是否display 一般首次登陆不需要滑块验证
|
|
|
+ slide_div = self.driver.find_element_by_id("nocaptcha")
|
|
|
+ if slide_div.is_displayed() is True:
|
|
|
+ self.driver.execute_script(
|
|
|
+ "document.getElementById('J_Message').children[1].innerText='发货机器人:请滑动滑块,协助完成验证!';")
|
|
|
+ while True:
|
|
|
+ try:
|
|
|
+ text = self.driver.find_element_by_id("nc_1__scale_text").text
|
|
|
+ if text == '验证通过':
|
|
|
+ break
|
|
|
+ time.sleep(0.5)
|
|
|
+ except exceptions.NoSuchElementException: # 此时处于刷新按钮状态
|
|
|
+ pass
|
|
|
+
|
|
|
+ def __get_orders_page(self):
|
|
|
+ # 1.bs4将资源转html
|
|
|
+ html = BeautifulSoup(self.driver.page_source, "html5lib")
|
|
|
+ # 2.取得所有的订单div
|
|
|
+ order_div_list = html.find_all("div", {"class": "item-mod__trade-order___2LnGB trade-order-main"})
|
|
|
+ # 3.遍历每个订单div,获取数据
|
|
|
+ data_array = []
|
|
|
+ for index, order_div in enumerate(order_div_list):
|
|
|
+ order_id = order_div.find("input", attrs={"name": "orderid"}).attrs["value"]
|
|
|
+ order_date = order_div.find("span",
|
|
|
+ attrs={"data-reactid": re.compile(r"\.0\.5\.3:.+\.0\.1\.0\.0\.0\.6")}).text
|
|
|
+ order_buyer = order_div.find("a", attrs={"class": "buyer-mod__name___S9vit"}).text
|
|
|
+ # 4.根据订单id组合url,请求订单对应留言
|
|
|
+ order_message = json.loads(self.__session.get(self.__message_url + order_id).text)['tip']
|
|
|
+ data_array.append((order_id, order_date, order_buyer, order_message))
|
|
|
+ return data_array
|
|
|
+
|
|
|
+ def climb(self):
|
|
|
+ # FIXME 没有真实订单的模拟测试,生产环境注释即可
|
|
|
+ # order_test = [("Test_1548615412315", "2018-08-07 15:00:03", "疯狂的石头",
|
|
|
+ u"留言: test@qq.com http://download.csdn.net/download/lqkitten/10113904")]
|
|
|
+ # return order_test
|
|
|
+
|
|
|
+ # 切换回淘宝窗口
|
|
|
+ self.driver.switch_to_window(self.driver.window_handles[0])
|
|
|
+
|
|
|
+ result = []
|
|
|
+
|
|
|
+ if self.__is_logined is False:
|
|
|
+ if self.__login() is False:
|
|
|
+ return result
|
|
|
+ else:
|
|
|
+ self.__is_logined = True
|
|
|
+
|
|
|
+ # 1.进入待发货订单页面
|
|
|
+ self.driver.get(self.__orders_url)
|
|
|
+ while True:
|
|
|
+ # 2.获取当前页面的订单信息
|
|
|
+ time.sleep(2) # 两秒等待页面加载
|
|
|
+ _orders = self.__get_orders_page()
|
|
|
+ result.extend(_orders)
|
|
|
+ try:
|
|
|
+ # 3.获取下一页按钮
|
|
|
+ next_page_li = self.driver.find_element_by_class_name("pagination-next")
|
|
|
+ # 4.判断按钮是否可点击,否则退出循环
|
|
|
+ next_page_li.get_attribute("class").index("pagination-disabled")
|
|
|
+ # 到达最后一页
|
|
|
+ break
|
|
|
+ except ValueError:
|
|
|
+ # 跳转到下一页
|
|
|
+ print(next_page_li.find_element_by_tag_name("a").text)
|
|
|
+ next_page_li.click()
|
|
|
+ time.sleep(1)
|
|
|
+ except exceptions.NoSuchElementException:
|
|
|
+ pass
|
|
|
+ return result
|
|
|
+
|
|
|
+ def unshelve(self):
|
|
|
+ # 切换回淘宝窗口
|
|
|
+ self.driver.switch_to_window(self.driver.window_handles[0])
|
|
|
+
|
|
|
+ if self.__is_logined is False:
|
|
|
+ if self.__login() is False:
|
|
|
+ return False
|
|
|
+ else:
|
|
|
+ self.__is_logined = True
|
|
|
+
|
|
|
+ try:
|
|
|
+ # 1.进入正出售宝贝页面
|
|
|
+ self.driver.get(self.__auction_url)
|
|
|
+ # 2.点击下架
|
|
|
+ choose_checkbox = self.driver.find_element_by_xpath(
|
|
|
+ "//*[@id='J_DataTable']/table/tbody[1]/tr[1]/td/input[1]")
|
|
|
+ choose_checkbox.click()
|
|
|
+ unshelve_btn = self.driver.find_element_by_xpath(
|
|
|
+ "//*[@id='J_DataTable']/div[2]/table/thead/tr[2]/td/div/button[2]")
|
|
|
+ unshelve_btn.click()
|
|
|
+ return True
|
|
|
+ except:
|
|
|
+ return False
|
|
|
+
|
|
|
+ def shelve(self):
|
|
|
+ # 切换回淘宝窗口
|
|
|
+ try:
|
|
|
+ self.driver.switch_to_window(self.driver.window_handles[0])
|
|
|
+ except exceptions:
|
|
|
+ print exceptions
|
|
|
+
|
|
|
+ if self.__is_logined is False:
|
|
|
+ if self.__login() is False:
|
|
|
+ return False
|
|
|
+ else:
|
|
|
+ self.__is_logined = True
|
|
|
+
|
|
|
+ # 1.进入仓库宝贝页面
|
|
|
+ self.driver.get(self.__repository_url)
|
|
|
+ # 2.点击上架
|
|
|
+ try:
|
|
|
+ choose_checkbox = self.driver.find_element_by_xpath("//*[@id='J_DataTable']/table/tbody[1]/tr[1]/td/input")
|
|
|
+ choose_checkbox.click()
|
|
|
+ shelve_btn = self.driver.find_element_by_xpath(
|
|
|
+ "//*[@id='J_DataTable']/div[3]/table/tbody/tr/td/div/button[2]")
|
|
|
+ shelve_btn.click()
|
|
|
+ except exceptions.NoSuchElementException:
|
|
|
+ pass
|
|
|
+
|
|
|
+ def delivered(self, orderId):
|
|
|
+ # 切换回淘宝窗口
|
|
|
+ self.driver.switch_to_window(self.driver.window_handles[0])
|
|
|
+
|
|
|
+ if self.__is_logined is False:
|
|
|
+ if self.__login() is False:
|
|
|
+ return False
|
|
|
+ else:
|
|
|
+ self.__is_logined = True
|
|
|
+ try:
|
|
|
+ # 1.进入确认发货页面
|
|
|
+ self.driver.get(self.__deliver_url + orderId)
|
|
|
+ no_need_logistics_a = self.driver.find_element_by_xpath("//*[@id='dummyTab']/a")
|
|
|
+ no_need_logistics_a.click()
|
|
|
+ self.driver.find_element_by_id("logis:noLogis").click()
|
|
|
+ time.sleep(1)
|
|
|
+ return True
|
|
|
+ except:
|
|
|
+ return False
|
|
|
+
|
|
|
+ def exists_refunding(self):
|
|
|
+ # 切换回淘宝窗口
|
|
|
+ self.driver.switch_to_window(self.driver.window_handles[0])
|
|
|
+
|
|
|
+ if self.__is_logined is False:
|
|
|
+ if self.__login() is False:
|
|
|
+ return False
|
|
|
+ else:
|
|
|
+ self.__is_logined = True
|
|
|
+ try:
|
|
|
+ # 1.进入退款页面
|
|
|
+ self.driver.get(self.__refunding_url)
|
|
|
+ self.driver.find_element_by_class_name("item-mod__trade-order___2LnGB trade-order-main")
|
|
|
+ return True
|
|
|
+ except exceptions.NoSuchElementException:
|
|
|
+ return False
|
|
|
+
|
|
|
+
|
|
|
+if __name__ == '__main__':
|
|
|
+ # 初始化
|
|
|
+ TaobaoClimber.driver = webdriver.Firefox() # 应将浏览器驱动放于python根目录下,且python已配置path环境变量
|
|
|
+ TaobaoClimber.action = ActionChains(TaobaoClimber.driver)
|
|
|
+ TaobaoClimber.driver.maximize_window() # 浏览器最大化
|
|
|
+ TaobaoClimber.driver.execute_script("window.open('')")
|
|
|
+
|
|
|
+ climber = TaobaoClimber(u"test", "123456")
|
|
|
+ while True:
|
|
|
+ # 循环爬取订单
|
|
|
+ orders = climber.climb()
|
|
|
+ for order in orders:
|
|
|
+ print_msg("淘宝订单产生:订单号:%s\t订单日期:%s \t买家:%s\t备注:%s" % order)
|
|
|
+ # 每30秒抓一次
|
|
|
+ time.sleep(30)
|