#!/usr/bin/env python
# -*- encoding: utf-8 -*-
'''
@Contact :   liuyuqi.gov@msn.cn
@Time    :   2023/05/17 12:38:45
@License :   Copyright © 2017-2022 liuyuqi. All Rights Reserved.
@Desc    :   order bot
'''
import json
import re
import sys
import time

import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common import exceptions
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By


class TaobaoClimber:
    ''' Taobao seller-side crawler driven through a shared Selenium browser.

    The browser is NOT created by this class: the caller must assign
    ``TaobaoClimber.driver`` (and optionally ``TaobaoClimber.action``) before
    calling any public method. Page loads and clicks go through that driver;
    per-order buyer messages are fetched with a ``requests`` session that
    reuses the browser's cookies after login.
    '''

    # Selenium WebDriver shared by all instances; must be set by the caller.
    driver = None
    # ActionChains bound to ``driver``; set by the caller, not used in this class.
    action = None
    # Whether a login has already succeeded.
    __is_logined = False
    # Taobao account name.
    __username = ""
    # Login password.
    __password = ""
    # Login page URL.
    __login_url = "https://login.taobao.com/member/login.jhtml"
    # Seller "paid, waiting to ship" orders URL.
    __orders_url = "https://trade.taobao.com/trade/itemlist/list_sold_items.htm?action=itemlist/SoldQueryAction&event_submit_do_query=1&auctionStatus=PAID&tabCode=waitSend"
    # Seller "items on sale" URL.
    __auction_url = "https://sell.taobao.com/auction/merchandise/auction_list.htm"
    # Seller "items in warehouse" URL.
    __repository_url = "https://sell.taobao.com/auction/merchandise/auction_list.htm?type=1"
    # Seller "confirm shipment" URL (the trade id is appended).
    __deliver_url = "https://wuliu.taobao.com/user/consign.htm?trade_id="
    # Seller "refunding" orders URL.
    __refunding_url = "https://trade.taobao.com/trade/itemlist/list_sold_items.htm?action=itemlist/SoldQueryAction&event_submit_do_query=1&auctionStatus=REFUNDING&tabCode=refunding"
    # Buyer-message JSON endpoint (the order id is appended).
    __message_url = "https://trade.taobao.com/trade/json/getMessage.htm?archive=false&biz_order_id="
    # requests session used for the message endpoint.
    __session = None

    # Maximum number of login attempts before __login gives up.
    __max_login_attempts = 5
    # Script that overwrites the page's message area with a hint asking the
    # operator to drag the slider captcha manually.
    __slider_hint_js = "document.getElementById('J_Message').children[1].innerText='发货机器人:请滑动滑块,协助完成验证!';"
    # data-reactid pattern used to locate the order-date <span> in an order block.
    __order_date_pattern = re.compile(r"\.0\.5\.3:.+\.0\.1\.0\.0\.0\.6")

    def __init__(self, username, password):
        ''' Store the credentials and create a fresh requests session.

        :param username: Taobao account name typed into the login form.
        :param password: password typed into the login form.
        '''
        self.__session = requests.Session()
        self.__username = username
        self.__password = password

    def __switch_to_taobao_window(self):
        ''' Focus the first browser window, which is the one used for Taobao pages. '''
        self.driver.switch_to.window(self.driver.window_handles[0])

    def __ensure_login(self):
        ''' Log in if no login has succeeded yet.

        :return: True when already logged in or the login succeeded, else False.
        '''
        if self.__is_logined:
            return True
        if not self.__login():
            return False
        self.__is_logined = True
        return True

    def __login(self):
        ''' Open the login page, retry the login form, then copy cookies to the session.

        :return: True on success, False when every attempt failed.
        '''
        # 1. Open the login page.
        try:
            self.driver.get(self.__login_url)
        except exceptions.TimeoutException:
            # The page took longer than the configured timeout: stop loading via JS.
            self.driver.execute_script('window.stop()')

        # Only give up when no attempt succeeded; a success on the very last
        # attempt must not be reported as a failure.
        for _ in range(self.__max_login_attempts):
            if self.__login_one():
                break
        else:
            return False

        # 2. Copy the browser cookies into the requests session so the message
        #    endpoint can be called outside the browser.
        cookies = {cookie['name']: cookie['value'] for cookie in self.driver.get_cookies()}
        requests.utils.add_dict_to_cookiejar(self.__session.cookies, cookies)
        return True

    def __login_one(self):
        ''' Perform a single login attempt on the already opened login page.

        :return: False when the page asks for slider verification after submit;
                 True once the element with id ``J_SiteNav`` is present.
        '''
        # 1. Switch to password login (per the original note, QR-code login is the default).
        try:
            switch_link = self.driver.find_element(By.XPATH, "//a[@class='forget-pwd J_Quick2Static']")
            if switch_link.is_displayed():
                switch_link.click()
        except (exceptions.ElementNotInteractableException, exceptions.NoSuchElementException):
            # The link is missing or not clickable; carry on and look for the inputs.
            pass

        # 2. Locate the account and password inputs.
        username_input = self.driver.find_element(By.ID, "TPL_username_1")
        password_input = self.driver.find_element(By.ID, "TPL_password_1")

        # 3. Fill them in. Both fields are cleared first so a retry does not
        #    append to text left over from the previous attempt.
        username_input.clear()
        username_input.send_keys(self.__username)
        password_input.clear()
        password_input.send_keys(self.__password)

        # 4. Wait for the operator to solve the slider captcha if one is shown.
        self.__slide_login()

        # 5. Submit the form.
        self.driver.find_element(By.ID, "J_SubmitStatic").click()

        # 6. Check the message area for the "please drag the slider" error.
        try:
            message = self.driver.find_element(By.ID, "J_Message").find_element(By.CLASS_NAME, "error")
            if message.text == u"为了你的账户安全,请拖动滑块完成验证":
                self.driver.execute_script(self.__slider_hint_js)
                return False
        except exceptions.NoSuchElementException:
            pass

        # 7. The site sometimes runs an environment check after submit; poll until
        #    the site navigation bar appears. NOTE(review): this wait is unbounded,
        #    as in the original - any other login error keeps the loop waiting.
        while True:
            try:
                self.driver.find_element(By.ID, "J_SiteNav")
                break
            except exceptions.NoSuchElementException:
                time.sleep(1)
        return True

    def __slide_login(self):
        ''' Block until the slider captcha, if displayed, has been solved manually. '''
        # Per the original note, a first login usually shows no slider.
        try:
            slide_div = self.driver.find_element(By.ID, "nocaptcha")
        except exceptions.NoSuchElementException:
            # No slider container on the page at all: nothing to wait for.
            return
        if not slide_div.is_displayed():
            return

        self.driver.execute_script(self.__slider_hint_js)
        while True:
            try:
                if self.driver.find_element(By.ID, "nc_1__scale_text").text == '验证通过':
                    break
            except exceptions.NoSuchElementException:
                # The slider is showing its refresh button; keep polling.
                pass
            # Sleep on every iteration so the missing-element case does not
            # busy-spin against the driver.
            time.sleep(0.5)

    def __get_orders_page(self):
        ''' Parse the orders shown on the current browser page.

        :return: list of ``(order_id, order_date, order_buyer, order_message)`` tuples.
        '''
        # 1. Parse the rendered page source.
        html = BeautifulSoup(self.driver.page_source, "html5lib")
        # 2. Collect every order block.
        order_div_list = html.find_all("div", {"class": "item-mod__trade-order___2LnGB trade-order-main"})
        # 3. Extract the fields of each order.
        data_array = []
        for order_div in order_div_list:
            order_id = order_div.find("input", attrs={"name": "orderid"}).attrs["value"]
            order_date = order_div.find("span", attrs={"data-reactid": self.__order_date_pattern}).text
            order_buyer = order_div.find("a", attrs={"class": "buyer-mod__name___S9vit"}).text
            # 4. Request the buyer message for this order id and read its 'tip' field.
            response = self.__session.get(self.__message_url + order_id)
            order_message = json.loads(response.text)['tip']
            data_array.append((order_id, order_date, order_buyer, order_message))
        return data_array

    def climb(self):
        ''' Collect all paid, waiting-to-ship orders across every result page.

        :return: list of ``(order_id, order_date, order_buyer, order_message)``
                 tuples; an empty list when login fails.
        '''
        # FIXME: for a dry run without real orders, return a canned sample instead, e.g.
        #   [("Test_1548615412315", "2018-08-07 15:00:03", "疯狂的石头",
        #     u"留言: test@qq.com http://download.csdn.net/download/lqkitten/10113904")]
        self.__switch_to_taobao_window()
        result = []
        if not self.__ensure_login():
            return result

        # 1. Open the waiting-to-ship orders page.
        self.driver.get(self.__orders_url)
        while True:
            # 2. Give the page two seconds to render, then parse it.
            time.sleep(2)
            result.extend(self.__get_orders_page())

            # 3. Locate the "next page" control.
            try:
                next_page_li = self.driver.find_element(By.CLASS_NAME, "pagination-next")
            except exceptions.NoSuchElementException:
                # No pagination control: this is the only page. Looping here
                # would re-append the same orders forever.
                break

            # 4. A disabled control means the last page has been reached.
            if "pagination-disabled" in (next_page_li.get_attribute("class") or ""):
                break
            next_page_li.click()
            time.sleep(1)
        return result

    def unshelve(self):
        ''' Take the first item on the "on sale" page off the shelf.

        :return: True when both clicks succeeded; False on login failure or
                 any WebDriver error.
        '''
        self.__switch_to_taobao_window()
        if not self.__ensure_login():
            return False
        try:
            # 1. Open the "on sale" page.
            self.driver.get(self.__auction_url)
            # 2. Tick the first row and click the unshelve button.
            self.driver.find_element(
                By.XPATH, "//*[@id='J_DataTable']/table/tbody[1]/tr[1]/td/input[1]").click()
            self.driver.find_element(
                By.XPATH, "//*[@id='J_DataTable']/div[2]/table/thead/tr[2]/td/div/button[2]").click()
            return True
        except exceptions.WebDriverException:
            return False

    def shelve(self):
        ''' Put the first item on the "warehouse" page back on sale.

        :return: True when both clicks succeeded; False on login failure or
                 when the checkbox/button cannot be found.
        '''
        # A failure to switch windows is reported but does not abort the operation.
        try:
            self.__switch_to_taobao_window()
        except exceptions.WebDriverException as err:
            print(err)
        if not self.__ensure_login():
            return False

        # 1. Open the warehouse page.
        self.driver.get(self.__repository_url)
        # 2. Tick the first row and click the shelve button.
        try:
            self.driver.find_element(
                By.XPATH, "//*[@id='J_DataTable']/table/tbody[1]/tr[1]/td/input").click()
            self.driver.find_element(
                By.XPATH, "//*[@id='J_DataTable']/div[3]/table/tbody/tr/td/div/button[2]").click()
        except exceptions.NoSuchElementException:
            return False
        return True

    def delivered(self, orderId):
        ''' Confirm shipment of an order using the "no logistics needed" option.

        :param orderId: trade id appended to the confirm-shipment URL.
        :return: True when the clicks succeeded; False on login failure or
                 any WebDriver error.
        '''
        self.__switch_to_taobao_window()
        if not self.__ensure_login():
            return False
        try:
            # 1. Open the confirm-shipment page for this order.
            self.driver.get(self.__deliver_url + str(orderId))
            self.driver.find_element(By.XPATH, "//*[@id='dummyTab']/a").click()
            self.driver.find_element(By.ID, "logis:noLogis").click()
            time.sleep(1)
            return True
        except exceptions.WebDriverException:
            return False

    def exists_refunding(self):
        ''' Report whether the refunding page lists at least one order.

        :return: True when an order block is present; False when none is found
                 or when login fails.
        '''
        self.__switch_to_taobao_window()
        if not self.__ensure_login():
            return False
        try:
            # 1. Open the refunding page.
            self.driver.get(self.__refunding_url)
            # The order block carries two classes; a class-name locator accepts
            # only a single class, so match both with a CSS selector.
            self.driver.find_element(
                By.CSS_SELECTOR, ".item-mod__trade-order___2LnGB.trade-order-main")
            return True
        except exceptions.NoSuchElementException:
            return False


if __name__ == '__main__':
    # Initialise the shared browser. The browser driver executable should be
    # reachable from PATH (the original note suggests the Python root directory).
    TaobaoClimber.driver = webdriver.Firefox()
    TaobaoClimber.action = ActionChains(TaobaoClimber.driver)
    TaobaoClimber.driver.maximize_window()
    TaobaoClimber.driver.execute_script("window.open('')")

    climber = TaobaoClimber(u"test", "123456")
    try:
        # Poll for orders every 30 seconds.
        while True:
            for order in climber.climb():
                print("淘宝订单产生:订单号:%s\t订单日期:%s \t买家:%s\t备注:%s" % order)
            time.sleep(30)
    finally:
        # Close the browser when the loop is interrupted or fails.
        TaobaoClimber.driver.quit()