spider.py 10.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343
  1. import os
  2. import requests
  3. import time
  4. import re
  5. import json
  6. import schedule
  7. import tempfile
  8. from selenium.common.exceptions import NoSuchElementException, ElementNotInteractableException
  9. from selenium.webdriver import DesiredCapabilities, Chrome, ChromeOptions
  10. from datetime import datetime
  11. from random import randrange, choice
  12. from selenium.webdriver.common import utils
  13. # verification code Identification settings
  14. code_url = 'http://apigateway.jianjiaoshuju.com/api/v_1/yzmCustomized.html'
  15. code_headers = {
  16. 'appCode': 'X',
  17. 'appKey': 'X',
  18. 'appSecret': 'X'
  19. }
  20. headers = {
  21. 'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
  22. 'accept-encoding': 'deflate',
  23. 'accept-language': 'en-US,en;q=0.9,zh-CN;q=0.8,zh;q=0.7',
  24. 'cache-control': 'max-age=0',
  25. 'dnt': '1',
  26. 'sec-fetch-dest': 'document',
  27. 'sec-fetch-mode': 'navigate',
  28. 'sec-fetch-site': 'none',
  29. 'sec-fetch-user': '?1',
  30. 'upgrade-insecure-requests': '1',
  31. 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36',
  32. }
  33. url = 'https://signup.live.com/signup'
  34. check_timeout = 0
  35. sign_list = ['~', '!', '@', '#', '$', '%', '^', '&', '*',
  36. '(', ')', '_+', '<', '>', '?', ':', '"', '{', '}', '|']
  37. name_list = [
  38. 'Emma',
  39. 'Olivia',
  40. 'Ava',
  41. 'Isabella',
  42. 'Sophia',
  43. 'Charlotte',
  44. 'Mia',
  45. 'Amelia',
  46. 'Harper',
  47. 'Evelyn',
  48. 'Abigail',
  49. 'Emily',
  50. 'Elizabeth',
  51. 'Mila',
  52. 'Ella',
  53. 'Avery',
  54. 'Sofia',
  55. 'Camila',
  56. 'Aria',
  57. 'Scarlett',
  58. 'Victoria',
  59. 'Madison',
  60. 'Luna',
  61. 'Grace',
  62. 'Chloe',
  63. 'Penelope',
  64. 'Layla',
  65. 'Riley',
  66. 'Zoey',
  67. 'Nora',
  68. 'Lily',
  69. 'Eleanor',
  70. 'Hannah',
  71. 'Lillian',
  72. 'Addison',
  73. 'Aubrey',
  74. 'Ellie',
  75. 'Stella',
  76. 'Natalie',
  77. 'Zoe',
  78. 'Leah',
  79. 'Hazel',
  80. 'Violet',
  81. 'Aurora',
  82. 'Savannah',
  83. 'Audrey',
  84. 'Brooklyn',
  85. 'Bella',
  86. 'Claire',
  87. 'Skylar'
  88. ]
  89. last_image_data = ''
  90. last_code = ''
  91. def find_element_by_css_selector(driver, css_selector):
  92. try:
  93. time.sleep(0.1)
  94. return driver.find_element_by_css_selector(css_selector)
  95. except Exception:
  96. time.sleep(check_timeout)
  97. return find_element_by_css_selector(driver, css_selector)
  98. def find_elements_by_css_selector(driver, css_selector):
  99. try:
  100. time.sleep(0.1)
  101. return driver.find_elements_by_css_selector(css_selector)
  102. except Exception:
  103. time.sleep(check_timeout)
  104. return find_elements_by_css_selector(driver, css_selector)
  105. def find_element_by_link_text(driver, link_text):
  106. try:
  107. time.sleep(0.1)
  108. return driver.find_element_by_link_text(link_text)
  109. except Exception:
  110. time.sleep(check_timeout)
  111. return find_element_by_link_text(driver, link_text)
  112. def find_elements_by_tag_name(driver, tag_name, target_number=None, try_times=None):
  113. time.sleep(0.1)
  114. if try_times <= 0:
  115. raise Exception('Can not find element.')
  116. if try_times is not None:
  117. try_times -= 1
  118. try:
  119. l = driver.find_elements_by_tag_name(tag_name)
  120. if target_number is None:
  121. return l
  122. if len(l) == target_number:
  123. return l
  124. else:
  125. time.sleep(check_timeout)
  126. return find_elements_by_tag_name(driver, tag_name, target_number, try_times)
  127. except Exception:
  128. time.sleep(check_timeout)
  129. return find_elements_by_tag_name(driver, tag_name, target_number, try_times)
  130. def find_element_by_tag_name(driver, tag_name):
  131. try:
  132. time.sleep(0.1)
  133. return driver.find_element_by_tag_name(tag_name)
  134. except Exception:
  135. time.sleep(check_timeout)
  136. return find_element_by_tag_name(driver, tag_name)
  137. def get_code(img_data):
  138. global last_image_data
  139. global last_code
  140. if last_image_data == img_data:
  141. return last_code
  142. last_image_data = img_data
  143. data = {
  144. 'v_pic': img_data,
  145. 'pri_id': 'ne',
  146. }
  147. response = requests.post(code_url, headers=code_headers, data=data)
  148. code = json.loads(response.text)['v_code']
  149. last_code = code
  150. print('code is {}'.format(code))
  151. return code
  152. def register_email(driver, email, password):
  153. driver.get(url)
  154. time.sleep(check_timeout*2)
  155. sleep_timess = 0
  156. while True:
  157. try:
  158. driver.find_element_by_css_selector('#liveSwitch').click()
  159. break
  160. except Exception:
  161. time.sleep(check_timeout)
  162. sleep_timess += 1
  163. if sleep_timess > 10:
  164. return False
  165. while True:
  166. try:
  167. find_element_by_css_selector(driver, '#MemberName').clear()
  168. find_element_by_css_selector(
  169. driver, '#MemberName').send_keys(email)
  170. find_element_by_css_selector(driver, '#iSignupAction').click()
  171. break
  172. except Exception as e:
  173. time.sleep(check_timeout)
  174. sleep_timess = 0
  175. while driver.title != 'Create a password' and driver.title != '创建密码':
  176. time.sleep(check_timeout)
  177. sleep_timess += 1
  178. if sleep_timess > 15:
  179. return 'exist'
  180. find_element_by_css_selector(
  181. driver, '#PasswordInput').send_keys(password)
  182. find_element_by_css_selector(driver, '#iOptinEmail').click()
  183. find_element_by_css_selector(driver, '#iSignupAction').click()
  184. time.sleep(check_timeout)
  185. find_element_by_css_selector(
  186. driver, '#LastName').send_keys(choice(name_list))
  187. find_element_by_css_selector(
  188. driver, '#FirstName').send_keys(choice(name_list))
  189. find_element_by_css_selector(driver, '#iSignupAction').click()
  190. time.sleep(check_timeout)
  191. find_element_by_css_selector(
  192. driver, '#BirthYear option:nth-child({})'.format(randrange(2, 25))).click()
  193. find_element_by_css_selector(
  194. driver, '#BirthMonth option:nth-child({})'.format(randrange(2, 11))).click()
  195. find_element_by_css_selector(
  196. driver, '#BirthDay option:nth-child({})'.format(randrange(2, 22))).click()
  197. find_element_by_css_selector(driver, '#iSignupAction').click()
  198. time.sleep(check_timeout)
  199. t_url = driver.current_url
  200. try:
  201. while True:
  202. code_element = find_elements_by_tag_name(driver, 'input', 5, 2)[0]
  203. code_element.clear()
  204. code_element.send_keys(
  205. get_code(
  206. find_elements_by_tag_name(
  207. driver, 'img', 5, 2
  208. )[-1].screenshot_as_base64
  209. )
  210. )
  211. find_element_by_css_selector(driver, '#iSignupAction').click()
  212. time.sleep(check_timeout)
  213. try_times = 0
  214. while t_url == driver.current_url:
  215. time.sleep(check_timeout)
  216. try:
  217. driver.find_element_by_css_selector(
  218. '#iSignupAction').click()
  219. except Exception:
  220. pass
  221. try_times += 1
  222. if try_times > 8:
  223. break
  224. if t_url != driver.current_url:
  225. return True
  226. except Exception as e:
  227. print(e)
  228. return False
  229. def get_email_password():
  230. def _get_random_sign():
  231. return choice(sign_list)
  232. def _get_random_char(is_low=None):
  233. if is_low is None:
  234. r = randrange(0, 2)
  235. elif is_low is True:
  236. r = 0
  237. else:
  238. r = 1
  239. if r == 0:
  240. return chr(randrange(65, 91))
  241. else:
  242. return chr(randrange(97, 123))
  243. email = str(randrange(0, 10)).join(_get_random_char() for i in range(5))
  244. password = (
  245. str(randrange(0, 100)) + _get_random_sign()
  246. ).join(
  247. _get_random_char(True) + _get_random_char() + _get_random_char(False) for i in range(3)
  248. )
  249. return email, password
  250. def start_register(driver):
  251. email, password = get_email_password()
  252. result = register_email(driver, email, password)
  253. while result == 'exist':
  254. print('Already Register Account {}@outlook.com'.format(email))
  255. email, password = get_email_password()
  256. result = register_email(driver, email, password)
  257. if result:
  258. sleep_times = 0
  259. while 'account.microsoft.com' not in driver.current_url:
  260. sleep_times += 1
  261. time.sleep(check_timeout)
  262. if sleep_times >= 20:
  263. break
  264. if sleep_times >= 10:
  265. print('waiting time too long')
  266. else:
  267. with open('pass.txt', 'a+') as f:
  268. f.write(email + '@outlook.com ' + password + '\n')
  269. print('Success Register Account {}@outlook.com'.format(email))
  270. return True
  271. else:
  272. print('Fail')
  273. return False
  274. def create_driver(tp, ip_port):
  275. port = utils.free_port()
  276. options = ChromeOptions()
  277. desired_capabilities = DesiredCapabilities().CHROME
  278. desired_capabilities['pageLoadStrategy'] = 'none'
  279. os.popen(
  280. 'chrome.exe --remote-debugging-port={} --user-data-dir={} --proxy-server={}={}'.format(
  281. port, tempfile.mkdtemp(), tp, ip_port)
  282. )
  283. options.add_experimental_option(
  284. 'debuggerAddress', '127.0.0.1:{}'.format(port))
  285. driver = Chrome(
  286. options=options, desired_capabilities=desired_capabilities)
  287. driver.set_window_position(0, 0)
  288. driver.set_window_size(700, 600)
  289. return driver
  290. def run_driver():
  291. consecutive_fail_number = 0
  292. try:
  293. for tp, ip_port in get_ip_list():
  294. if datetime.now().minute == 10 or consecutive_fail_number > 10:
  295. break
  296. print('Proxy: {}://{}'.format(tp, ip_port))
  297. driver = create_driver(tp, ip_port)
  298. register_success = start_register(driver)
  299. driver.close()
  300. if register_success:
  301. consecutive_fail_number = 0
  302. if not register_success:
  303. consecutive_fail_number += 1
  304. except Exception as e:
  305. print(e)
  306. return schedule.every().minute.at(':10').do(run_driver)
  307. if __name__ == '__main__':
  308. run_driver()