# fuck12306.py
from __future__ import print_function

import re
import ssl
import urllib

import requests
from PIL import Image
from PIL import ImageFilter
  7. if hasattr(ssl, '_create_unverified_context'):
  8. ssl._create_default_https_context = ssl._create_unverified_context
  9. UA = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.89 Safari/537.36"
  10. pic_url = "https://kyfw.12306.cn/otn/passcodeNew/getPassCodeNew?module=login&rand=sjrand&0.21191171556711197"
  11. def get_img():
  12. ''' 获取验证码图片 '''
  13. resp = urllib.urlopen(pic_url)
  14. raw = resp.read()
  15. with open("./tmp.jpg", 'wb') as fp:
  16. fp.write(raw)
  17. return Image.open("./tmp.jpg")
  18. def get_sub_img(im, x, y):
  19. assert 0 <= x <= 3
  20. assert 0 <= y <= 2
  21. WITH = HEIGHT = 68
  22. left = 5 + (67 + 5) * x
  23. top = 41 + (67 + 5) * y
  24. right = left + 67
  25. bottom = top + 67
  26. return im.crop((left, top, right, bottom))
  27. def baidu_image_upload(im):
  28. url = "http://image.baidu.com/pictureup/uploadshitu?fr=flash&fm=index&pos=upload"
  29. im.save("./query_temp_img.png")
  30. raw = open("./query_temp_img.png", 'rb').read()
  31. files = {
  32. 'fileheight': "0",
  33. 'newfilesize': str(len(raw)),
  34. 'compresstime': "0",
  35. 'Filename': "image.png",
  36. 'filewidth': "0",
  37. 'filesize': str(len(raw)),
  38. 'filetype': 'image/png',
  39. 'Upload': "Submit Query",
  40. 'filedata': ("image.png", raw)
  41. }
  42. resp = requests.post(url, files=files, headers={'User-Agent': UA})
  43. # resp.url
  44. redirect_url = "http://image.baidu.com" + resp.text
  45. return redirect_url
  46. def baidu_stu_lookup(im):
  47. redirect_url = baidu_image_upload(im)
  48. # print redirect_url
  49. resp = requests.get(redirect_url)
  50. html = resp.text
  51. return baidu_stu_html_extract(html)
  52. def baidu_stu_html_extract(html):
  53. pattern = re.compile(r"'multitags':\s*'(.*?)'")
  54. matches = pattern.findall(html)
  55. if not matches:
  56. return '[ERROR?]'
  57. tags_str = matches[0]
  58. result = list(filter(None, tags_str.replace('\t', ' ').split()))
  59. return '|'.join(result) if result else '[UNKOWN]'
  60. def ocr_question_extract(im):
  61. # git@github.com:madmaze/pytesseract.git
  62. global pytesseract
  63. try:
  64. import pytesseract
  65. except:
  66. print
  67. "[ERROR] pytesseract not installed"
  68. return
  69. im = im.crop((127, 3, 260, 22))
  70. im = pre_ocr_processing(im)
  71. return pytesseract.image_to_string(im, lang='chi_sim').strip()
  72. def pre_ocr_processing(im):
  73. im = im.convert("RGB")
  74. width, height = im.size
  75. white = im.filter(ImageFilter.BLUR).filter(ImageFilter.MaxFilter(23))
  76. grey = im.convert('L')
  77. impix = im.load()
  78. whitepix = white.load()
  79. greypix = grey.load()
  80. for y in range(height):
  81. for x in range(width):
  82. greypix[x, y] = min(255, max(255 + impix[x, y][0] - whitepix[x, y][0],
  83. 255 + impix[x, y][1] -
  84. whitepix[x, y][1],
  85. 255 + impix[x, y][2] - whitepix[x, y][2]))
  86. new_im = grey.copy()
  87. binarize(new_im, 150)
  88. return new_im
  89. def binarize(im, thresh=120):
  90. assert 0 < thresh < 255
  91. assert im.mode == 'L'
  92. w, h = im.size
  93. for y in range(0, h):
  94. for x in range(0, w):
  95. if im.getpixel((x, y)) < thresh:
  96. im.putpixel((x, y), 0)
  97. else:
  98. im.putpixel((x, y), 255)
  99. if __name__ == '__main__':
  100. im = get_img()
  101. try:
  102. print
  103. 'OCR Question:', ocr_question_extract(im)
  104. except Exception as e:
  105. print
  106. '<OCR failed>', e
  107. for y in range(2):
  108. for x in range(4):
  109. im2 = get_sub_img(im, x, y)
  110. result = baidu_stu_lookup(im2)
  111. print(y, x), result