#!/usr/bin/env python
# coding: utf-8
# wxbot.py
import json
import multiprocessing
import os
import random
import re
import sys
import time
import urllib
import xml.dom.minidom
from collections import defaultdict

import pyqrcode
import requests
# Status codes compared against the ``window.code`` value parsed from the
# login long-poll reply (see WXBot.wait4login).
UNKONWN = 'unkonwn'  # initial value before any reply is parsed; spelling kept because the name is referenced elsewhere
SUCCESS = '200'  # login confirmed on the phone
SCANED = '201'  # QR code scanned, waiting for confirmation
TIMEOUT = '408'  # long-poll timed out
  15. def utf82gbk(string):
  16. return string.decode('utf8').encode('gbk')
  17. def make_unicode(data):
  18. if not data:
  19. return data
  20. result = None
  21. if type(data) == unicode:
  22. result = data
  23. elif type(data) == str:
  24. result = data.decode('utf-8')
  25. return result
  26. class WXBot:
  27. def __init__(self):
  28. self.DEBUG = False
  29. self.uuid = ''
  30. self.base_uri = ''
  31. self.redirect_uri= ''
  32. self.uin = ''
  33. self.sid = ''
  34. self.skey = ''
  35. self.pass_ticket = ''
  36. self.device_id = 'e' + repr(random.random())[2:17]
  37. self.base_request = {}
  38. self.sync_key_str = ''
  39. self.sync_key = []
  40. self.user = []
  41. self.member_list = []
  42. self.contact_list = [] # contact list
  43. self.public_list = [] # public account list
  44. self.group_list = [] # group chat list
  45. self.special_list = [] # special list account
  46. self.sync_host = ''
  47. self.session = requests.Session()
  48. self.session.headers.update({'User-Agent': 'Mozilla/5.0 (X11; Linux i686; U;) Gecko/20070322 Kazehakase/0.4.5'})
  49. self.conf = {'qr': 'png',}
  50. def get_uuid(self):
  51. url = 'https://login.weixin.qq.com/jslogin'
  52. params = {
  53. 'appid': 'wx782c26e4c19acffb',
  54. 'fun': 'new',
  55. 'lang': 'zh_CN',
  56. '_': int(time.time())*1000 + random.randint(1,999),
  57. }
  58. r = self.session.get(url, params=params)
  59. r.encoding = 'utf-8'
  60. data = r.text
  61. regx = r'window.QRLogin.code = (\d+); window.QRLogin.uuid = "(\S+?)"'
  62. pm = re.search(regx, data)
  63. if pm:
  64. code = pm.group(1)
  65. self.uuid = pm.group(2)
  66. return code == '200'
  67. return False
  68. def gen_qr_code(self, qr_file_path):
  69. string = 'https://login.weixin.qq.com/l/' + self.uuid
  70. qr = pyqrcode.create(string)
  71. if self.conf['qr'] == 'png':
  72. qr.png(qr_file_path)
  73. elif self.conf['qr'] == 'tty':
  74. print 'Not support tty'
  75. pass
  76. #qr.print_tty()
  77. def do_request(self, url):
  78. r = self.session.get(url)
  79. r.encoding = 'utf-8'
  80. data = r.text
  81. param = re.search(r'window.code=(\d+);', data)
  82. code = param.group(1)
  83. return code, data
  84. def wait4login(self):
  85. '''
  86. http comet:
  87. tip=1, the request wait for user to scan the qr,
  88. 201: scaned
  89. 408: timeout
  90. tip=0, the request wait for user confirm,
  91. 200: confirmed
  92. '''
  93. LOGIN_TEMPLATE = 'https://login.weixin.qq.com/cgi-bin/mmwebwx-bin/login?tip=%s&uuid=%s&_=%s'
  94. tip = 1
  95. try_later_secs = 1
  96. MAX_RETRY_TIMES = 10
  97. code = UNKONWN
  98. retry_time = MAX_RETRY_TIMES
  99. while retry_time > 0:
  100. url = LOGIN_TEMPLATE % (tip, self.uuid, int(time.time()))
  101. code, data = self.do_request(url)
  102. if code == SCANED:
  103. print '[INFO] Please confirm to login .'
  104. tip = 0
  105. elif code == SUCCESS: #confirmed sucess
  106. param = re.search(r'window.redirect_uri="(\S+?)";', data)
  107. redirect_uri = param.group(1) + '&fun=new'
  108. self.redirect_uri = redirect_uri
  109. self.base_uri = redirect_uri[:redirect_uri.rfind('/')]
  110. return code
  111. elif code == TIMEOUT:
  112. print '[ERROR] WeChat login timeout. retry in %s secs later...'%(try_later_secs, )
  113. tip = 1 #need to reset tip, because the server will reset the peer connection
  114. retry_time -= 1
  115. time.sleep(try_later_secs)
  116. else:
  117. print ('[ERROR] WeChat login exception return_code=%s. retry in %s secs later...' %
  118. (code, try_later_secs))
  119. tip = 1
  120. retry_time -= 1
  121. time.sleep(try_later_secs)
  122. return code
  123. def login(self):
  124. r = self.session.get(self.redirect_uri)
  125. r.encoding = 'utf-8'
  126. data = r.text
  127. doc = xml.dom.minidom.parseString(data)
  128. root = doc.documentElement
  129. for node in root.childNodes:
  130. if node.nodeName == 'skey':
  131. self.skey = node.childNodes[0].data
  132. elif node.nodeName == 'wxsid':
  133. self.sid = node.childNodes[0].data
  134. elif node.nodeName == 'wxuin':
  135. self.uin = node.childNodes[0].data
  136. elif node.nodeName == 'pass_ticket':
  137. self.pass_ticket = node.childNodes[0].data
  138. if '' in (self.skey, self.sid, self.uin, self.pass_ticket):
  139. return False
  140. self.base_request = {
  141. 'Uin': self.uin,
  142. 'Sid': self.sid,
  143. 'Skey': self.skey,
  144. 'DeviceID': self.device_id,
  145. }
  146. return True
  147. def init(self):
  148. url = self.base_uri + '/webwxinit?r=%i&lang=en_US&pass_ticket=%s' % (int(time.time()), self.pass_ticket)
  149. params = {
  150. 'BaseRequest': self.base_request
  151. }
  152. r = self.session.post(url, data=json.dumps(params))
  153. r.encoding = 'utf-8'
  154. dic = json.loads(r.text)
  155. self.sync_key = dic['SyncKey']
  156. self.user = dic['User']
  157. self.sync_key_str = '|'.join([ str(keyVal['Key']) + '_' + str(keyVal['Val']) for keyVal in self.sync_key['List'] ])
  158. return dic['BaseResponse']['Ret'] == 0
  159. def status_notify(self):
  160. url = self.base_uri + '/webwxstatusnotify?lang=zh_CN&pass_ticket=%s' % (self.pass_ticket)
  161. self.base_request['Uin'] = int(self.base_request['Uin'])
  162. params = {
  163. 'BaseRequest': self.base_request,
  164. "Code": 3,
  165. "FromUserName": self.user['UserName'],
  166. "ToUserName": self.user['UserName'],
  167. "ClientMsgId": int(time.time())
  168. }
  169. r = self.session.post(url, data=json.dumps(params))
  170. r.encoding = 'utf-8'
  171. dic = json.loads(r.text)
  172. return dic['BaseResponse']['Ret'] == 0
  173. def get_contact(self):
  174. url = self.base_uri + '/webwxgetcontact?pass_ticket=%s&skey=%s&r=%s' % (self.pass_ticket, self.skey, int(time.time()))
  175. r = self.session.post(url, data='{}')
  176. r.encoding = 'utf-8'
  177. if self.DEBUG:
  178. with open('contacts.json', 'w') as f:
  179. f.write(r.text.encode('utf-8'))
  180. dic = json.loads(r.text)
  181. self.member_list = dic['MemberList']
  182. SpecialUsers = ['newsapp','fmessage','filehelper','weibo','qqmail','fmessage','tmessage','qmessage','qqsync','floatbottle','lbsapp','shakeapp','medianote',
  183. 'qqfriend','readerapp','blogapp','facebookapp','masssendapp','meishiapp','feedsapp','voip','blogappweixin','weixin','brandsessionholder','weixinreminder','wxid_novlwrv3lqwv11',
  184. 'gh_22b87fa7cb3c','officialaccounts','notification_messages','wxid_novlwrv3lqwv11','gh_22b87fa7cb3c','wxitil','userexperience_alarm','notification_messages']
  185. self.contact_list = []
  186. self.public_list = []
  187. self.special_list = []
  188. self.group_list = []
  189. for contact in self.member_list:
  190. if contact['VerifyFlag'] & 8 != 0: # public account
  191. self.public_list.append(contact)
  192. elif contact['UserName'] in SpecialUsers: # special account
  193. self.special_list.append(contact)
  194. elif contact['UserName'].find('@@') != -1: # group
  195. self.group_list.append(contact)
  196. elif contact['UserName'] == self.user['UserName']: # self
  197. pass
  198. else:
  199. self.contact_list.append(contact)
  200. if self.DEBUG:
  201. with open('contact_list.json', 'w') as f:
  202. f.write(json.dumps(self.contact_list))
  203. with open('special_list.json', 'w') as f:
  204. f.write(json.dumps(self.special_list))
  205. with open('group_list.json', 'w') as f:
  206. f.write(json.dumps(self.group_list))
  207. with open('public_list.json', 'w') as f:
  208. f.write(json.dumps(self.public_list))
  209. return True
  210. def batch_get_contact(self):
  211. url = self.base_uri + '/webwxbatchgetcontact?type=ex&r=%s&pass_ticket=%s' % (int(time.time()), self.pass_ticket)
  212. params = {
  213. 'BaseRequest': self.base_request,
  214. "Count": len(self.group_list),
  215. "List": [ {"UserName": g['UserName'], "EncryChatRoomId":""} for g in self.group_list ]
  216. }
  217. r = self.session.post(url, data=params)
  218. r.encoding = 'utf-8'
  219. dic = json.loads(r.text)
  220. return True
  221. def test_sync_check(self):
  222. for host in ['webpush', 'webpush2']:
  223. self.sync_host = host
  224. [retcode, selector] = self.sync_check()
  225. if retcode == '0':
  226. return True
  227. return False
  228. def sync_check(self):
  229. params = {
  230. 'r': int(time.time()),
  231. 'sid': self.sid,
  232. 'uin': self.uin,
  233. 'skey': self.skey,
  234. 'deviceid': self.device_id,
  235. 'synckey': self.sync_key_str,
  236. '_': int(time.time()),
  237. }
  238. url = 'https://' + self.sync_host + '.weixin.qq.com/cgi-bin/mmwebwx-bin/synccheck?' + urllib.urlencode(params)
  239. r = self.session.get(url)
  240. r.encoding = 'utf-8'
  241. data = r.text
  242. pm = re.search(r'window.synccheck={retcode:"(\d+)",selector:"(\d+)"}', data)
  243. retcode = pm.group(1)
  244. selector = pm.group(2)
  245. return [retcode, selector]
  246. def sync(self):
  247. url = self.base_uri + '/webwxsync?sid=%s&skey=%s&lang=en_US&pass_ticket=%s' % (self.sid, self.skey, self.pass_ticket)
  248. params = {
  249. 'BaseRequest': self.base_request,
  250. 'SyncKey': self.sync_key,
  251. 'rr': ~int(time.time())
  252. }
  253. r = self.session.post(url, data=json.dumps(params))
  254. r.encoding = 'utf-8'
  255. dic = json.loads(r.text)
  256. if dic['BaseResponse']['Ret'] == 0:
  257. self.sync_key = dic['SyncKey']
  258. self.sync_key_str = '|'.join([ str(keyVal['Key']) + '_' + str(keyVal['Val']) for keyVal in self.sync_key['List'] ])
  259. return dic
  260. def get_icon(self, id):
  261. url = self.base_uri + '/webwxgeticon?username=%s&skey=%s' % (id, self.skey)
  262. r = self.session.get(url)
  263. data = r.content
  264. fn = 'img_'+id+'.jpg'
  265. with open(fn, 'wb') as f:
  266. f.write(data)
  267. return fn
  268. def get_head_img(self, id):
  269. url = self.base_uri + '/webwxgetheadimg?username=%s&skey=%s' % (id, self.skey)
  270. r = self.session.get(url)
  271. data = r.content
  272. fn = 'img_'+id+'.jpg'
  273. with open(fn, 'wb') as f:
  274. f.write(data)
  275. return fn
  276. def get_msg_img_url(self, msgid):
  277. return self.base_uri + '/webwxgetmsgimg?MsgID=%s&skey=%s' % (msgid, self.skey)
  278. def get_msg_img(self, msgid):
  279. url = self.base_uri + '/webwxgetmsgimg?MsgID=%s&skey=%s' % (msgid, self.skey)
  280. r = self.session.get(url)
  281. data = r.content
  282. fn = 'img_'+msgid+'.jpg'
  283. with open(fn, 'wb') as f:
  284. f.write(data)
  285. return fn
  286. def get_voice_url(self, msgid):
  287. return self.base_uri + '/webwxgetvoice?msgid=%s&skey=%s' % (msgid, self.skey)
  288. def get_voice(self, msgid):
  289. url = self.base_uri + '/webwxgetvoice?msgid=%s&skey=%s' % (msgid, self.skey)
  290. r = self.session.get(url)
  291. data = r.content
  292. fn = 'voice_'+msgid+'.mp3'
  293. with open(fn, 'wb') as f:
  294. f.write(data)
  295. return fn
  296. #Get the NickName or RemarkName of an user by user id
  297. def get_user_remark_name(self, uid):
  298. name = 'unknown group' if uid[:2] == '@@' else 'stranger'
  299. for member in self.member_list:
  300. if member['UserName'] == uid:
  301. name = member['RemarkName'] if member['RemarkName'] else member['NickName']
  302. return name
  303. #Get user id of an user
  304. def get_user_id(self, name):
  305. for member in self.member_list:
  306. if name == member['RemarkName'] or name == member['NickName'] or name == member['UserName']:
  307. return member['UserName']
  308. return None
  309. def get_user_type(self, wx_user_id):
  310. for account in self.contact_list:
  311. if wx_user_id == account['UserName']:
  312. return 'contact'
  313. for account in self.public_list:
  314. if wx_user_id == account['UserName']:
  315. return 'public'
  316. for account in self.special_list:
  317. if wx_user_id == account['UserName']:
  318. return 'special'
  319. for account in self.group_list:
  320. if wx_user_id == account['UserName']:
  321. return 'group'
  322. return 'unknown'
  323. '''
  324. msg:
  325. user_type
  326. msg_id
  327. msg_type_id
  328. user_id
  329. user_name
  330. content
  331. '''
  332. def handle_msg_all(self, msg):
  333. pass
  334. '''
  335. msg_type_id:
  336. 1 -> Location
  337. 2 -> FileHelper
  338. 3 -> Self
  339. 4 -> Group
  340. 5 -> User Text Message
  341. 6 -> Image
  342. 7 -> Voice
  343. 8 -> Recommend
  344. 9 -> Animation
  345. 10 -> Share
  346. 11 -> Video
  347. 12 -> Video Call
  348. 13 -> Redraw
  349. 14 -> Init Message
  350. 99 -> Unknown
  351. '''
  352. def handle_msg(self, r):
  353. for msg in r['AddMsgList']:
  354. mtype = msg['MsgType']
  355. wx_user_id = msg['FromUserName']
  356. user_type = self.get_user_type(wx_user_id)
  357. name = self.get_user_remark_name(wx_user_id)
  358. content = msg['Content'].replace('&lt;','<').replace('&gt;','>')
  359. msg_id = msg['MsgId']
  360. msg_type_id = 99
  361. if mtype == 51: #init message
  362. msg_type_id = 14
  363. elif mtype == 1:
  364. if content.find('http://weixin.qq.com/cgi-bin/redirectforward?args=') != -1:
  365. r = self.session.get(content)
  366. r.encoding = 'gbk'
  367. data = r.text
  368. pos = self.search_content('title', data, 'xml')
  369. msg_type_id = 1
  370. content = {'location': pos, 'xml': data}
  371. if self.DEBUG:
  372. print '[Location] %s : I am at %s ' % (name, pos)
  373. elif msg['ToUserName'] == 'filehelper':
  374. msg_type_id = 2
  375. content = content.replace('<br/>','\n')
  376. if self.DEBUG:
  377. print '[File] %s : %s' % (name, content)
  378. elif msg['FromUserName'] == self.user['UserName']: #self
  379. msg_type_id = 3
  380. elif msg['FromUserName'][:2] == '@@':
  381. [people, content] = content.split(':<br/>')
  382. group = self.get_user_remark_name(msg['FromUserName'])
  383. name = self.get_user_remark_name(people)
  384. msg_type_id = 4
  385. content = {'group_id': msg['FromUserName'], 'group_name': group, 'user': people, 'user_name': name, 'msg': content}
  386. if self.DEBUG:
  387. print '[Group] |%s| %s: %s' % (group, name, content.replace('<br/>','\n'))
  388. else:
  389. msg_type_id = 5
  390. if self.DEBUG:
  391. print '[Text] ', name, ' : ', content
  392. elif mtype == 3:
  393. msg_type_id = 6
  394. content = self.get_msg_img_url(msg_id)
  395. if self.DEBUG:
  396. image = self.get_msg_img(msg_id)
  397. print '[Image] %s : %s' % (name, image)
  398. elif mtype == 34:
  399. msg_type_id = 7
  400. content = self.get_voice_url(msg_id)
  401. if self.DEBUG:
  402. voice = self.get_voice(msg_id)
  403. print '[Voice] %s : %s' % (name, voice)
  404. elif mtype == 42:
  405. msg_type_id = 8
  406. info = msg['RecommendInfo']
  407. content = {}
  408. content['nickname'] = info['NickName']
  409. content['alias'] = info['Alias']
  410. content['province'] = info['Province']
  411. content['city'] = info['City']
  412. content['gender'] = ['unknown', 'male', 'female'][info['Sex']]
  413. if self.DEBUG:
  414. print '[Recommend] %s : ' % name
  415. print '========================='
  416. print '= NickName: %s' % info['NickName']
  417. print '= Alias: %s' % info['Alias']
  418. print '= Local: %s %s' % (info['Province'], info['City'])
  419. print '= Gender: %s' % ['unknown', 'male', 'female'][info['Sex']]
  420. print '========================='
  421. elif mtype == 47:
  422. msg_type_id = 9
  423. url = self.search_content('cdnurl', content)
  424. content = url
  425. if self.DEBUG:
  426. print '[Animation] %s : %s' % (name, url)
  427. elif mtype == 49:
  428. msg_type_id = 10
  429. appMsgType = defaultdict(lambda : "")
  430. appMsgType.update({5:'link', 3:'music', 7:'weibo'})
  431. content = {'type': appMsgType[msg['AppMsgType']], 'title': msg['FileName'], 'desc': self.search_content('des', content, 'xml'), 'url': msg['Url'], 'from': self.search_content('appname', content, 'xml')}
  432. if self.DEBUG:
  433. print '[Share] %s : %s' % (name, appMsgType[msg['AppMsgType']])
  434. print '========================='
  435. print '= title: %s' % msg['FileName']
  436. print '= desc: %s' % self.search_content('des', content, 'xml')
  437. print '= link: %s' % msg['Url']
  438. print '= from: %s' % self.search_content('appname', content, 'xml')
  439. print '========================='
  440. elif mtype == 62:
  441. msg_type_id = 11
  442. if self.DEBUG:
  443. print '[Video] ', name, ' sent you a video, please check on mobiles'
  444. elif mtype == 53:
  445. msg_type_id = 12
  446. if self.DEBUG:
  447. print '[Video Call] ', name, ' call you'
  448. elif mtype == 10002:
  449. msg_type_id = 13
  450. if self.DEBUG:
  451. print '[Redraw] ', name, ' redraw back a message'
  452. else:
  453. msg_type_id = 99
  454. if self.DEBUG:
  455. print '[Unknown] : %s' % str(mtype)
  456. print msg
  457. message = {'user_type': user_type, 'msg_id':msg_id, 'msg_type_id': msg_type_id, 'content': content, 'user_id': msg['FromUserName'], 'user_name': name}
  458. self.handle_msg_all(message)
  459. def schedule(self):
  460. pass
  461. def proc_msg(self):
  462. self.test_sync_check()
  463. while True:
  464. [retcode, selector] = self.sync_check()
  465. if retcode == '1100': # User have login on mobile
  466. pass
  467. elif retcode == '0':
  468. if selector == '2':
  469. r = self.sync()
  470. if r is not None:
  471. self.handle_msg(r)
  472. elif selector == '7': # Play WeChat on mobile
  473. r = self.sync()
  474. if r is not None:
  475. self.handle_msg(r)
  476. elif selector == '0':
  477. time.sleep(1)
  478. self.schedule()
  479. def send_msg_by_uid(self, word, dst = 'filehelper'):
  480. url = self.base_uri + '/webwxsendmsg?pass_ticket=%s' % (self.pass_ticket)
  481. msg_id = str(int(time.time()*1000)) + str(random.random())[:5].replace('.','')
  482. params = {
  483. 'BaseRequest': self.base_request,
  484. 'Msg': {
  485. "Type": 1,
  486. "Content": make_unicode(word),
  487. "FromUserName": self.user['UserName'],
  488. "ToUserName": dst,
  489. "LocalID": msg_id,
  490. "ClientMsgId": msg_id
  491. }
  492. }
  493. headers = {'content-type': 'application/json; charset=UTF-8'}
  494. data = json.dumps(params, ensure_ascii=False).encode('utf8')
  495. r = self.session.post(url, data = data, headers = headers)
  496. dic = r.json()
  497. return dic['BaseResponse']['Ret'] == 0
  498. def send_msg(self, name, word, isfile = False):
  499. uid = self.get_user_id(name)
  500. if uid:
  501. if isfile:
  502. with open(word, 'r') as f:
  503. result = True
  504. for line in f.readlines():
  505. line = line.replace('\n','')
  506. print '-> '+name+': '+line
  507. if self.send_msg_by_uid(line, uid):
  508. pass
  509. else:
  510. result = False
  511. time.sleep(1)
  512. return result
  513. else:
  514. if self.send_msg_by_uid(word, uid):
  515. return True
  516. else:
  517. return False
  518. else:
  519. if self.DEBUG:
  520. print '[ERROR] This user does not exist .'
  521. return True
  522. def search_content(self, key, content, fmat = 'attr'):
  523. if fmat == 'attr':
  524. pm = re.search(key+'\s?=\s?"([^"<]+)"', content)
  525. if pm: return pm.group(1)
  526. elif fmat == 'xml':
  527. pm=re.search('<{0}>([^<]+)</{0}>'.format(key),content)
  528. if pm: return pm.group(1)
  529. return 'unknown'
  530. def run(self):
  531. self.get_uuid()
  532. self.gen_qr_code('qr.png')
  533. print '[INFO] Please use WeCaht to scan the QR code .'
  534. result = self.wait4login()
  535. if result != SUCCESS:
  536. print '[ERROR] Web WeChat login failed. failed code=%s'%(result, )
  537. return
  538. if self.login():
  539. print '[INFO] Web WeChat login succeed .'
  540. else:
  541. print '[ERROR] Web WeChat login failed.'
  542. return
  543. if self.init():
  544. print '[INFO] Web WeChat init succeed .'
  545. else:
  546. print '[INFO] Web WeChat init failed'
  547. return
  548. self.status_notify()
  549. self.get_contact()
  550. print '[INFO] Get %d contacts' % len(self.contact_list)
  551. print '[INFO] Start to process messages .'
  552. self.proc_msg()