update_hosts.py 6.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. import os
  4. import sys
  5. import re
  6. import socket
  7. import getopt
  8. import threading
  9. import subprocess
  10. import shlex
  11. import time
  12. import select
  13. blackhole = (
  14. '10::2222',
  15. '101::1234',
  16. '2001::212',
  17. '2001:da8:112::21ae',
  18. '2003:ff:1:2:3:4:5fff:6',
  19. '2003:ff:1:2:3:4:5fff:7',
  20. '2003:ff:1:2:3:4:5fff:8',
  21. '2003:ff:1:2:3:4:5fff:9',
  22. '2003:ff:1:2:3:4:5fff:10',
  23. '2003:ff:1:2:3:4:5fff:11',
  24. '2003:ff:1:2:3:4:5fff:12',
  25. '21:2::2',
  26. '2123::3e12')
  27. dns = {
  28. 'google_a':'2001:4860:4860::8888',
  29. 'google_b':'2001:4860:4860::8844',
  30. 'he_net':'2001:470:20::2',
  31. 'lax_he_net':'2001:470:0:9d::2'
  32. }
  33. config = {
  34. 'dns':dns['google_b'],
  35. 'infile':'',
  36. 'outfile':'',
  37. 'querytype':'aaaa',
  38. 'threadnum':10
  39. }
  40. hosts = []
  41. done_num = 0
  42. thread_lock = threading.Lock()
  43. running = True
  44. class worker_thread(threading.Thread):
  45. def __init__(self, start_pt, end_pt):
  46. threading.Thread.__init__(self)
  47. self.start_pt = start_pt
  48. self.end_pt = end_pt
  49. def run(self):
  50. global hosts, done_num
  51. for i in range(self.start_pt, self.end_pt):
  52. if not running: break
  53. line = hosts[i].strip()
  54. with thread_lock:
  55. done_num += 1
  56. if line == '' or line[0:2] == '##':
  57. hosts[i] = line + '\r\n'
  58. continue
  59. arr = line.lstrip('#').split()
  60. if len(arr) == 1:
  61. domain = arr[0]
  62. else:
  63. domain = arr[1]
  64. flag = False
  65. if validate_domain(domain):
  66. ret = query_domain(domain, False)
  67. if ret in blackhole or ret == '':
  68. ret = query_domain(domain, True)
  69. if ret:
  70. flag = True
  71. arr[0] = ret
  72. if flag:
  73. if len(arr) == 1:
  74. arr.append(domain)
  75. else:
  76. arr[0] = '#' + arr[0]
  77. hosts[i] = ' '.join(arr)
  78. hosts[i] += '\r\n'
  79. class watcher_thread(threading.Thread):
  80. def run(self):
  81. total_num = len(hosts)
  82. wn = int(config['threadnum'])
  83. if wn > total_num:
  84. wn = total_num
  85. print "There are %d threads working..." % wn
  86. print "Press 'Enter' to exit.\n"
  87. while True:
  88. if sys.stdin in select.select([sys.stdin], [], [], 0)[0]:
  89. t = raw_input()
  90. global running
  91. with thread_lock:
  92. running = False
  93. print 'Waiting threads to exit...'
  94. break
  95. with thread_lock:
  96. dn = done_num
  97. outbuf = "Total: %d lines, Done: %d lines, Ratio: %d %%.\r"\
  98. % (total_num, dn, dn * 100 / total_num)
  99. print outbuf,
  100. sys.stdout.flush()
  101. if done_num == total_num:
  102. print outbuf
  103. break
  104. time.sleep(1)
  105. def query_domain(domain, tcp):
  106. cmd = "dig +short +time=2 -6 %s @'%s' '%s'"\
  107. % (config['querytype'], config['dns'], domain)
  108. if tcp:
  109. cmd = cmd[:3] + ' +tcp' + cmd[3:]
  110. proc = subprocess.Popen(shlex.split(cmd), stdout=subprocess.PIPE)
  111. out, err = proc.communicate()
  112. outarr = out.splitlines()
  113. if len(outarr) == 0:
  114. ret = ''
  115. else:
  116. if validate_ip_addr(outarr[-1]):
  117. ret = outarr[-1]
  118. else:
  119. ret = ''
  120. return ret
  121. def validate_domain(domain):
  122. pattern = '^((?!-)[*A-Za-z0-9-]{1,63}(?<!-)\\.)+[A-Za-z]{2,6}$'
  123. p = re.compile(pattern)
  124. m = p.match(domain)
  125. if m:
  126. return True
  127. else:
  128. return False
  129. def validate_ip_addr(ip_addr):
  130. if ':' in ip_addr:
  131. try:
  132. socket.inet_pton(socket.AF_INET6, ip_addr)
  133. return True
  134. except socket.error:
  135. return False
  136. else:
  137. try:
  138. socket.inet_pton(socket.AF_INET, ip_addr)
  139. return True
  140. except socket.error:
  141. return False
  142. def print_help():
  143. print('''usage: update_hosts [OPTIONS] FILE
  144. A simple multi-threading tool used to update hosts file.
  145. Options:
  146. -h, --help show this help message and exit
  147. -s DNS set another dns server, default: 2001:4860:4860::8844
  148. -o OUT_FILE ouput file, default: inputfilename.out
  149. -t QUERY_TYPE dig command query type, defalut: aaaa
  150. -n THREAD_NUM set the number of worker threads, default: 10
  151. ''')
  152. def get_config():
  153. shortopts = 'hs:o:t:n:'
  154. longopts = ['help']
  155. try:
  156. optlist, args = getopt.gnu_getopt(sys.argv[1:], shortopts, longopts)
  157. except getopt.GetoptError as e:
  158. print e, '\n'
  159. print_help()
  160. sys.exit(1)
  161. global config
  162. for key, value in optlist:
  163. if key == '-s':
  164. config['dns'] = value
  165. elif key == '-o':
  166. config['outfile'] = value
  167. elif key == '-t':
  168. config['querytype'] = value
  169. elif key == '-m':
  170. config['method'] = value
  171. elif key == '-n':
  172. config['threadnum'] = int(value)
  173. elif key in ('-h', '--help'):
  174. print_help()
  175. sys.exit(0)
  176. if len(args) != 1:
  177. print "You must specify the input hosts file (only one)."
  178. sys.exit(1)
  179. config['infile'] = args[0]
  180. if config['outfile'] == '':
  181. config['outfile'] = config['infile'] + '.out'
  182. def main():
  183. get_config()
  184. global hosts
  185. try:
  186. with open(config['infile'], 'r') as infile:
  187. hosts = infile.readlines()
  188. except IOError as e:
  189. print e
  190. sys.exit(e.errno)
  191. if os.path.exists(config['outfile']):
  192. config['outfile'] += '.new'
  193. try:
  194. outfile = open(config['outfile'], 'w')
  195. except IOError as e:
  196. print e
  197. sys.exit(e.errno)
  198. print "Input: %s Output: %s\n" % (config['infile'], config['outfile'])
  199. threads = []
  200. t = watcher_thread()
  201. t.start()
  202. threads.append(t)
  203. worker_num = config['threadnum']
  204. lines_num = len(hosts)
  205. lines_per_thread = lines_num / worker_num
  206. lines_remain = lines_num % worker_num
  207. start_pt = 0
  208. for i in range(worker_num):
  209. if not running: break
  210. lines_for_thread = lines_per_thread
  211. if lines_for_thread == 0 and lines_remain == 0:
  212. break
  213. if lines_remain > 0:
  214. lines_for_thread += 1
  215. lines_remain -= 1
  216. t = worker_thread(start_pt, start_pt + lines_for_thread)
  217. start_pt += lines_for_thread
  218. t.start()
  219. threads.append(t)
  220. for t in threads:
  221. t.join()
  222. try:
  223. outfile.writelines(hosts)
  224. except IOError as e:
  225. print e
  226. sys.exit(e.errno)
  227. sys.exit(0)
  228. if __name__ == '__main__':
  229. main()