update_hosts.py 6.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. import os
  4. import sys
  5. import re
  6. import socket
  7. import getopt
  8. import threading
  9. import subprocess
  10. import shlex
  11. import time
  12. import select
  13. blackhole = (
  14. '10::2222',
  15. '101::1234',
  16. '2001::212',
  17. '2001:da8:112::21ae',
  18. '2003:ff:1:2:3:4:5fff:6',
  19. '2003:ff:1:2:3:4:5fff:7',
  20. '2003:ff:1:2:3:4:5fff:8',
  21. '2003:ff:1:2:3:4:5fff:9',
  22. '2003:ff:1:2:3:4:5fff:10',
  23. '2003:ff:1:2:3:4:5fff:11',
  24. '2003:ff:1:2:3:4:5fff:12',
  25. '21:2::2',
  26. '2123::3e12')
  27. dns = {
  28. 'google_a':'2001:4860:4860::8888',
  29. 'google_b':'2001:4860:4860::8844',
  30. 'he_net':'2001:470:20::2',
  31. 'lax_he_net':'2001:470:0:9d::2'
  32. }
  33. config = {
  34. 'dns':dns['google_b'],
  35. 'infile':'hosts',
  36. 'outfile':'hosts.out',
  37. 'querytype':'aaaa',
  38. 'threadnum':10
  39. }
  40. hosts = []
  41. done_num = 0
  42. thread_lock = threading.Lock()
  43. running = True
  44. class worker_thread(threading.Thread):
  45. def __init__(self, start_pt, end_pt):
  46. threading.Thread.__init__(self)
  47. self.start_pt = start_pt
  48. self.end_pt = end_pt
  49. def run(self):
  50. global hosts, done_num
  51. for i in range(self.start_pt, self.end_pt):
  52. if not running: break
  53. line = hosts[i].strip()
  54. with thread_lock:
  55. done_num += 1
  56. if line == '' or line[0:2] == '##':
  57. hosts[i] = line + '\r\n'
  58. continue
  59. arr = line.lstrip('#').split()
  60. if len(arr) == 1:
  61. domain = arr[0]
  62. else:
  63. domain = arr[1]
  64. flag = False
  65. if validate_domain(domain):
  66. ret = query_domain(domain, False)
  67. if ret in blackhole or ret == '':
  68. ret = query_domain(domain, True)
  69. if ret:
  70. flag = True
  71. arr[0] = ret
  72. if flag:
  73. if len(arr) == 1:
  74. arr.append(domain)
  75. else:
  76. arr[0] = '#' + arr[0]
  77. hosts[i] = ' '.join(arr)
  78. hosts[i] += '\r\n'
  79. class watcher_thread(threading.Thread):
  80. def run(self):
  81. total_num = len(hosts)
  82. wn = int(config['threadnum'])
  83. if wn > total_num:
  84. wn = total_num
  85. print "There are %d threads working..." % wn
  86. print "Press 'Enter' to exit.\n"
  87. while True:
  88. if sys.stdin in select.select([sys.stdin], [], [], 0)[0]:
  89. t = raw_input()
  90. global running
  91. with thread_lock:
  92. running = False
  93. print 'Waiting threads to exit...'
  94. break
  95. with thread_lock:
  96. dn = done_num
  97. outbuf = "Total: %d lines, Done: %d lines, Ratio: %d %%.\r"\
  98. % (total_num, dn, dn * 100 / total_num)
  99. print outbuf,
  100. sys.stdout.flush()
  101. if done_num == total_num:
  102. print outbuf
  103. break
  104. time.sleep(1)
  105. def query_domain(domain, tcp):
  106. cmd = "dig +short +time=2 -6 %s @'%s' '%s'"\
  107. % (config['querytype'], config['dns'], domain)
  108. if tcp:
  109. cmd = cmd[:3] + ' +tcp' + cmd[3:]
  110. proc = subprocess.Popen(shlex.split(cmd), stdout=subprocess.PIPE)
  111. out, err = proc.communicate()
  112. outarr = out.splitlines()
  113. if len(outarr) == 0:
  114. ret = ''
  115. else:
  116. if validate_ip_addr(outarr[-1]):
  117. ret = outarr[-1]
  118. else:
  119. ret = ''
  120. return ret
  121. def validate_domain(domain):
  122. pattern = '^((?!-)[*A-Za-z0-9-]{1,63}(?<!-)\\.)+[A-Za-z]{2,6}$'
  123. p = re.compile(pattern)
  124. m = p.match(domain)
  125. if m:
  126. return True
  127. else:
  128. return False
  129. def validate_ip_addr(ip_addr):
  130. if ':' in ip_addr:
  131. try:
  132. socket.inet_pton(socket.AF_INET6, ip_addr)
  133. return True
  134. except socket.error:
  135. return False
  136. else:
  137. try:
  138. socket.inet_pton(socket.AF_INET, ip_addr)
  139. return True
  140. except socket.error:
  141. return False
  142. def print_help():
  143. print('''usage: update_hosts [OPTIONS] FILE
  144. A simple multi-threading tool used to update hosts file.
  145. Options:
  146. -h, --help show this help message and exit
  147. -s DNS set another dns server, default: 2001:4860:4860::8844
  148. -o OUT_FILE ouput file, default: inputfilename.out
  149. -t QUERY_TYPE dig command query type, defalut: aaaa
  150. -n THREAD_NUM set the number of worker threads, default: 10
  151. ''')
  152. def get_config():
  153. shortopts = 'hs:o:t:n:'
  154. longopts = ['help']
  155. try:
  156. optlist, args = getopt.gnu_getopt(sys.argv[1:], shortopts, longopts)
  157. except getopt.GetoptError as e:
  158. print e, '\n'
  159. print_help()
  160. sys.exit(1)
  161. global config
  162. if len(args) != 1:
  163. print "You must specify the input hosts file (only one)."
  164. sys.exit(1)
  165. config['infile'] = args[0]
  166. config['outfile'] = args[0] + '.out'
  167. for key, value in optlist:
  168. if key == '-s':
  169. config['dns'] = value
  170. elif key == '-o':
  171. config['outfile'] = value
  172. elif key == '-t':
  173. config['querytype'] = value
  174. elif key == '-m':
  175. config['method'] = value
  176. elif key == '-n':
  177. config['threadnum'] = int(value)
  178. elif key in ('-h', '--help'):
  179. print_help()
  180. sys.exit(0)
  181. def main():
  182. get_config()
  183. global hosts
  184. try:
  185. with open(config['infile'], 'r') as infile:
  186. hosts = infile.readlines()
  187. except IOError as e:
  188. print e
  189. sys.exit(e.errno)
  190. if os.path.exists(config['outfile']):
  191. config['outfile'] += '.new'
  192. try:
  193. outfile = open(config['outfile'], 'w')
  194. except IOError as e:
  195. print e
  196. sys.exit(e.errno)
  197. print "Input: %s Output: %s\n" % (config['infile'], config['outfile'])
  198. threads = []
  199. t = watcher_thread()
  200. t.start()
  201. threads.append(t)
  202. worker_num = config['threadnum']
  203. lines_num = len(hosts)
  204. lines_per_thread = lines_num / worker_num
  205. lines_remain = lines_num % worker_num
  206. start_pt = 0
  207. for i in range(worker_num):
  208. if not running: break
  209. lines_for_thread = lines_per_thread
  210. if lines_for_thread == 0 and lines_remain == 0:
  211. break
  212. if lines_remain > 0:
  213. lines_for_thread += 1
  214. lines_remain -= 1
  215. t = worker_thread(start_pt, start_pt + lines_for_thread)
  216. start_pt += lines_for_thread
  217. t.start()
  218. threads.append(t)
  219. for t in threads:
  220. t.join()
  221. try:
  222. outfile.writelines(hosts)
  223. except IOError as e:
  224. print e
  225. sys.exit(e.errno)
  226. sys.exit(0)
  227. if __name__ == '__main__':
  228. main()