update_hosts.py 7.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. import os
  4. import sys
  5. import re
  6. import socket
  7. import getopt
  8. import threading
  9. import subprocess
  10. import shlex
  11. import time
  12. import select
  # Addresses that are not trusted as answers: when a lookup returns one of
  # these, worker_thread repeats the query over TCP.
  blackhole = (
      '10::2222', '101::1234', '2001::212', '2001:da8:112::21ae',
      '2003:ff:1:2:3:4:5fff:6', '2003:ff:1:2:3:4:5fff:7',
      '2003:ff:1:2:3:4:5fff:8', '2003:ff:1:2:3:4:5fff:9',
      '2003:ff:1:2:3:4:5fff:10', '2003:ff:1:2:3:4:5fff:11',
      '2003:ff:1:2:3:4:5fff:12',
      '21:2::2', '2123::3e12',
  )

  # IPv6 DNS servers that can be selected, keyed by a short name.
  dns = dict(
      google_a='2001:4860:4860::8888',
      google_b='2001:4860:4860::8844',
      he_net='2001:470:20::2',
      lax_he_net='2001:470:0:9d::2',
  )

  # Run-time settings; get_config() overwrites them from the command line.
  config = dict(
      dns=dns['google_b'],
      infile='',
      outfile='',
      querytype='aaaa',
      cname=False,
      threadnum=10,
  )

  # State shared between main(), the workers and the watcher.
  hosts = []                      # lines of the input file, rewritten in place
  done_num = 0                    # how many lines have been processed so far
  thread_lock = threading.Lock()  # taken around updates of done_num/running
  running = True                  # set to False by the watcher to stop workers
  class worker_thread(threading.Thread):
      """Worker that re-resolves the hosts entries in one slice of ``hosts``.

      Each worker owns the half-open index range ``[start_pt, end_pt)`` of
      the module-level ``hosts`` list and rewrites those entries in place.
      Every processed line is stored back stripped and terminated by CRLF.
      """

      def __init__(self, start_pt, end_pt):
          threading.Thread.__init__(self)
          self.start_pt = start_pt
          self.end_pt = end_pt

      def run(self):
          """Rewrite each line of the slice, stopping early if asked to."""
          global hosts, done_num
          for i in range(self.start_pt, self.end_pt):
              # The watcher clears ``running`` when the user aborts.
              if not running:
                  break
              hosts[i] = self._rewrite(hosts[i].strip()) + '\r\n'
              with thread_lock:
                  done_num += 1

      @staticmethod
      def _rewrite(line):
          """Return the updated form of one stripped hosts line (no newline).

          * Blank lines and lines starting with ``##`` are returned as is.
          * Otherwise leading ``#`` characters are removed, the domain is
            looked up, and the line becomes ``<ip> <domain> [...]``.
          * If no usable address is found the entry is returned commented
            out (prefixed with ``#``).
          """
          if line == '' or line.startswith('##'):
              return line

          # Uncomment the entry so a previously disabled host is retried.
          entry = line.lstrip('#')

          # Split off a comment appended to the entry.
          comment = ''
          pos = entry.find('#')
          if pos > 0:
              comment = entry[pos:]
              entry = entry[:pos]

          arr = entry.split()
          if not arr:
              # Lines such as "#" or "#  # note" carry no address or domain
              # field at all; indexing arr would raise IndexError and kill
              # the thread, so keep the line exactly as it was.
              return line

          # A single field is taken to be the domain; otherwise the domain
          # is the second field (the first one being the old address).
          domain = arr[0] if len(arr) == 1 else arr[1]

          cname = ip = ''
          if validate_domain(domain):
              cname, ip = query_domain(domain, False)
              if ip == '' or ip in blackhole:
                  # No answer or an untrusted one: ask again over TCP.
                  cname, ip = query_domain(domain, True)
              if ip in blackhole:
                  # Still an untrusted address; treat it as "no answer"
                  # instead of writing it into the hosts file.
                  ip = ''

          if ip:
              arr[0] = ip
              if len(arr) == 1:
                  arr.append(domain)
              if config['cname'] and cname:
                  arr.append('#' + cname)
              elif comment:
                  arr.append(comment)
          else:
              # Lookup failed: leave the entry in place but commented out.
              arr[0] = '#' + arr[0]
              if comment:
                  arr.append(comment)
          return ' '.join(arr)
  class watcher_thread(threading.Thread):
      """Progress reporter that also lets the user abort by pressing Enter."""

      def run(self):
          """Print progress once a second until finished or aborted.

          The loop ends when ``done_num`` reaches the number of lines in
          ``hosts``, or when a line is read from standard input, in which
          case ``running`` is set to False so that the workers stop.
          """
          global running
          total_num = len(hosts)
          wn = min(int(config['threadnum']), total_num)
          print("There are %d threads working..." % wn)
          print("Press 'Enter' to exit.\n")

          watch_stdin = True
          while True:
              # Zero timeout: only poll whether input is waiting on stdin.
              if watch_stdin and \
                      sys.stdin in select.select([sys.stdin], [], [], 0)[0]:
                  if sys.stdin.readline():
                      print('Waiting threads to exit...')
                      with thread_lock:
                          running = False
                      break
                  # Empty read means EOF (stdin closed or redirected):
                  # nobody can press Enter, so stop polling and just keep
                  # reporting progress.
                  watch_stdin = False

              dn = done_num
              # An empty input file has nothing to do; report it as
              # complete instead of dividing by zero.
              ratio = dn * 100 // total_num if total_num else 100
              outbuf = "Total: %d lines, Done: %d lines, Ratio: %d %%.\r" \
                  % (total_num, dn, ratio)
              # The trailing '\r' makes the next report overwrite this one.
              sys.stdout.write(outbuf)
              sys.stdout.flush()
              if dn == total_num:
                  print(outbuf)
                  break
              time.sleep(1)
  def query_domain(domain, tcp):
      """Look up *domain* with ``dig`` and return ``(cname, ip)``.

      The query type and DNS server are taken from ``config``.

      :param domain: host name to resolve.
      :param tcp: when true, ``+tcp`` is added so dig queries over TCP.
      :returns: a ``(cname, ip)`` tuple of strings. ``cname`` is the first
          output line that is a domain name (trailing dot removed) and
          ``ip`` the first line that is an IP address; either is ``''``
          when no such line was printed before the scan stopped.
      """
      # Build the argument vector directly instead of formatting a quoted
      # string and re-splitting it: a quote character in the server or
      # domain can then neither break the parsing nor add arguments.
      cmd = ['dig', '+short', '+time=2', '-6']
      # The query type is still split on whitespace, so a value holding
      # several dig options keeps working.
      cmd.extend(shlex.split(config['querytype']))
      cmd.append('@' + config['dns'])
      cmd.append(domain)
      if tcp:
          cmd.append('+tcp')

      # Text mode so the output is a native string on any Python version.
      proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                              universal_newlines=True)
      out, _ = proc.communicate()

      cname = ip = ''
      for v in out.splitlines():
          # v[:-1] drops the trailing dot dig prints after a domain name.
          if cname == '' and validate_domain(v[:-1]):
              cname = v[:-1]
          if validate_ip_addr(v):
              ip = v
              break
      return (cname, ip)
  # One or more dot-terminated labels (1-63 characters of letters, digits,
  # '-' or '*', neither starting nor ending with '-') followed by an
  # alphabetic top-level domain. The TLD may be up to 63 letters long so
  # that names under long TLDs (e.g. ".photography") are accepted.
  _DOMAIN_RE = re.compile(
      r'^((?!-)[*A-Za-z0-9-]{1,63}(?<!-)\.)+[A-Za-z]{2,63}$')


  def validate_domain(domain):
      """Return True if *domain* looks like a dotted host name.

      :param domain: candidate name, without a trailing dot.
      :returns: ``True`` when the name has at least one label plus an
          alphabetic top-level domain, ``False`` otherwise.
      """
      return _DOMAIN_RE.match(domain) is not None
  def validate_ip_addr(ip_addr):
      """Tell whether *ip_addr* is a well-formed IP address literal.

      A string containing ':' is parsed as IPv6, anything else as IPv4.

      :returns: ``True`` if ``socket.inet_pton`` accepts the string for
          the chosen address family, ``False`` otherwise.
      """
      family = socket.AF_INET6 if ':' in ip_addr else socket.AF_INET
      try:
          socket.inet_pton(family, ip_addr)
      except socket.error:
          return False
      return True
  def print_help():
      """Print the command-line usage summary to standard output."""
      # Kept as a list of lines; the empty last element makes the text end
      # with a newline, so a blank line follows the help on screen.
      lines = (
          'usage: update_hosts [OPTIONS] FILE',
          'A simple multi-threading tool used to update hosts file.',
          'Options:',
          '-h, --help show this help message and exit',
          '-s DNS set another dns server, default: 2001:4860:4860::8844',
          '-o OUT_FILE output file, default: inputfilename.out',
          '-t QUERY_TYPE dig command query type, default: aaaa',
          '-c, --cname write canonical name into hosts file',
          '-n THREAD_NUM set the number of worker threads, default: 10',
          '',
      )
      print('\n'.join(lines))
  def get_config():
      """Fill the module-level ``config`` dict from ``sys.argv``.

      Recognised options: ``-s DNS``, ``-o OUT_FILE``, ``-t QUERY_TYPE``,
      ``-n THREAD_NUM``, ``-c``/``--cname`` and ``-h``/``--help``. Exactly
      one positional argument, the input hosts file, is required. When no
      output file is given it defaults to the input name plus ``.out``.

      Exits the process with status 1 on an unknown option, on a thread
      number that is not a positive integer, or on a wrong number of
      positional arguments; ``-h`` prints the help and exits with 0.
      """
      try:
          optlist, args = getopt.gnu_getopt(
              sys.argv[1:], 'hs:o:t:n:c', ['help', 'cname'])
      except getopt.GetoptError as e:
          # Error text, then blank lines before the usage summary.
          print('%s \n' % e)
          print_help()
          sys.exit(1)

      for key, value in optlist:
          if key == '-s':
              config['dns'] = value
          elif key == '-o':
              config['outfile'] = value
          elif key == '-t':
              config['querytype'] = value
          elif key in ('-c', '--cname'):
              config['cname'] = True
          elif key == '-n':
              # main() divides the line count by this number, so anything
              # that is not an integer >= 1 has to be rejected here.
              try:
                  threadnum = int(value)
              except ValueError:
                  threadnum = 0
              if threadnum < 1:
                  print("The number of threads must be a positive "
                        "integer, got '%s'." % value)
                  sys.exit(1)
              config['threadnum'] = threadnum
          elif key in ('-h', '--help'):
              print_help()
              sys.exit(0)

      if len(args) != 1:
          print("You must specify the input hosts file (only one).")
          sys.exit(1)

      config['infile'] = args[0]
      if config['outfile'] == '':
          config['outfile'] = config['infile'] + '.out'
  def main():
      """Entry point: read the hosts file, update it with threads, write it.

      Steps: parse the command line, make sure ``dig`` is available, load
      the input file into ``hosts``, start the watcher and the workers,
      wait for all of them and write the result to the output file. If the
      output file already exists, ``.new`` is appended to its name.

      Exit status: 0 on success, 2 when ``dig`` cannot be found, and the
      I/O error number (or 1 if there is none) when a file cannot be read
      or written.
      """
      global hosts
      get_config()

      if _find_dig() is None:
          print("It seems you don't have 'dig' command installed properly "
                "on your system.")
          sys.exit(2)

      try:
          with open(config['infile'], 'r') as infile:
              hosts = infile.readlines()
      except IOError as e:
          print(e)
          # errno may be None; never report success for a failure.
          sys.exit(e.errno or 1)

      if os.path.exists(config['outfile']):
          config['outfile'] += '.new'

      try:
          outfile = open(config['outfile'], 'w')
      except IOError as e:
          print(e)
          sys.exit(e.errno or 1)

      print("Input: %s Output: %s\n" % (config['infile'], config['outfile']))

      # The with-block closes the output file on every way out of here,
      # including the sys.exit() calls below.
      with outfile:
          watcher = watcher_thread()
          watcher.start()
          threads = [watcher]

          # Guard against a thread count below 1, which would make the
          # division fail or start no worker at all.
          worker_num = max(1, int(config['threadnum']))
          lines_per_thread, lines_remain = divmod(len(hosts), worker_num)

          start_pt = 0
          for _ in range(worker_num):
              if not running:
                  break
              lines_for_thread = lines_per_thread
              if lines_for_thread == 0 and lines_remain == 0:
                  # Fewer lines than threads: nothing left to hand out.
                  break
              if lines_remain > 0:
                  # Spread the remainder one line at a time over the
                  # first workers.
                  lines_for_thread += 1
                  lines_remain -= 1
              t = worker_thread(start_pt, start_pt + lines_for_thread)
              start_pt += lines_for_thread
              t.start()
              threads.append(t)

          for t in threads:
              t.join()

          try:
              outfile.writelines(hosts)
          except IOError as e:
              print(e)
              sys.exit(e.errno or 1)

      sys.exit(0)


  def _find_dig():
      """Return the path of an executable ``dig`` found on PATH, or None."""
      # query_domain() starts "dig" through the PATH lookup, so the check
      # looks in the same places instead of one fixed location.
      for folder in os.environ.get('PATH', os.defpath).split(os.pathsep):
          candidate = os.path.join(folder, 'dig')
          if os.path.isfile(candidate) and os.access(candidate, os.X_OK):
              return candidate
      return None
  # Run the tool only when this file is executed as a script.
  if __name__ == "__main__":
      main()