update_hosts.py 7.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. import os
  4. import sys
  5. import re
  6. import socket
  7. import getopt
  8. import threading
  9. import subprocess
  10. import shlex
  11. import time
  12. import select
  13. blackhole = (
  14. '10::2222',
  15. '101::1234',
  16. '2001::212',
  17. '2001:da8:112::21ae',
  18. '2003:ff:1:2:3:4:5fff:6',
  19. '2003:ff:1:2:3:4:5fff:7',
  20. '2003:ff:1:2:3:4:5fff:8',
  21. '2003:ff:1:2:3:4:5fff:9',
  22. '2003:ff:1:2:3:4:5fff:10',
  23. '2003:ff:1:2:3:4:5fff:11',
  24. '2003:ff:1:2:3:4:5fff:12',
  25. '21:2::2',
  26. '2123::3e12',
  27. '1.2.3.4',
  28. '159.106.121.75',
  29. '202.181.7.85',
  30. '203.98.7.65',
  31. '243.185.187.39',
  32. '37.61.54.158',
  33. '4.36.66.178',
  34. '46.82.174.68',
  35. '59.24.3.173',
  36. '64.33.88.161',
  37. '78.16.49.15',
  38. '8.7.198.45',
  39. '93.46.8.89',
  40. )
  41. dns = {
  42. 'google_a': '2001:4860:4860::8888',
  43. 'google_b': '2001:4860:4860::8844',
  44. 'he_net': '2001:470:20::2',
  45. 'lax_he_net': '2001:470:0:9d::2'
  46. }
  47. config = {
  48. 'dns': dns['google_b'],
  49. 'infile': '',
  50. 'outfile': '',
  51. 'querytype': 'aaaa',
  52. 'cname': False,
  53. 'threadnum': 10
  54. }
  55. hosts = []
  56. done_num = 0
  57. thread_lock = threading.Lock()
  58. running = True
  59. class worker_thread(threading.Thread):
  60. def __init__(self, start_pt, end_pt):
  61. threading.Thread.__init__(self)
  62. self.start_pt = start_pt
  63. self.end_pt = end_pt
  64. def run(self):
  65. global hosts, done_num
  66. for i in range(self.start_pt, self.end_pt):
  67. if not running: break
  68. line = hosts[i].strip()
  69. if line == '' or line[0:2] == '##':
  70. hosts[i] = line + '\r\n'
  71. with thread_lock: done_num += 1
  72. continue
  73. # uncomment line
  74. line = line.lstrip('#')
  75. # split comment that appended to line
  76. comment = ''
  77. p = line.find('#')
  78. if p > 0:
  79. comment = line[p:]
  80. line = line[:p]
  81. arr = line.split()
  82. if len(arr) == 1:
  83. domain = arr[0]
  84. else:
  85. domain = arr[1]
  86. flag = False
  87. if validate_domain(domain):
  88. cname, ip = query_domain(domain, False)
  89. if ip == '' or ip in blackhole:
  90. cname, ip = query_domain(domain, True)
  91. if ip:
  92. flag = True
  93. arr[0] = ip
  94. if len(arr) == 1:
  95. arr.append(domain)
  96. if config['cname'] and cname:
  97. arr.append('#' + cname)
  98. else:
  99. if comment:
  100. arr.append(comment)
  101. if not flag:
  102. arr[0] = '#' + arr[0]
  103. if comment:
  104. arr.append(comment)
  105. hosts[i] = ' '.join(arr)
  106. hosts[i] += '\r\n'
  107. with thread_lock: done_num += 1
  108. class watcher_thread(threading.Thread):
  109. def run(self):
  110. total_num = len(hosts)
  111. wn = int(config['threadnum'])
  112. if wn > total_num:
  113. wn = total_num
  114. print "There are %d threads working..." % wn
  115. print "Press 'Enter' to exit.\n"
  116. while True:
  117. if sys.stdin in select.select([sys.stdin], [], [], 0)[0]:
  118. raw_input()
  119. print 'Waiting threads to exit...'
  120. global running
  121. with thread_lock:
  122. running = False
  123. break
  124. dn = done_num
  125. outbuf = "Total: %d lines, Done: %d lines, Ratio: %d %%.\r"\
  126. % (total_num, dn, dn * 100 / total_num)
  127. print outbuf,
  128. sys.stdout.flush()
  129. if dn == total_num:
  130. print outbuf
  131. break
  132. time.sleep(1)
  133. def query_domain(domain, tcp):
  134. cmd = "dig +short +time=2 -6 %s @'%s' '%s'"\
  135. % (config['querytype'], config['dns'], domain)
  136. if tcp:
  137. cmd = cmd + ' +tcp'
  138. proc = subprocess.Popen(shlex.split(cmd), stdout=subprocess.PIPE)
  139. out, _ = proc.communicate()
  140. outarr = out.splitlines()
  141. cname = ip = ''
  142. for v in outarr:
  143. if cname == '' and validate_domain(v[:-1]):
  144. cname = v[:-1]
  145. if ip == '' and validate_ip_addr(v):
  146. ip = v
  147. break
  148. return (cname, ip)
  149. def validate_domain(domain):
  150. pattern = '^((?!-)[*A-Za-z0-9-]{1,63}(?<!-)\\.)+[A-Za-z]{2,6}$'
  151. p = re.compile(pattern)
  152. m = p.match(domain)
  153. if m:
  154. return True
  155. else:
  156. return False
  157. def validate_ip_addr(ip_addr):
  158. if ':' in ip_addr:
  159. try:
  160. socket.inet_pton(socket.AF_INET6, ip_addr)
  161. return True
  162. except socket.error:
  163. return False
  164. else:
  165. try:
  166. socket.inet_pton(socket.AF_INET, ip_addr)
  167. return True
  168. except socket.error:
  169. return False
  170. def print_help():
  171. print '''usage: update_hosts [OPTIONS] FILE
  172. A simple multi-threading tool used to update hosts file.
  173. Options:
  174. -h, --help show this help message and exit
  175. -s DNS set another dns server, default: 2001:4860:4860::8844
  176. -o OUT_FILE ouput file, default: inputfilename.out
  177. -t QUERY_TYPE dig command query type, defalut: aaaa
  178. -c, --cname write canonical name into hosts file
  179. -n THREAD_NUM set the number of worker threads, default: 10
  180. '''
  181. def get_config():
  182. shortopts = 'hs:o:t:n:c'
  183. longopts = ['help', 'cname']
  184. try:
  185. optlist, args = getopt.gnu_getopt(sys.argv[1:], shortopts, longopts)
  186. except getopt.GetoptError as e:
  187. print e, '\n'
  188. print_help()
  189. sys.exit(1)
  190. global config
  191. for key, value in optlist:
  192. if key == '-s':
  193. config['dns'] = value
  194. elif key == '-o':
  195. config['outfile'] = value
  196. elif key == '-t':
  197. config['querytype'] = value
  198. elif key in ('-c', '--cname'):
  199. config['cname'] = True
  200. elif key == '-n':
  201. config['threadnum'] = int(value)
  202. elif key in ('-h', '--help'):
  203. print_help()
  204. sys.exit(0)
  205. if len(args) != 1:
  206. print "You must specify the input hosts file (only one)."
  207. sys.exit(1)
  208. config['infile'] = args[0]
  209. if config['outfile'] == '':
  210. config['outfile'] = config['infile'] + '.out'
  211. def main():
  212. get_config()
  213. dig_path = '/usr/bin/dig'
  214. if not os.path.isfile(dig_path) or not os.access(dig_path, os.X_OK):
  215. print "It seems you don't have 'dig' command installed properly "\
  216. "on your system."
  217. sys.exit(2)
  218. global hosts
  219. try:
  220. with open(config['infile'], 'r') as infile:
  221. hosts = infile.readlines()
  222. except IOError as e:
  223. print e
  224. sys.exit(e.errno)
  225. if os.path.exists(config['outfile']):
  226. config['outfile'] += '.new'
  227. try:
  228. outfile = open(config['outfile'], 'w')
  229. except IOError as e:
  230. print e
  231. sys.exit(e.errno)
  232. print "Input: %s Output: %s\n" % (config['infile'], config['outfile'])
  233. threads = []
  234. t = watcher_thread()
  235. t.start()
  236. threads.append(t)
  237. worker_num = config['threadnum']
  238. lines_num = len(hosts)
  239. lines_per_thread = lines_num / worker_num
  240. lines_remain = lines_num % worker_num
  241. start_pt = 0
  242. for _ in range(worker_num):
  243. if not running: break
  244. lines_for_thread = lines_per_thread
  245. if lines_for_thread == 0 and lines_remain == 0:
  246. break
  247. if lines_remain > 0:
  248. lines_for_thread += 1
  249. lines_remain -= 1
  250. t = worker_thread(start_pt, start_pt + lines_for_thread)
  251. start_pt += lines_for_thread
  252. t.start()
  253. threads.append(t)
  254. for t in threads:
  255. t.join()
  256. try:
  257. outfile.writelines(hosts)
  258. except IOError as e:
  259. print e
  260. sys.exit(e.errno)
  261. sys.exit(0)
  262. if __name__ == '__main__':
  263. main()