update_hosts.py 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. import os
  4. import sys
  5. import re
  6. import socket
  7. import getopt
  8. import threading
  9. import subprocess
  10. import shlex
  11. import time
  12. import select
  13. blackhole = (
  14. '10::2222',
  15. '21:2::2',
  16. '101::1234',
  17. '200:2:807:c62d::',
  18. '200:2:253d:369e::',
  19. '200:2:2e52:ae44::',
  20. '200:2:3b18:3ad::',
  21. '200:2:4e10:310f::',
  22. '200:2:5d2e:859::',
  23. '200:2:9f6a:794b::',
  24. '200:2:cb62:741::',
  25. '200:2:cc9b:953e::',
  26. '200:2:f3b9:bb27::',
  27. '2001::212',
  28. '2001::1f0d:6644',
  29. '2001::45ab:e038',
  30. '2001::48e8:aa1a',
  31. '2001::c73b:95e6',
  32. '2001:da8:112::21ae',
  33. '2003:ff:1:2:3:4:5fff:6',
  34. '2003:ff:1:2:3:4:5fff:7',
  35. '2003:ff:1:2:3:4:5fff:8',
  36. '2003:ff:1:2:3:4:5fff:9',
  37. '2003:ff:1:2:3:4:5fff:10',
  38. '2003:ff:1:2:3:4:5fff:11',
  39. '2003:ff:1:2:3:4:5fff:12',
  40. '2123::3e12',
  41. '3059:83eb::e015:2bee:0:0',
  42. 'a028:a3e9:657f::d028:a3e9:657f:0'
  43. 'a048:6838:517f::d048:6838:517f:0',
  44. 'a068:3850:fc7e::d068:3850:fc7e:0',
  45. 'a068:dd8a:b57f::d068:dd8a:b57f:0',
  46. 'a0a8:851c:d17f::d0a8:851c:d17f:0',
  47. 'a0c8:ad86:4c7f::d0c8:ad86:4c7f:0',
  48. 'a0e8:20f9:617f::d0e8:20f9:617f:0',
  49. 'a0f8:7d0c:ad7f::d0f8:7d0c:ad7f:0',
  50. '1.2.3.4',
  51. '4.36.66.178',
  52. '8.7.198.45',
  53. '37.61.54.158',
  54. '46.82.174.68',
  55. '59.24.3.173',
  56. '64.33.88.161',
  57. '78.16.49.15',
  58. '93.46.8.89',
  59. '127.0.0.1',
  60. '159.106.121.75',
  61. '202.181.7.85',
  62. '203.98.7.65',
  63. '243.185.187.39'
  64. )
  65. dns = {
  66. 'google_a': '2001:4860:4860::8888',
  67. 'google_b': '2001:4860:4860::8844',
  68. 'he_net': '2001:470:20::2',
  69. 'lax_he_net': '2001:470:0:9d::2'
  70. }
  71. config = {
  72. 'dns': dns['google_b'],
  73. 'infile': '',
  74. 'outfile': '',
  75. 'querytype': 'aaaa',
  76. 'cname': False,
  77. 'threadnum': 10
  78. }
  79. hosts = []
  80. done_num = 0
  81. thread_lock = threading.Lock()
  82. running = True
  83. class worker_thread(threading.Thread):
  84. def __init__(self, start_pt, end_pt):
  85. threading.Thread.__init__(self)
  86. self.start_pt = start_pt
  87. self.end_pt = end_pt
  88. def run(self):
  89. global hosts, done_num
  90. for i in range(self.start_pt, self.end_pt):
  91. if not running: break
  92. line = hosts[i].strip()
  93. if line == '' or line[0:2] == '##':
  94. hosts[i] = line + '\r\n'
  95. with thread_lock: done_num += 1
  96. continue
  97. # uncomment line
  98. line = line.lstrip('#')
  99. # split comment that appended to line
  100. comment = ''
  101. p = line.find('#')
  102. if p > 0:
  103. comment = line[p:]
  104. line = line[:p]
  105. arr = line.split()
  106. if len(arr) == 1:
  107. domain = arr[0]
  108. else:
  109. domain = arr[1]
  110. flag = False
  111. if validate_domain(domain):
  112. cname, ip = query_domain(domain, False)
  113. if ip == '' or ip in blackhole:
  114. cname, ip = query_domain(domain, True)
  115. if ip:
  116. flag = True
  117. arr[0] = ip
  118. if len(arr) == 1:
  119. arr.append(domain)
  120. if config['cname'] and cname:
  121. arr.append('#' + cname)
  122. else:
  123. if comment:
  124. arr.append(comment)
  125. if not flag:
  126. arr[0] = '#' + arr[0]
  127. if comment:
  128. arr.append(comment)
  129. hosts[i] = ' '.join(arr)
  130. hosts[i] += '\r\n'
  131. with thread_lock: done_num += 1
  132. class watcher_thread(threading.Thread):
  133. def run(self):
  134. total_num = len(hosts)
  135. wn = int(config['threadnum'])
  136. if wn > total_num:
  137. wn = total_num
  138. print "There are %d threads working..." % wn
  139. print "Press 'Enter' to exit.\n"
  140. while True:
  141. if sys.stdin in select.select([sys.stdin], [], [], 0)[0]:
  142. raw_input()
  143. print 'Waiting threads to exit...'
  144. global running
  145. with thread_lock:
  146. running = False
  147. break
  148. dn = done_num
  149. outbuf = "Total: %d lines, Done: %d lines, Ratio: %d %%.\r"\
  150. % (total_num, dn, dn * 100 / total_num)
  151. print outbuf,
  152. sys.stdout.flush()
  153. if dn == total_num:
  154. print outbuf
  155. break
  156. time.sleep(1)
  157. def query_domain(domain, tcp):
  158. cmd = "dig +short +time=2 -6 %s @'%s' '%s'"\
  159. % (config['querytype'], config['dns'], domain)
  160. if tcp:
  161. cmd = cmd + ' +tcp'
  162. proc = subprocess.Popen(shlex.split(cmd), stdout=subprocess.PIPE)
  163. out, _ = proc.communicate()
  164. outarr = out.splitlines()
  165. cname = ip = ''
  166. for v in outarr:
  167. if cname == '' and validate_domain(v[:-1]):
  168. cname = v[:-1]
  169. if ip == '' and validate_ip_addr(v):
  170. ip = v
  171. break
  172. return (cname, ip)
  173. def validate_domain(domain):
  174. pattern = '^((?!-)[*A-Za-z0-9-]{1,63}(?<!-)\\.)+[A-Za-z]{2,6}$'
  175. p = re.compile(pattern)
  176. m = p.match(domain)
  177. if m:
  178. return True
  179. else:
  180. return False
  181. def validate_ip_addr(ip_addr):
  182. if ':' in ip_addr:
  183. try:
  184. socket.inet_pton(socket.AF_INET6, ip_addr)
  185. return True
  186. except socket.error:
  187. return False
  188. else:
  189. try:
  190. socket.inet_pton(socket.AF_INET, ip_addr)
  191. return True
  192. except socket.error:
  193. return False
  194. def print_help():
  195. print '''usage: update_hosts [OPTIONS] FILE
  196. A simple multi-threading tool used for updating hosts file.
  197. Options:
  198. -h, --help show this help message and exit
  199. -s DNS set another dns server, default: 2001:4860:4860::8844
  200. -o OUT_FILE output file, default: inputfilename.out
  201. -t QUERY_TYPE dig command query type, default: aaaa
  202. -c, --cname write canonical name into hosts file
  203. -n THREAD_NUM set the number of worker threads, default: 10
  204. '''
  205. def get_config():
  206. shortopts = 'hs:o:t:n:c'
  207. longopts = ['help', 'cname']
  208. try:
  209. optlist, args = getopt.gnu_getopt(sys.argv[1:], shortopts, longopts)
  210. except getopt.GetoptError as e:
  211. print e, '\n'
  212. print_help()
  213. sys.exit(1)
  214. global config
  215. for key, value in optlist:
  216. if key == '-s':
  217. config['dns'] = value
  218. elif key == '-o':
  219. config['outfile'] = value
  220. elif key == '-t':
  221. config['querytype'] = value
  222. elif key in ('-c', '--cname'):
  223. config['cname'] = True
  224. elif key == '-n':
  225. config['threadnum'] = int(value)
  226. elif key in ('-h', '--help'):
  227. print_help()
  228. sys.exit(0)
  229. if len(args) != 1:
  230. print "You must specify the input hosts file (only one)."
  231. sys.exit(1)
  232. config['infile'] = args[0]
  233. if config['outfile'] == '':
  234. config['outfile'] = config['infile'] + '.out'
  235. def main():
  236. get_config()
  237. dig_path = '/usr/bin/dig'
  238. if not os.path.isfile(dig_path) or not os.access(dig_path, os.X_OK):
  239. print "It seems you don't have 'dig' command installed properly "\
  240. "on your system."
  241. sys.exit(2)
  242. global hosts
  243. try:
  244. with open(config['infile'], 'r') as infile:
  245. hosts = infile.readlines()
  246. except IOError as e:
  247. print e
  248. sys.exit(e.errno)
  249. if os.path.exists(config['outfile']):
  250. config['outfile'] += '.new'
  251. try:
  252. outfile = open(config['outfile'], 'w')
  253. except IOError as e:
  254. print e
  255. sys.exit(e.errno)
  256. print "Input: %s Output: %s\n" % (config['infile'], config['outfile'])
  257. threads = []
  258. t = watcher_thread()
  259. t.start()
  260. threads.append(t)
  261. worker_num = config['threadnum']
  262. lines_num = len(hosts)
  263. lines_per_thread = lines_num / worker_num
  264. lines_remain = lines_num % worker_num
  265. start_pt = 0
  266. for _ in range(worker_num):
  267. if not running: break
  268. lines_for_thread = lines_per_thread
  269. if lines_for_thread == 0 and lines_remain == 0:
  270. break
  271. if lines_remain > 0:
  272. lines_for_thread += 1
  273. lines_remain -= 1
  274. t = worker_thread(start_pt, start_pt + lines_for_thread)
  275. start_pt += lines_for_thread
  276. t.start()
  277. threads.append(t)
  278. for t in threads:
  279. t.join()
  280. try:
  281. outfile.writelines(hosts)
  282. except IOError as e:
  283. print e
  284. sys.exit(e.errno)
  285. sys.exit(0)
  286. if __name__ == '__main__':
  287. main()