update_hosts.py 8.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. import os
  4. import sys
  5. import re
  6. import socket
  7. import getopt
  8. import threading
  9. import subprocess
  10. import shlex
  11. import time
  12. import select
  13. blackhole = (
  14. '10::2222',
  15. '21:2::2',
  16. '101::1234',
  17. '200:2:807:c62d::',
  18. '200:2:253d:369e::',
  19. '200:2:2e52:ae44::',
  20. '200:2:3b18:3ad::',
  21. '200:2:4e10:310f::',
  22. '200:2:5d2e:859::',
  23. '200:2:9f6a:794b::',
  24. '200:2:cb62:741::',
  25. '200:2:cc9b:953e::',
  26. '200:2:f3b9:bb27::',
  27. '2001::212',
  28. '2001:da8:112::21ae',
  29. '2003:ff:1:2:3:4:5fff:6',
  30. '2003:ff:1:2:3:4:5fff:7',
  31. '2003:ff:1:2:3:4:5fff:8',
  32. '2003:ff:1:2:3:4:5fff:9',
  33. '2003:ff:1:2:3:4:5fff:10',
  34. '2003:ff:1:2:3:4:5fff:11',
  35. '2003:ff:1:2:3:4:5fff:12',
  36. '2123::3e12',
  37. '3059:83eb::e015:2bee:0:0',
  38. 'a068:3850:fc7e::d068:3850:fc7e:0',
  39. 'a068:dd8a:b57f::d068:dd8a:b57f:0',
  40. 'a0a8:851c:d17f::d0a8:851c:d17f:0',
  41. 'a0c8:ad86:4c7f::d0c8:ad86:4c7f:0',
  42. 'a0e8:20f9:617f::d0e8:20f9:617f:0',
  43. 'a0f8:7d0c:ad7f::d0f8:7d0c:ad7f:0',
  44. '1.2.3.4',
  45. '4.36.66.178',
  46. '8.7.198.45',
  47. '37.61.54.158',
  48. '46.82.174.68',
  49. '59.24.3.173',
  50. '64.33.88.161',
  51. '78.16.49.15',
  52. '93.46.8.89',
  53. '127.0.0.1',
  54. '159.106.121.75',
  55. '202.181.7.85',
  56. '203.98.7.65',
  57. '243.185.187.39'
  58. )
  59. dns = {
  60. 'google_a': '2001:4860:4860::8888',
  61. 'google_b': '2001:4860:4860::8844',
  62. 'he_net': '2001:470:20::2',
  63. 'lax_he_net': '2001:470:0:9d::2'
  64. }
  65. config = {
  66. 'dns': dns['google_b'],
  67. 'infile': '',
  68. 'outfile': '',
  69. 'querytype': 'aaaa',
  70. 'cname': False,
  71. 'threadnum': 10
  72. }
  73. hosts = []
  74. done_num = 0
  75. thread_lock = threading.Lock()
  76. running = True
  77. class worker_thread(threading.Thread):
  78. def __init__(self, start_pt, end_pt):
  79. threading.Thread.__init__(self)
  80. self.start_pt = start_pt
  81. self.end_pt = end_pt
  82. def run(self):
  83. global hosts, done_num
  84. for i in range(self.start_pt, self.end_pt):
  85. if not running: break
  86. line = hosts[i].strip()
  87. if line == '' or line[0:2] == '##':
  88. hosts[i] = line + '\r\n'
  89. with thread_lock: done_num += 1
  90. continue
  91. # uncomment line
  92. line = line.lstrip('#')
  93. # split comment that appended to line
  94. comment = ''
  95. p = line.find('#')
  96. if p > 0:
  97. comment = line[p:]
  98. line = line[:p]
  99. arr = line.split()
  100. if len(arr) == 1:
  101. domain = arr[0]
  102. else:
  103. domain = arr[1]
  104. flag = False
  105. if validate_domain(domain):
  106. cname, ip = query_domain(domain, False)
  107. if ip == '' or ip in blackhole:
  108. cname, ip = query_domain(domain, True)
  109. if ip:
  110. flag = True
  111. arr[0] = ip
  112. if len(arr) == 1:
  113. arr.append(domain)
  114. if config['cname'] and cname:
  115. arr.append('#' + cname)
  116. else:
  117. if comment:
  118. arr.append(comment)
  119. if not flag:
  120. arr[0] = '#' + arr[0]
  121. if comment:
  122. arr.append(comment)
  123. hosts[i] = ' '.join(arr)
  124. hosts[i] += '\r\n'
  125. with thread_lock: done_num += 1
  126. class watcher_thread(threading.Thread):
  127. def run(self):
  128. total_num = len(hosts)
  129. wn = int(config['threadnum'])
  130. if wn > total_num:
  131. wn = total_num
  132. print "There are %d threads working..." % wn
  133. print "Press 'Enter' to exit.\n"
  134. while True:
  135. if sys.stdin in select.select([sys.stdin], [], [], 0)[0]:
  136. raw_input()
  137. print 'Waiting threads to exit...'
  138. global running
  139. with thread_lock:
  140. running = False
  141. break
  142. dn = done_num
  143. outbuf = "Total: %d lines, Done: %d lines, Ratio: %d %%.\r"\
  144. % (total_num, dn, dn * 100 / total_num)
  145. print outbuf,
  146. sys.stdout.flush()
  147. if dn == total_num:
  148. print outbuf
  149. break
  150. time.sleep(1)
  151. def query_domain(domain, tcp):
  152. cmd = "dig +short +time=2 -6 %s @'%s' '%s'"\
  153. % (config['querytype'], config['dns'], domain)
  154. if tcp:
  155. cmd = cmd + ' +tcp'
  156. proc = subprocess.Popen(shlex.split(cmd), stdout=subprocess.PIPE)
  157. out, _ = proc.communicate()
  158. outarr = out.splitlines()
  159. cname = ip = ''
  160. for v in outarr:
  161. if cname == '' and validate_domain(v[:-1]):
  162. cname = v[:-1]
  163. if ip == '' and validate_ip_addr(v):
  164. ip = v
  165. break
  166. return (cname, ip)
  167. def validate_domain(domain):
  168. pattern = '^((?!-)[*A-Za-z0-9-]{1,63}(?<!-)\\.)+[A-Za-z]{2,6}$'
  169. p = re.compile(pattern)
  170. m = p.match(domain)
  171. if m:
  172. return True
  173. else:
  174. return False
  175. def validate_ip_addr(ip_addr):
  176. if ':' in ip_addr:
  177. try:
  178. socket.inet_pton(socket.AF_INET6, ip_addr)
  179. return True
  180. except socket.error:
  181. return False
  182. else:
  183. try:
  184. socket.inet_pton(socket.AF_INET, ip_addr)
  185. return True
  186. except socket.error:
  187. return False
  188. def print_help():
  189. print '''usage: update_hosts [OPTIONS] FILE
  190. A simple multi-threading tool used for updating hosts file.
  191. Options:
  192. -h, --help show this help message and exit
  193. -s DNS set another dns server, default: 2001:4860:4860::8844
  194. -o OUT_FILE output file, default: inputfilename.out
  195. -t QUERY_TYPE dig command query type, default: aaaa
  196. -c, --cname write canonical name into hosts file
  197. -n THREAD_NUM set the number of worker threads, default: 10
  198. '''
  199. def get_config():
  200. shortopts = 'hs:o:t:n:c'
  201. longopts = ['help', 'cname']
  202. try:
  203. optlist, args = getopt.gnu_getopt(sys.argv[1:], shortopts, longopts)
  204. except getopt.GetoptError as e:
  205. print e, '\n'
  206. print_help()
  207. sys.exit(1)
  208. global config
  209. for key, value in optlist:
  210. if key == '-s':
  211. config['dns'] = value
  212. elif key == '-o':
  213. config['outfile'] = value
  214. elif key == '-t':
  215. config['querytype'] = value
  216. elif key in ('-c', '--cname'):
  217. config['cname'] = True
  218. elif key == '-n':
  219. config['threadnum'] = int(value)
  220. elif key in ('-h', '--help'):
  221. print_help()
  222. sys.exit(0)
  223. if len(args) != 1:
  224. print "You must specify the input hosts file (only one)."
  225. sys.exit(1)
  226. config['infile'] = args[0]
  227. if config['outfile'] == '':
  228. config['outfile'] = config['infile'] + '.out'
  229. def main():
  230. get_config()
  231. dig_path = '/usr/bin/dig'
  232. if not os.path.isfile(dig_path) or not os.access(dig_path, os.X_OK):
  233. print "It seems you don't have 'dig' command installed properly "\
  234. "on your system."
  235. sys.exit(2)
  236. global hosts
  237. try:
  238. with open(config['infile'], 'r') as infile:
  239. hosts = infile.readlines()
  240. except IOError as e:
  241. print e
  242. sys.exit(e.errno)
  243. if os.path.exists(config['outfile']):
  244. config['outfile'] += '.new'
  245. try:
  246. outfile = open(config['outfile'], 'w')
  247. except IOError as e:
  248. print e
  249. sys.exit(e.errno)
  250. print "Input: %s Output: %s\n" % (config['infile'], config['outfile'])
  251. threads = []
  252. t = watcher_thread()
  253. t.start()
  254. threads.append(t)
  255. worker_num = config['threadnum']
  256. lines_num = len(hosts)
  257. lines_per_thread = lines_num / worker_num
  258. lines_remain = lines_num % worker_num
  259. start_pt = 0
  260. for _ in range(worker_num):
  261. if not running: break
  262. lines_for_thread = lines_per_thread
  263. if lines_for_thread == 0 and lines_remain == 0:
  264. break
  265. if lines_remain > 0:
  266. lines_for_thread += 1
  267. lines_remain -= 1
  268. t = worker_thread(start_pt, start_pt + lines_for_thread)
  269. start_pt += lines_for_thread
  270. t.start()
  271. threads.append(t)
  272. for t in threads:
  273. t.join()
  274. try:
  275. outfile.writelines(hosts)
  276. except IOError as e:
  277. print e
  278. sys.exit(e.errno)
  279. sys.exit(0)
  280. if __name__ == '__main__':
  281. main()