update_hosts.py 8.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
import getopt
import os
import re
import select
import shlex
import shutil
import socket
import subprocess
import sys
import threading
import time
  13. blackhole = (
  14. '10::2222',
  15. '21:2::2',
  16. '101::1234',
  17. '200:2:807:c62d::',
  18. '200:2:253d:369e::',
  19. '200:2:2e52:ae44::',
  20. '200:2:3b18:3ad::',
  21. '200:2:4e10:310f::',
  22. '200:2:5d2e:859::',
  23. '200:2:9f6a:794b::',
  24. '200:2:cb62:741::',
  25. '200:2:cc9b:953e::',
  26. '200:2:f3b9:bb27::',
  27. '2001::212',
  28. '2001::1f0d:6644',
  29. '2001::45ab:e038',
  30. '2001::48e8:aa1a',
  31. '2001::c73b:95e6',
  32. '2001:da8:112::21ae',
  33. '2003:ff:1:2:3:4:5fff:6',
  34. '2003:ff:1:2:3:4:5fff:7',
  35. '2003:ff:1:2:3:4:5fff:8',
  36. '2003:ff:1:2:3:4:5fff:9',
  37. '2003:ff:1:2:3:4:5fff:10',
  38. '2003:ff:1:2:3:4:5fff:11',
  39. '2003:ff:1:2:3:4:5fff:12',
  40. '2123::3e12',
  41. '3059:83eb::e015:2bee:0:0',
  42. 'a028:a3e9:657f::d028:a3e9:657f:0'
  43. 'a048:6838:517f::d048:6838:517f:0',
  44. 'a068:3850:fc7e::d068:3850:fc7e:0',
  45. 'a068:dd8a:b57f::d068:dd8a:b57f:0',
  46. 'a0a8:851c:d17f::d0a8:851c:d17f:0',
  47. 'a0c8:ad86:4c7f::d0c8:ad86:4c7f:0',
  48. 'a0e8:20f9:617f::d0e8:20f9:617f:0',
  49. 'a0f8:7d0c:ad7f::d0f8:7d0c:ad7f:0',
  50. '1.2.3.4',
  51. '4.36.66.178',
  52. '8.7.198.45',
  53. '37.61.54.158',
  54. '46.82.174.68',
  55. '59.24.3.173',
  56. '64.33.88.161',
  57. '78.16.49.15',
  58. '93.46.8.89',
  59. '127.0.0.1',
  60. '159.106.121.75',
  61. '202.181.7.85',
  62. '203.98.7.65',
  63. '243.185.187.39'
  64. )
  65. dns = {
  66. 'google_a': '2001:4860:4860::8888',
  67. 'google_b': '2001:4860:4860::8844',
  68. 'he_net': '2001:470:20::2',
  69. 'lax_he_net': '2001:470:0:9d::2'
  70. }
  71. config = {
  72. 'dns': dns['google_b'],
  73. 'infile': '',
  74. 'outfile': '',
  75. 'querytype': 'aaaa',
  76. 'cname': False,
  77. 'threadnum': 10
  78. }
  79. hosts = []
  80. done_num = 0
  81. thread_lock = threading.Lock()
  82. running = True
  83. class worker_thread(threading.Thread):
  84. def __init__(self, start_pt, end_pt):
  85. threading.Thread.__init__(self)
  86. self.start_pt = start_pt
  87. self.end_pt = end_pt
  88. def run(self):
  89. global hosts, done_num
  90. for i in range(self.start_pt, self.end_pt):
  91. if not running: break
  92. line = hosts[i].strip()
  93. if line == '' or line[0:2] == '##':
  94. hosts[i] = line + '\r\n'
  95. with thread_lock: done_num += 1
  96. continue
  97. # uncomment line
  98. line = line.lstrip('#')
  99. # split comment that appended to line
  100. comment = ''
  101. p = line.find('#')
  102. if p > 0:
  103. comment = line[p:]
  104. line = line[:p]
  105. arr = line.split()
  106. if len(arr) == 1:
  107. domain = arr[0]
  108. else:
  109. domain = arr[1]
  110. flag = False
  111. if validate_domain(domain):
  112. cname, ip = query_domain(domain, False)
  113. if ip == '' or ip in blackhole:
  114. cname, ip = query_domain(domain, True)
  115. if ip:
  116. flag = True
  117. arr[0] = ip
  118. if len(arr) == 1:
  119. arr.append(domain)
  120. if config['cname'] and cname:
  121. arr.append('#' + cname)
  122. else:
  123. if comment:
  124. arr.append(comment)
  125. if not flag:
  126. arr[0] = '#' + arr[0]
  127. if comment:
  128. arr.append(comment)
  129. hosts[i] = ' '.join(arr)
  130. hosts[i] += '\r\n'
  131. with thread_lock: done_num += 1
  132. class watcher_thread(threading.Thread):
  133. def run(self):
  134. total_num = len(hosts)
  135. wn = int(config['threadnum'])
  136. if wn > total_num:
  137. wn = total_num
  138. print("There are %d threads working..." % wn)
  139. print("Press 'Enter' to exit.\n")
  140. while True:
  141. if sys.stdin in select.select([sys.stdin], [], [], 0)[0]:
  142. input()
  143. print("Waiting threads to exit...")
  144. global running
  145. with thread_lock:
  146. running = False
  147. break
  148. dn = done_num
  149. outbuf = "Total: %d lines, Done: %d lines, Ratio: %d %%.\r"\
  150. % (total_num, dn, dn * 100 / total_num)
  151. print(outbuf, end='', flush=True)
  152. if dn == total_num:
  153. print(outbuf)
  154. break
  155. time.sleep(1)
  156. def query_domain(domain, tcp):
  157. cmd = "dig +short +time=2 -6 %s @'%s' '%s'"\
  158. % (config['querytype'], config['dns'], domain)
  159. if tcp:
  160. cmd = cmd + ' +tcp'
  161. proc = subprocess.Popen(shlex.split(cmd), stdout=subprocess.PIPE)
  162. out, _ = proc.communicate()
  163. outarr = out.decode('utf-8').splitlines()
  164. cname = ip = ''
  165. for v in outarr:
  166. if cname == '' and validate_domain(v[:-1]):
  167. cname = v[:-1]
  168. if ip == '' and validate_ip_addr(v):
  169. ip = v
  170. break
  171. return (cname, ip)
  172. def validate_domain(domain):
  173. pattern = '^((?!-)[*A-Za-z0-9-]{1,63}(?<!-)\\.)+[A-Za-z]{2,6}$'
  174. p = re.compile(pattern)
  175. m = p.match(domain)
  176. if m:
  177. return True
  178. else:
  179. return False
  180. def validate_ip_addr(ip_addr):
  181. if ':' in ip_addr:
  182. try:
  183. socket.inet_pton(socket.AF_INET6, ip_addr)
  184. return True
  185. except socket.error:
  186. return False
  187. else:
  188. try:
  189. socket.inet_pton(socket.AF_INET, ip_addr)
  190. return True
  191. except socket.error:
  192. return False
  193. def print_help():
  194. print('''usage: update_hosts [OPTIONS] FILE
  195. A simple multi-threading tool used for updating hosts file.
  196. Options:
  197. -h, --help show this help message and exit
  198. -s DNS set another dns server, default: 2001:4860:4860::8844
  199. -o OUT_FILE output file, default: inputfilename.out
  200. -t QUERY_TYPE dig command query type, default: aaaa
  201. -c, --cname write canonical name into hosts file
  202. -n THREAD_NUM set the number of worker threads, default: 10
  203. ''')
  204. def get_config():
  205. shortopts = 'hs:o:t:n:c'
  206. longopts = ['help', 'cname']
  207. try:
  208. optlist, args = getopt.gnu_getopt(sys.argv[1:], shortopts, longopts)
  209. except getopt.GetoptError as e:
  210. print(e)
  211. print_help()
  212. sys.exit(1)
  213. global config
  214. for key, value in optlist:
  215. if key == '-s':
  216. config['dns'] = value
  217. elif key == '-o':
  218. config['outfile'] = value
  219. elif key == '-t':
  220. config['querytype'] = value
  221. elif key in ('-c', '--cname'):
  222. config['cname'] = True
  223. elif key == '-n':
  224. config['threadnum'] = int(value)
  225. elif key in ('-h', '--help'):
  226. print_help()
  227. sys.exit(0)
  228. if len(args) != 1:
  229. print("You must specify the input hosts file (only one).")
  230. sys.exit(1)
  231. config['infile'] = args[0]
  232. if config['outfile'] == '':
  233. config['outfile'] = config['infile'] + '.out'
  234. def main():
  235. get_config()
  236. dig_path = '/usr/bin/dig'
  237. if not os.path.isfile(dig_path) or not os.access(dig_path, os.X_OK):
  238. print("It seems you don't have 'dig' command installed properly "\
  239. "on your system.")
  240. sys.exit(2)
  241. global hosts
  242. try:
  243. with open(config['infile'], 'r') as infile:
  244. hosts = infile.readlines()
  245. except IOError as e:
  246. print(e)
  247. sys.exit(e.errno)
  248. if os.path.exists(config['outfile']):
  249. config['outfile'] += '.new'
  250. try:
  251. outfile = open(config['outfile'], 'w')
  252. except IOError as e:
  253. print(e)
  254. sys.exit(e.errno)
  255. print("Input: %s Output: %s\n" % (config['infile'], config['outfile']))
  256. threads = []
  257. t = watcher_thread()
  258. t.start()
  259. threads.append(t)
  260. worker_num = config['threadnum']
  261. lines_num = len(hosts)
  262. lines_per_thread = lines_num // worker_num
  263. lines_remain = lines_num % worker_num
  264. start_pt = 0
  265. for _ in range(worker_num):
  266. if not running: break
  267. lines_for_thread = lines_per_thread
  268. if lines_for_thread == 0 and lines_remain == 0:
  269. break
  270. if lines_remain > 0:
  271. lines_for_thread += 1
  272. lines_remain -= 1
  273. t = worker_thread(start_pt, start_pt + lines_for_thread)
  274. start_pt += lines_for_thread
  275. t.start()
  276. threads.append(t)
  277. for t in threads:
  278. t.join()
  279. try:
  280. outfile.writelines(hosts)
  281. except IOError as e:
  282. print(e)
  283. sys.exit(e.errno)
  284. sys.exit(0)
  285. if __name__ == '__main__':
  286. main()