update_hosts.py 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. import os
  4. import sys
  5. import re
  6. import socket
  7. import getopt
  8. import threading
  9. import subprocess
  10. import shlex
  11. import time
  12. blackhole = (
  13. '10::2222',
  14. '101::1234',
  15. '2001::212',
  16. '2001:da8:112::21ae',
  17. '2003:ff:1:2:3:4:5fff:6',
  18. '2003:ff:1:2:3:4:5fff:7',
  19. '2003:ff:1:2:3:4:5fff:8',
  20. '2003:ff:1:2:3:4:5fff:9',
  21. '2003:ff:1:2:3:4:5fff:10',
  22. '2003:ff:1:2:3:4:5fff:11',
  23. '2003:ff:1:2:3:4:5fff:12',
  24. '21:2::2',
  25. '2123::3e12')
  26. dns = {
  27. 'google_a':'2001:4860:4860::8888',
  28. 'google_b':'2001:4860:4860::8844',
  29. 'he_net':'2001:470:20::2',
  30. 'lax_he_net':'2001:470:0:9d::2'
  31. }
  32. config = {
  33. 'dns':dns['google_b'],
  34. 'infile':'hosts',
  35. 'outfile':'hosts.new',
  36. 'querytype':'aaaa',
  37. 'threadnum':10
  38. }
  39. hosts = []
  40. done_num = 0
  41. thread_lock = threading.Lock()
  42. class worker_thread(threading.Thread):
  43. def __init__(self, start_pt, end_pt):
  44. threading.Thread.__init__(self)
  45. self.start_pt = start_pt
  46. self.end_pt = end_pt
  47. def run(self):
  48. global hosts, done_num
  49. for i in range(self.start_pt, self.end_pt):
  50. line = hosts[i].strip()
  51. with thread_lock:
  52. done_num += 1
  53. if line == "" or line[0:2] == '##':
  54. hosts[i] = line + '\r\n'
  55. continue
  56. arr = line.lstrip('#').split()
  57. if len(arr) == 1:
  58. domain = arr[0]
  59. else:
  60. domain = arr[1]
  61. flag = False
  62. if validate_domain(domain):
  63. ret = query_domain(domain, False)
  64. if ret in blackhole or not ret:
  65. ret = query_domain(domain, True)
  66. if ret:
  67. flag = True
  68. arr[0] = ret
  69. if flag:
  70. if len(arr) == 1:
  71. arr.append(domain)
  72. else:
  73. arr[0] = '#' + arr[0]
  74. hosts[i] = ' '.join(arr)
  75. hosts[i] += '\r\n'
  76. class watcher_thread(threading.Thread):
  77. def run(self):
  78. global hosts, done_num
  79. total_num = len(hosts)
  80. wn = int(config['threadnum'])
  81. if wn > total_num:
  82. wn = total_num
  83. print "There are %d threads working..." % wn
  84. while True:
  85. with thread_lock:
  86. dn = done_num
  87. outbuf = "Total: %d lines, Done: %d lines, Ratio: %d %%.\r"\
  88. % (total_num, dn, float(dn)/total_num*100)
  89. print outbuf,
  90. sys.stdout.flush()
  91. if done_num == total_num:
  92. print outbuf
  93. break
  94. time.sleep(1)
  95. def query_domain(domain, tcp):
  96. cmd = "dig +short +time=2 -6 %s @'%s' '%s'"\
  97. % (config['querytype'], config['dns'], domain)
  98. if tcp:
  99. cmd = cmd[:3] + ' +tcp' + cmd[3:]
  100. proc = subprocess.Popen(shlex.split(cmd), stdout=subprocess.PIPE)
  101. out, err = proc.communicate()
  102. outarr = out.splitlines()
  103. if len(outarr) == 0:
  104. ret = ''
  105. else:
  106. if validate_ip_addr(outarr[-1]):
  107. ret = outarr[-1]
  108. else:
  109. ret = ''
  110. return ret
  111. def validate_domain(domain):
  112. pattern = '^((?!-)[*A-Za-z0-9-]{1,63}(?<!-)\\.)+[A-Za-z]{2,6}$'
  113. p = re.compile(pattern)
  114. m = p.match(domain)
  115. if m:
  116. return True
  117. else:
  118. return False
  119. def validate_ip_addr(ip_addr):
  120. if ':' in ip_addr:
  121. try:
  122. socket.inet_pton(socket.AF_INET6, ip_addr)
  123. return True
  124. except socket.error:
  125. return False
  126. else:
  127. try:
  128. socket.inet_pton(socket.AF_INET, ip_addr)
  129. return True
  130. except socket.error:
  131. return False
  132. def print_help():
  133. print('''usage: update_hosts [-h] [-s DNS] [-t QUERY_TYPE] [-n THREAD_NUM]
  134. -i IN_FILE [-o OUT_FILE]
  135. A simple multi-threading tool used to update hosts file.
  136. Options:
  137. -h, --help show this help message and exit
  138. -s DNS set another dns server, default: 2001:4860:4860::8844
  139. -i IN_FILE input hosts file, default: hosts
  140. -o OUT_FILE ouput file, default: hosts.new
  141. -t QUERY_TYPE dig command query type, defalut: aaaa
  142. -n THREAD_NUM set the number of worker thread, default: 10
  143. ''')
  144. def get_config():
  145. shortopts = 'hs:i:o:t:n:'
  146. longopts = ['help']
  147. try:
  148. optlist, args = getopt.getopt(sys.argv[1:], shortopts, longopts)
  149. except getopt.GetoptError as e:
  150. print e, '\n'
  151. print_help()
  152. sys.exit(2)
  153. global config
  154. for key, value in optlist:
  155. if key == '-i':
  156. config['infile'] = value
  157. elif key == '-s':
  158. config['dns'] = value
  159. elif key == '-o':
  160. config['outfile'] = value
  161. elif key == '-t':
  162. config['querytype'] = value
  163. elif key == '-m':
  164. config['method'] = value
  165. elif key == '-n':
  166. config['threadnum'] = int(value)
  167. elif key in ('-h', '--help'):
  168. print_help()
  169. sys.exit(0)
  170. def main():
  171. get_config()
  172. global config, hosts
  173. try:
  174. with open(config['infile'], 'r') as infile:
  175. hosts = infile.readlines()
  176. except IOError as e:
  177. print e
  178. sys.exit(e.errno)
  179. if os.path.exists(config['outfile']):
  180. config['outfile'] += '.new'
  181. try:
  182. outfile = open(config['outfile'], 'w')
  183. except IOError as e:
  184. print e
  185. sys.exit(e.errno)
  186. threads = []
  187. t = watcher_thread()
  188. t.start()
  189. threads.append(t)
  190. worker_num = config['threadnum']
  191. lines_num = len(hosts)
  192. lines_per_thread = lines_num / worker_num
  193. lines_remain = lines_num % worker_num
  194. start_pt = 0
  195. for i in range(worker_num):
  196. lines_for_thread = lines_per_thread
  197. if lines_for_thread == 0 and lines_remain == 0:
  198. break
  199. if lines_remain > 0:
  200. lines_for_thread += 1
  201. lines_remain -= 1
  202. t = worker_thread(start_pt, start_pt + lines_for_thread)
  203. start_pt += lines_for_thread
  204. t.start()
  205. threads.append(t)
  206. for t in threads:
  207. t.join()
  208. try:
  209. outfile.writelines(hosts)
  210. except IOError as e:
  211. print e
  212. sys.exit(e.errno)
  213. sys.exit(0)
  214. if __name__ == '__main__':
  215. main()