update_hosts.py 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. import os
  4. import sys
  5. import re
  6. import socket
  7. import getopt
  8. import threading
  9. import subprocess
  10. import shlex
  11. blackhole = (
  12. '10::2222',
  13. '101::1234',
  14. '2001::212',
  15. '2001:da8:112::21ae',
  16. '2003:ff:1:2:3:4:5fff:6',
  17. '2003:ff:1:2:3:4:5fff:7',
  18. '2003:ff:1:2:3:4:5fff:8',
  19. '2003:ff:1:2:3:4:5fff:9',
  20. '2003:ff:1:2:3:4:5fff:10',
  21. '2003:ff:1:2:3:4:5fff:11',
  22. '2003:ff:1:2:3:4:5fff:12',
  23. '21:2::2',
  24. '2123::3e12')
  25. dns = {
  26. 'google_a':'2001:4860:4860::8888',
  27. 'google_b':'2001:4860:4860::8844',
  28. 'he_net':'2001:470:20::2',
  29. 'lax_he_net':'2001:470:0:9d::2'
  30. }
  31. config = {
  32. 'dns':dns['google_b'],
  33. 'infile':'hosts',
  34. 'outfile':'hosts.new',
  35. 'querytype':'aaaa',
  36. 'threadnum':10
  37. }
  38. hosts = []
  39. class worker_thread(threading.Thread):
  40. def __init__(self, start_pt, end_pt):
  41. threading.Thread.__init__(self)
  42. self.start_pt = start_pt
  43. self.end_pt = end_pt
  44. def run(self):
  45. global hosts
  46. for i in range(self.start_pt, self.end_pt):
  47. line = hosts[i].strip()
  48. if line == "" or line[0:2] == '##':
  49. hosts[i] = line + '\r\n'
  50. continue
  51. arr = line.lstrip('#').split()
  52. if len(arr) == 1:
  53. domain = arr[0]
  54. else:
  55. domain = arr[1]
  56. flag = False
  57. if validate_domain(domain):
  58. ret = query_domain(domain, False)
  59. if ret in blackhole or not ret:
  60. ret = query_domain(domain, True)
  61. if ret and ret[:2] != ';;':
  62. flag = True
  63. arr[0] = ret
  64. if flag:
  65. if len(arr) == 1:
  66. arr.append(domain)
  67. else:
  68. arr[0] = '#' + arr[0]
  69. hosts[i] = ' '.join(arr)
  70. hosts[i] += '\r\n'
  71. def query_domain(domain, tcp):
  72. cmd = "dig +short +time=2 -6 %s @'%s' '%s'"\
  73. % (config['querytype'], config['dns'], domain)
  74. if tcp:
  75. cmd = cmd[:3] + ' +tcp' + cmd[3:]
  76. proc = subprocess.Popen(shlex.split(cmd), stdout=subprocess.PIPE)
  77. out, err = proc.communicate()
  78. outarr = out.splitlines()
  79. if len(outarr) == 0:
  80. ret = ''
  81. else:
  82. if validate_ip_addr(outarr[-1]):
  83. ret = outarr[-1]
  84. else:
  85. ret = ''
  86. return ret
  87. def validate_domain(domain):
  88. pattern = '^((?!-)[*A-Za-z0-9-]{1,63}(?<!-)\\.)+[A-Za-z]{2,6}$'
  89. p = re.compile(pattern)
  90. m = p.match(domain)
  91. if m:
  92. return True
  93. else:
  94. return False
  95. def validate_ip_addr(ip_addr):
  96. if ':' in ip_addr:
  97. try:
  98. socket.inet_pton(socket.AF_INET6, ip_addr)
  99. return True
  100. except socket.error:
  101. return False
  102. else:
  103. try:
  104. socket.inet_pton(socket.AF_INET, ip_addr)
  105. return True
  106. except socket.error:
  107. return False
  108. def print_help():
  109. print('''usage: update_hosts [-h] [-s DNS] [-t QUERY_TYPE] [-n THREAD_NUM]
  110. -i IN_FILE [-o OUT_FILE]
  111. A simple multi-threading tool used to update hosts file.
  112. Options:
  113. -h, --help show this help message and exit
  114. -s DNS set another dns server, default: 2001:4860:4860::8844
  115. -i IN_FILE input hosts file, default: hosts
  116. -o OUT_FILE ouput file, default: hosts.new
  117. -t QUERY_TYPE dig command query type, defalut: aaaa
  118. -n THREAD_NUM set the number of worker thread, default: 10
  119. ''')
  120. def get_config():
  121. shortopts = 'hs:i:o:t:n'
  122. longopts = ['help']
  123. try:
  124. optlist, args = getopt.getopt(sys.argv[1:], shortopts, longopts)
  125. except getopt.GetoptError as e:
  126. print e
  127. print_help()
  128. sys.exit(2)
  129. global config
  130. for key, value in optlist:
  131. if key == '-i':
  132. config['infile'] = value
  133. elif key == '-s':
  134. config['dns'] = value
  135. elif key == '-o':
  136. config['outfile'] = value
  137. elif key == '-t':
  138. config['querytype'] = value
  139. elif key == '-m':
  140. config['method'] = value
  141. elif key == '-n':
  142. config['threadnum'] = int(value)
  143. elif key in ('-h', '--help'):
  144. print_help()
  145. sys.exit(0)
  146. def main():
  147. get_config()
  148. global config
  149. try:
  150. infile = open(config['infile'], 'r')
  151. except IOError as e:
  152. print e
  153. sys.exit(e.errno)
  154. if os.path.exists(config['outfile']):
  155. config['outfile'] += '.new'
  156. try:
  157. outfile = open(config['outfile'], 'w')
  158. except IOError as e:
  159. print e
  160. sys.exit(e.errno)
  161. global hosts
  162. hosts = infile.readlines()
  163. threads = []
  164. thread_num = config['threadnum']
  165. lines_num = len(hosts)
  166. lines_per_thread = lines_num / thread_num
  167. lines_remain = lines_num % thread_num
  168. start_pt = 0
  169. for i in range(thread_num):
  170. lines_for_thread = lines_per_thread
  171. if lines_for_thread == 0 and lines_remain == 0:
  172. break
  173. if lines_remain > 0:
  174. lines_for_thread += 1
  175. lines_remain -= 1
  176. t = worker_thread(start_pt, start_pt + lines_for_thread)
  177. start_pt += lines_for_thread
  178. t.start()
  179. threads.append(t)
  180. for t in threads:
  181. t.join()
  182. try:
  183. outfile.writelines(hosts)
  184. except IOError as e:
  185. print e
  186. sys.exit(e.errno)
  187. sys.exit(0)
  188. if __name__ == '__main__':
  189. main()