Browse Source

a multi-threading tool to update hosts

lennylxx 9 years ago
parent
commit
edf1c9b52c
2 changed files with 237 additions and 99 deletions
  1. 237 0
      update_hosts.py
  2. 0 99
      update_hosts.sh

+ 237 - 0
update_hosts.py

@@ -0,0 +1,237 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import os
+import sys
+import re
+import socket
+import getopt
+import threading
+import subprocess
+import shlex
+
+# Answers that are not trusted: when a lookup returns one of these addresses
+# (or nothing at all), worker_thread.run() repeats the query over TCP.
+# NOTE(review): presumably these are known bogus/poisoned AAAA answers -
+# confirm the origin of the list.
+blackhole = (
+'10::2222',
+'101::1234',
+'2001::212',
+'2001:da8:112::21ae',
+'2003:ff:1:2:3:4:5fff:6',
+'2003:ff:1:2:3:4:5fff:7',
+'2003:ff:1:2:3:4:5fff:8',
+'2003:ff:1:2:3:4:5fff:9',
+'2003:ff:1:2:3:4:5fff:10',
+'2003:ff:1:2:3:4:5fff:11',
+'2003:ff:1:2:3:4:5fff:12',
+'21:2::2',
+'2123::3e12')
+
+# DNS server addresses, keyed by a short name.
+dns = {
+'google_a':'2001:4860:4860::8888',
+'google_b':'2001:4860:4860::8844',
+'he_net':'2001:470:20::2',
+'lax_he_net':'2001:470:0:9d::2'
+}
+
+# Runtime settings. These are the defaults; get_config() overwrites them
+# with the values given on the command line.
+config = {
+'dns':dns['google_b'],
+'infile':'hosts',
+'outfile':'hosts.new',
+'querytype':'aaaa',
+'threadnum':10
+}
+
+# Lines of the input file. main() fills the list and the worker threads
+# rewrite their own slice of it in place.
+hosts = []
+
+class worker_thread(threading.Thread):
+    """Thread that re-resolves the entries ``hosts[start_pt:end_pt]`` in place.
+
+    Each processed line is rewritten as ``<address> <domain> ...`` when the
+    lookup succeeds, or commented out with a leading ``#`` when it does not.
+    Every rewritten line is terminated with CRLF.
+
+    start_pt -- index of the first line handled by this thread (inclusive)
+    end_pt   -- index one past the last line handled by this thread
+    """
+
+    def __init__(self, start_pt, end_pt):
+        threading.Thread.__init__(self)
+        self.start_pt = start_pt
+        self.end_pt = end_pt
+
+    def run(self):
+        # Only items of the shared list are assigned, never the name itself,
+        # so no ``global`` declaration is needed.
+        for i in range(self.start_pt, self.end_pt):
+            line = hosts[i].strip()
+
+            # Blank lines and '##' comment lines are copied through as they are.
+            if line == "" or line[0:2] == '##':
+                hosts[i] = line + '\r\n'
+                continue
+
+            # A single leading '#' marks a disabled entry; drop it and retry.
+            arr = line.lstrip('#').split()
+
+            # A line made only of '#' characters has no fields to resolve;
+            # indexing into the empty list would raise IndexError.
+            if not arr:
+                hosts[i] = line + '\r\n'
+                continue
+
+            # "<domain>" alone, or "<address> <domain> [...]".
+            if len(arr) == 1:
+                domain = arr[0]
+            else:
+                domain = arr[1]
+
+            ret = ''
+            if validate_domain(domain):
+                ret = query_domain(domain, False)
+
+                # No answer, or an untrusted one: ask again over TCP.
+                if ret in blackhole or not ret:
+                    ret = query_domain(domain, True)
+
+            # query_domain() returns either a validated address or '', so a
+            # non-empty result is always usable.
+            if ret:
+                arr[0] = ret
+                if len(arr) == 1:
+                    arr.append(domain)
+            else:
+                # Lookup failed: keep the entry but comment it out.
+                arr[0] = '#' + arr[0]
+
+            hosts[i] = ' '.join(arr) + '\r\n'
+
+def query_domain(domain, tcp):
+    """Look up ``domain`` with the external ``dig`` command.
+
+    The query type and the server come from ``config['querytype']`` and
+    ``config['dns']``.
+
+    domain -- host name to resolve
+    tcp    -- when true, pass ``+tcp`` to dig
+
+    Returns the last line of dig's short output when it is a valid IP
+    address, otherwise an empty string.
+    """
+    # Build the argument vector directly instead of formatting a quoted
+    # string and splitting it again: values containing quotes or spaces
+    # cannot break the command line this way.
+    cmd = ['dig']
+    if tcp:
+        cmd.append('+tcp')
+    cmd += ['+short', '+time=2', '-6', config['querytype'],
+            '@' + config['dns'], domain]
+
+    # universal_newlines=True makes the output text on Python 3 as well,
+    # where a bytes result would make the address validation raise TypeError.
+    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
+                            universal_newlines=True)
+    out, _ = proc.communicate()
+
+    outarr = out.splitlines()
+
+    # Only the last line is considered; anything that is not an address
+    # (CNAME names, dig diagnostics) counts as "no answer".
+    if outarr and validate_ip_addr(outarr[-1]):
+        return outarr[-1]
+    return ''
+
+def validate_domain(domain):
+    """Return True when ``domain`` is shaped like a dotted host name.
+
+    Accepted form: one or more labels of 1-63 characters (letters, digits,
+    '-' or '*', not starting or ending with '-'), each followed by a dot,
+    then a final label of 2-6 letters.
+    """
+    pattern = '^((?!-)[*A-Za-z0-9-]{1,63}(?<!-)\\.)+[A-Za-z]{2,6}$'
+    return re.match(pattern, domain) is not None
+
+def validate_ip_addr(ip_addr):
+    """Return True when ``ip_addr`` parses as an IPv6 or IPv4 address.
+
+    A string containing ':' is checked as IPv6, anything else as IPv4.
+    """
+    if ':' in ip_addr:
+        family = socket.AF_INET6
+    else:
+        family = socket.AF_INET
+
+    try:
+        socket.inet_pton(family, ip_addr)
+    except socket.error:
+        return False
+    return True
+
+def print_help():
+    """Print the command-line usage summary to standard output."""
+    # -i is shown as optional because the input file has a default too.
+    print('''usage: update_hosts [-h] [-s DNS] [-t QUERY_TYPE] [-n THREAD_NUM]
+                [-i IN_FILE] [-o OUT_FILE]
+A simple multi-threading tool used to update hosts file.
+
+Options:
+  -h, --help             show this help message and exit
+  -s DNS                 set another dns server, default: 2001:4860:4860::8844
+  -i IN_FILE             input hosts file, default: hosts
+  -o OUT_FILE            output file, default: hosts.new
+  -t QUERY_TYPE          dig command query type, default: aaaa
+  -n THREAD_NUM          set the number of worker thread, default: 10
+''')
+
+def get_config():
+    """Parse ``sys.argv`` and store the chosen options in ``config``.
+
+    Recognised options: -s (DNS server), -i (input file), -o (output file),
+    -t (dig query type), -n (number of worker threads) and -h/--help.
+
+    Exits with status 2 after printing the usage text when an option is
+    unknown or malformed, and with status 0 after printing it for -h/--help.
+    """
+    # Every option except -h takes a value, so each letter needs a ':'.
+    # Without the colon after 'n', "-n 5" was parsed as a bare flag and
+    # int('') raised ValueError.
+    shortopts = 'hs:i:o:t:n:'
+    longopts = ['help']
+
+    try:
+        optlist, _ = getopt.getopt(sys.argv[1:], shortopts, longopts)
+    except getopt.GetoptError as e:
+        print(e)
+        print_help()
+        sys.exit(2)
+
+    for key, value in optlist:
+        if key == '-i':
+            config['infile'] = value
+        elif key == '-s':
+            config['dns'] = value
+        elif key == '-o':
+            config['outfile'] = value
+        elif key == '-t':
+            config['querytype'] = value
+        elif key == '-n':
+            # Reject anything that is not a positive integer up front rather
+            # than failing later while the work is being split.
+            try:
+                threadnum = int(value)
+            except ValueError:
+                threadnum = 0
+            if threadnum < 1:
+                print('invalid thread number: %s' % value)
+                print_help()
+                sys.exit(2)
+            config['threadnum'] = threadnum
+        elif key in ('-h', '--help'):
+            print_help()
+            sys.exit(0)
+
+def main():
+    """Read the input hosts file, refresh every entry and write the result.
+
+    The lines are split into contiguous slices, one per worker thread; the
+    first ``len(hosts) % thread_num`` threads receive one extra line. When
+    the output file already exists, '.new' is appended to its name.
+
+    Exits with the ``errno`` of the failure when a file cannot be opened or
+    written, and with status 0 on success.
+    """
+    get_config()
+
+    global hosts
+
+    # Read everything up front so the input file is closed right away.
+    try:
+        with open(config['infile'], 'r') as infile:
+            hosts = infile.readlines()
+    except IOError as e:
+        print(e)
+        sys.exit(e.errno)
+
+    if os.path.exists(config['outfile']):
+        config['outfile'] += '.new'
+
+    # Open the output before doing the lookups so that an unwritable
+    # destination is reported before any time is spent on queries.
+    try:
+        outfile = open(config['outfile'], 'w')
+    except IOError as e:
+        print(e)
+        sys.exit(e.errno)
+
+    # At least one worker is needed; this also avoids a division by zero.
+    thread_num = max(1, config['threadnum'])
+
+    # divmod keeps both values integers on Python 2 and Python 3 alike
+    # (plain '/' yields a float on Python 3 and breaks the slice bounds).
+    lines_per_thread, lines_remain = divmod(len(hosts), thread_num)
+
+    threads = []
+    start_pt = 0
+
+    for i in range(thread_num):
+        lines_for_thread = lines_per_thread
+
+        # Nothing left to hand out: fewer lines than threads.
+        if lines_for_thread == 0 and lines_remain == 0:
+            break
+
+        # Spread the remainder over the first threads, one line each.
+        if lines_remain > 0:
+            lines_for_thread += 1
+            lines_remain -= 1
+
+        t = worker_thread(start_pt, start_pt + lines_for_thread)
+        start_pt += lines_for_thread
+
+        t.start()
+        threads.append(t)
+
+    for t in threads:
+        t.join()
+
+    # The with-statement closes (and flushes) the file even on failure.
+    try:
+        with outfile:
+            outfile.writelines(hosts)
+    except IOError as e:
+        print(e)
+        sys.exit(e.errno)
+
+    sys.exit(0)
+
+# Run the tool only when this file is executed as a script.
+if __name__ == '__main__':
+    main()
+

+ 0 - 99
update_hosts.sh

@@ -1,99 +0,0 @@
-#!/bin/bash
-#
-# Re-resolve every entry of a hosts file with nslookup (AAAA records) and
-# write the refreshed entries, CRLF-terminated, to a new file.
-#
-# Usage: ./update_hosts.sh hosts new_hosts
-
-if [ $# -ne 2 ]; then
-    echo -e "Usage:"
-    echo -e "    $ ./update_hosts.sh hosts new_hosts\n"
-    exit 1
-fi
-
-hosts_file=$1
-new_hosts_file=$2
-
-if [ ! -f "$hosts_file" ]; then
-    echo -e $1" doesn't exists, plz retry!\n"
-    exit 1
-fi
-
-# Never overwrite an existing output file: write to "<name>.new" instead
-# and start that file empty.
-if [ -f $new_hosts_file ]; then
-    new_hosts_file=${new_hosts_file}".new"
-    printf "" > $new_hosts_file
-fi
-
-# Available DNS servers.
-he_net="2001:470:20::2"
-lax1_he_net="2001:470:0:9d::2"
-google_a="2001:4860:4860::8888"
-google_b="2001:4860:4860::8844"
-jp="203.112.2.4"
-
-# Server used for all queries.
-dns=$lax1_he_net
-
-# Answers that trigger a second lookup with "nslookup -vc".
-blackhole=(
-'10::2222'
-'101::1234'
-'21:2::2'
-'2001::212'
-'2001:da8:112::21ae'
-'2003:ff:1:2:3:4:5fff:6'
-'2003:ff:1:2:3:4:5fff:7'
-'2003:ff:1:2:3:4:5fff:8'
-'2003:ff:1:2:3:4:5fff:9'
-'2003:ff:1:2:3:4:5fff:10'
-'2003:ff:1:2:3:4:5fff:11'
-'2003:ff:1:2:3:4:5fff:12'
-'2123::3e12')
-
-# Counter of successfully resolved entries, used for the log output.
-num=1
-
-while read line
-do
-{
-    # Delete the carriage return of CRLF line endings.
-    line=$(printf "$line"|tr -d '\r')
-    #printf "$line"|od -tx1
-
-    # Blank lines are kept as blank lines.
-    if [[ $line == "" ]]; then 
-        printf "\r\n" >> $new_hosts_file
-        continue
-    fi
-
-    # Lines starting with '##' are copied unchanged.
-    if [ "${line:0:2}" == "##" ]; then 
-        printf "$line\r\n" >> $new_hosts_file
-        continue
-    fi
-    
-    # A single leading '#' is removed so the entry is looked up again.
-    if [ "${line:0:1}" == "#" ]; then 
-        line=${line#'#'}
-    fi
-    
-    # The host name is the second space-separated field of the line.
-    url=$(printf "$line"|cut -d" " -f2)
-
-    # Keep only the first 'AAAA address' line of the answer.
-    result=$(nslookup -querytype=AAAA "$url" "$dns"|grep 'AAAA address'|head -1)
-    
-    name=$(printf "$result"|cut -f1)
-    ip=$(printf "$result"|cut -d' ' -f4)
-    
-    # A blackholed answer is looked up once more with -vc
-    # (NOTE(review): -vc presumably forces TCP - confirm in the nslookup manual).
-    for var in "${blackhole[@]}"; do
-    if [[ $ip == "$var" && $ip != "" ]]; then
-        ip=$(nslookup -vc -querytype=AAAA "$url" "$dns"|grep 'AAAA address'|cut -d' ' -f4)
-        break
-    fi
-    done
-    
-    # No address found: write the entry commented out.
-    if [[ $ip == "" ]]; then
-        printf "#$line\r\n" >> $new_hosts_file
-        continue
-    fi
-
-    # When the answer carries a different name, append it as a comment.
-    if [[ $name != $url && $name != "" ]]; then
-        url=${url}" #"${name}
-    fi
-
-    printf "$ip $url\r\n" >> $new_hosts_file
-
-    # Print a log line to standard output.
-    echo "$num" "$ip" "$url"
-    num=$((num+1))
-}
-done < $hosts_file
-
-exit 0