# js_convert.py (1.6 KB)
import codecs
import os
import re
import sys

import chardet
  5. def WriteFile(filePath, u, encoding="utf-8"):
  6. with codecs.open(filePath, "w", encoding) as f:
  7. print(filePath)
  8. f.write(u)
  9. def convert(src, dst):
  10. # 检测编码,coding可能检测不到编码,有异常
  11. f = open(src, "rb")
  12. coding = chardet.detect(f.read())["encoding"]
  13. f.close()
  14. with codecs.open(src, "r", coding) as f:
  15. try:
  16. pattern = 'href="javascript:if\(confirm\([^"]*"'
  17. pattern_url='(https?|ftp|file)://[-A-Za-z0-9+&@#/%?=~_|!:,.;]+[-A-Za-z0-9+&@#/%=~_|]'
  18. data = f.read()
  19. # 获取所有正则匹配
  20. out = re.findall(pattern, data, flags=0)
  21. if out:
  22. for eachOut in out :
  23. # 取出每个链接
  24. url='href="'+re.search(pattern_url,eachOut,flags=0).group()+'"'
  25. data=data.replace(eachOut, url)
  26. WriteFile(dst, data, encoding="utf-8")
  27. except Exception:
  28. print(src + " " + coding + " read error")
  29. # 把目录中的*.java编码由gbk转换为utf-8
  30. def fix(rootdir):
  31. for parent, dirnames, filenames in os.walk(rootdir):
  32. for dirname in dirnames:
  33. # 递归函数,遍历所有子文件夹
  34. fix(dirname)
  35. for filename in filenames:
  36. if filename.endswith(".html"):
  37. convert(os.path.join(parent, filename),
  38. os.path.join(parent, filename))
  39. if __name__ == "__main__":
  40. src_path = "E:/share/linux/100/www.100.me"
  41. fix(src_path)