proxy.py 4.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124
  1. #!/usr/bin/env python
  2. # -*- encoding: utf-8 -*-
  3. """
  4. @Contact : liuyuqi.gov@msn.cn
  5. @Time : 2024/04/09 14:08:35
  6. @License : Copyright © 2017-2022 liuyuqi. All Rights Reserved.
  7. @Desc :
  8. """
  9. import re
  10. import requests
  11. from requests.exceptions import (
  12. ChunkedEncodingError,
  13. ContentDecodingError, ConnectionError, StreamConsumedError)
  14. from requests.utils import (
  15. stream_decode_response_unicode, iter_slices, CaseInsensitiveDict)
  16. from flask import Flask, Response, redirect, request
  17. from urllib3.exceptions import (
  18. DecodeError, ReadTimeoutError, ProtocolError)
  19. from urllib.parse import quote
  20. from fgh.utils.regex_util import exp1, exp2, exp3, exp4, exp5
  21. requests.sessions.default_headers = lambda: CaseInsensitiveDict()
  22. CHUNK_SIZE = 1024 * 10
  23. def check_url(u):
  24. """ 检测URL是否GitHub链接
  25. :param u: URL
  26. :return: 匹配到返回 match object,否则返回 False
  27. """
  28. for exp in (exp1, exp2, exp3, exp4, exp5):
  29. m = exp.match(u)
  30. if m:
  31. return m
  32. return False
  33. def iter_content(self, chunk_size=1, decode_unicode=False):
  34. """rewrite requests function, set decode_content with False"""
  35. def generate():
  36. # Special case for urllib3.
  37. if hasattr(self.raw, 'stream'):
  38. try:
  39. for chunk in self.raw.stream(chunk_size, decode_content=False):
  40. yield chunk
  41. except ProtocolError as e:
  42. raise ChunkedEncodingError(e)
  43. except DecodeError as e:
  44. raise ContentDecodingError(e)
  45. except ReadTimeoutError as e:
  46. raise ConnectionError(e)
  47. else:
  48. # Standard file-like object.
  49. while True:
  50. chunk = self.raw.read(chunk_size)
  51. if not chunk:
  52. break
  53. yield chunk
  54. self._content_consumed = True
  55. if self._content_consumed and isinstance(self._content, bool):
  56. raise StreamConsumedError()
  57. elif chunk_size is not None and not isinstance(chunk_size, int):
  58. raise TypeError("chunk_size must be an int, it is instead a %s." % type(chunk_size))
  59. # simulate reading small chunks of the content
  60. reused_chunks = iter_slices(self._content, chunk_size)
  61. stream_chunks = generate()
  62. chunks = reused_chunks if self._content_consumed else stream_chunks
  63. if decode_unicode:
  64. chunks = stream_decode_response_unicode(chunks, self)
  65. return chunks
  66. def proxy(u, allow_redirects=False):
  67. """rewrite requests function, set stream=True and allow_redirects=False
  68. and add size_limit to limit the size of the content
  69. 代理请求并返回相应结果,处理了请求头部、请求的数据、重定向、异常处理等情况
  70. :param u: 要代理的 URL
  71. :param allow_redirects: 是否允许重定向
  72. :return: Response
  73. """
  74. headers = {} # 请求头
  75. r_headers = dict(request.headers)
  76. size_limit = 1024 * 1024 * 1024 * 999 # 允许的文件大小,默认999GB,相当于无限制了 https://github.com/hunshcn/gh-proxy/issues/8
  77. # 删除请求头中的 Host
  78. if 'Host' in r_headers:
  79. r_headers.pop('Host')
  80. try:
  81. url = u + request.url.replace(request.base_url, '', 1)
  82. if url.startswith('https:/') and not url.startswith('https://'):
  83. url = 'https://' + url[7:]
  84. r = requests.request(method=request.method, url=url, data=request.data,
  85. headers=r_headers, stream=True, allow_redirects=allow_redirects)
  86. headers = dict(r.headers)
  87. # 是否超过设定的 size_limit,如果超过则重定向到原始URL
  88. if 'Content-length' in r.headers and int(r.headers['Content-length']) > size_limit:
  89. return redirect(u + request.url.replace(request.base_url, '', 1))
  90. def generate():
  91. for chunk in iter_content(r, chunk_size=CHUNK_SIZE):
  92. yield chunk
  93. if 'Location' in r.headers:
  94. _location = r.headers.get('Location')
  95. if check_url(_location):
  96. headers['Location'] = '/' + _location
  97. else:
  98. return proxy(_location, True)
  99. return Response(generate(), headers=headers, status=r.status_code)
  100. except Exception as e:
  101. headers['content-type'] = 'text/html; charset=UTF-8'
  102. return Response('server error ' + str(e), status=500, headers=headers)