123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124 |
- #!/usr/bin/env python
- # -*- encoding: utf-8 -*-
- """
- @Contact : liuyuqi.gov@msn.cn
- @Time : 2024/04/09 14:08:35
- @License : Copyright © 2017-2022 liuyuqi. All Rights Reserved.
- @Desc :
- """
- import re
- import requests
- from requests.exceptions import (
- ChunkedEncodingError,
- ContentDecodingError, ConnectionError, StreamConsumedError)
- from requests.utils import (
- stream_decode_response_unicode, iter_slices, CaseInsensitiveDict)
- from flask import Flask, Response, redirect, request
- from urllib3.exceptions import (
- DecodeError, ReadTimeoutError, ProtocolError)
- from urllib.parse import quote
- from fgh.utils.regex_util import exp1, exp2, exp3, exp4, exp5
# Replace requests' default headers (User-Agent, Accept-Encoding, ...) with an
# empty CaseInsensitiveDict so only the client's own headers are forwarded
# upstream — the proxy should be as transparent as possible.
requests.sessions.default_headers = lambda: CaseInsensitiveDict()

# Streaming chunk size in bytes (10 KiB) used when relaying response bodies.
CHUNK_SIZE = 1024 * 10
def check_url(u):
    """Check whether *u* is a GitHub link.

    :param u: URL to test
    :return: the regex match object when one of the known GitHub URL
        patterns matches, otherwise ``False``
    """
    patterns = (exp1, exp2, exp3, exp4, exp5)
    # Lazily try each pattern and return the first successful match.
    return next((m for m in (p.match(u) for p in patterns) if m), False)
def iter_content(self, chunk_size=1, decode_unicode=False):
    """rewrite requests function, set decode_content with False

    Re-implementation of ``requests.Response.iter_content`` that passes
    ``decode_content=False`` to urllib3, so the body is relayed exactly as
    received from upstream (no gzip/deflate decoding) — needed for a
    transparent proxy that forwards Content-Encoding untouched.

    :param self: a ``requests.Response`` instance (called as a free function)
    :param chunk_size: bytes per chunk, or ``None``
    :param decode_unicode: decode chunks to text using the response encoding
    :return: iterator over the body chunks
    :raises StreamConsumedError: if the stream was already consumed
    :raises TypeError: if ``chunk_size`` is neither ``None`` nor an int
    """
    def generate():
        # Special case for urllib3.
        if hasattr(self.raw, 'stream'):
            try:
                # decode_content=False: yield raw (possibly still-compressed) bytes.
                for chunk in self.raw.stream(chunk_size, decode_content=False):
                    yield chunk
            except ProtocolError as e:
                # Translate urllib3 exceptions into their requests equivalents,
                # mirroring what requests itself does.
                raise ChunkedEncodingError(e)
            except DecodeError as e:
                raise ContentDecodingError(e)
            except ReadTimeoutError as e:
                raise ConnectionError(e)
        else:
            # Standard file-like object.
            while True:
                chunk = self.raw.read(chunk_size)
                if not chunk:
                    break
                yield chunk
        self._content_consumed = True

    # NOTE(review): in requests, _content is a bool sentinel until the body is
    # actually read; consumed + bool means the stream is gone — confirm against
    # the requests version in use.
    if self._content_consumed and isinstance(self._content, bool):
        raise StreamConsumedError()
    elif chunk_size is not None and not isinstance(chunk_size, int):
        raise TypeError("chunk_size must be an int, it is instead a %s." % type(chunk_size))
    # simulate reading small chunks of the content
    reused_chunks = iter_slices(self._content, chunk_size)
    stream_chunks = generate()
    # If the content was already loaded, re-slice it; otherwise stream it live.
    chunks = reused_chunks if self._content_consumed else stream_chunks
    if decode_unicode:
        chunks = stream_decode_response_unicode(chunks, self)
    return chunks
def proxy(u, allow_redirects=False):
    """rewrite requests function, set stream=True and allow_redirects=False
    and add size_limit to limit the size of the content

    Proxy the current Flask request to *u* and stream the upstream response
    back to the client, handling request headers, request body, redirects
    and errors.

    :param u: URL prefix to proxy to (the rest of the request path is appended)
    :param allow_redirects: whether requests itself may follow redirects
    :return: a Flask ``Response`` (or ``redirect``) mirroring the upstream reply
    """
    headers = {}  # response headers returned to the client
    r_headers = dict(request.headers)
    # Allowed content size; 999 GB by default, i.e. effectively unlimited.
    # https://github.com/hunshcn/gh-proxy/issues/8
    size_limit = 1024 * 1024 * 1024 * 999
    # Drop the client's Host header so requests sets the correct upstream host.
    if 'Host' in r_headers:
        r_headers.pop('Host')
    try:
        # Target URL = prefix + everything after this view's base URL.
        url = u + request.url.replace(request.base_url, '', 1)
        # Repair "https:/host" (double slash collapsed by path normalisation).
        if url.startswith('https:/') and not url.startswith('https://'):
            url = 'https://' + url[7:]
        # stream=True so the body is relayed chunk-by-chunk, never buffered whole.
        r = requests.request(method=request.method, url=url, data=request.data,
                             headers=r_headers, stream=True, allow_redirects=allow_redirects)
        headers = dict(r.headers)

        # If the declared upstream size exceeds size_limit, send the client to
        # the original URL instead of proxying the body.
        if 'Content-length' in r.headers and int(r.headers['Content-length']) > size_limit:
            return redirect(u + request.url.replace(request.base_url, '', 1))

        def generate():
            # Relay the raw body without content decoding (module-level
            # iter_content passes decode_content=False to urllib3).
            for chunk in iter_content(r, chunk_size=CHUNK_SIZE):
                yield chunk

        if 'Location' in r.headers:
            _location = r.headers.get('Location')
            if check_url(_location):
                # Redirect target is itself a GitHub URL: rewrite it so the
                # client stays on this proxy.
                headers['Location'] = '/' + _location
            else:
                # Otherwise follow the redirect server-side (recursively).
                return proxy(_location, True)
        return Response(generate(), headers=headers, status=r.status_code)
    except Exception as e:
        # Top-level boundary: report any failure as a 500 to the client.
        headers['content-type'] = 'text/html; charset=UTF-8'
        return Response('server error ' + str(e), status=500, headers=headers)
|