7 months ago · 4512daab28
--- a/.env.example
+++ b/.env.example
@@ -1,6 +1,3 @@
 
				-"token": "xx"
			
 
				-"user_agent": "xx"
			
 
				-"base_url": "https://api.yuque.com/api/v2"
			
 
				-"data_path": "data"
			
 
				-
			
 
				+token=xx
			
 
				+cookie=_yuque_session 
			
 
				 
			
--- a/README.md
+++ b/README.md
@@ -9,13 +9,16 @@
 
				 
			
 
				 复制文档url，执行如下命令：
			
 
				 ```
			
 
				-python main.py -url https://www.yuque.com/burpheart/phpaudit
			
 
				+python main.py markdown -url https://www.yuque.com/burpheart/phpaudit
			
 
				 
			
 
				 wget https://fileshare.yoqi.me/d/dl/c/Python/crawl_yuque/crawl_yuque
			
 
				 chmod +x crawl_yuque
			
 
				-./crawl_yuque -url https://www.yuque.com/burpheart/phpaudit
			
 
				+./crawl_yuque markdown -url https://www.yuque.com/burpheart/phpaudit
			
 
				+
			
 
				+https://www.yuque.com/yuque/thyzgp
			
 
				 ```
			
 
				 
			
 
				+如需导出私有文档，请先配置 .env 文件：在 Chrome 浏览器中登录语雀后获取 cookie 并填入即可，登录状态下可以看到的项目都可以获取。
			
 
				 
			
 
				 ## 源码分析
			
 
				 
			
@@ -39,3 +42,5 @@ Licensed under the [Apache 2.0](LICENSE) © [liuyuqi.gov@msn.cn](https://github.
 
				 ## Reference
			
 
				 
			
 
				 目前有一些其他语言，如php,node 实现的采集工具，本项目实现的主要用途针对自己的项目，导出markdown文件，方便多平台同步。
			
 
				+
			
 
				+- [gxr404/yuque-dl](https://github.com/gxr404/yuque-dl)
			
--- a/crawl_yuque/__init__.py
+++ b/crawl_yuque/__init__.py
@@ -8,7 +8,38 @@
 
				 '''
			
 
				 
			
 
				 from crawl_yuque.yuque import YuQue
			
 
				+import sys,re,os
			
 
				+from crawl_yuque.options import parser_args
			
 
				 
			
 
				-
			
 
				-def main():
			
 
				-    YuQue().run()
			
 
				+def main(argv=None):
			
 
				+    """Main entry point of the program"""
			
 
				+    try:
			
 
				+        args = parser_args()
			
 
				+        if args.get('version'):
			
 
				+            print("0.0.1")
			
 
				+            sys.exit(0)
			
 
				+        command = args.get('command','')
			
 
				+        if command == '':
			
 
				+            # logging.error("command is empty")
			
 
				+            # argparser.print_help()
			
 
				+            sys.exit(1)
			
 
				+        if command =="serve" or command =="server":
			
 
				+            # from apps import create_app
			
 
				+            # app = create_app()
			
 
				+            # app.run(host='127.0.0.1', port=5000, debug=True)
			
 
				+            return
			
 
				+        if command == "markdown":
			
 
				+            crawl = YuQue(args)
			
 
				+            if(args.url != ''):
			
 
				+                url = args.url
			
 
				+                crawl.get_book(url=url)
			
 
				+            else:
			
 
				+                url = input("请输入语雀文档链接：")
			
 
				+                crawl.get_book(url=url)
			
 
				+        if command == "help":
			
 
				+            return
			
 
				+        if command == "pdf":
			
 
				+            crawl = YuQue(args)
			
 
				+            crawl.pdf()
			
 
				+    except KeyboardInterrupt:
			
 
				+        sys.exit('\nERROR: Interrupted by user')
			
--- a/crawl_yuque/options.py
+++ b/crawl_yuque/options.py
@@ -13,30 +13,18 @@ import shlex
 
				 import dotenv

			
 
				 from collections import OrderedDict

			
 
				 from .utils.str_util import preferredencoding

			
 
				-

			
 
				+from crawl_yuque.utils.frozen_dir import get_app_path

			
 
				 

			
 
				 def parser_args(overrideArguments=None):

			
 
				     """解析参数"""

			
 
				 

			
 
				     argparser = argparse.ArgumentParser()

			
 
				-    argparser.add_argument('-c', '--config', help='config file', default='config.ini')

			
 
				     argparser.add_argument(

			
 
				         'command',

			
 
				         help='command: ',

			
 
				-        choices=['create', 'clone', 'push', 'delete', 'pull'],

			
 
				-    )

			
 
				-    argparser.add_argument('-d', '--debug', help='debug mode', action='store_true')

			
 
				-    argparser.add_argument(

			
 
				-        '-p',

			
 
				-        '--platform',

			
 
				-        help='set a platform',

			
 
				-        choices=['github', 'gitee', 'gitlab', 'gogs', 'gitea', 'bitbucket', 'coding'],

			
 
				-        default='github',

			
 
				+        choices=['markdown', 'pdf', 'serve', 'version', 'help'],

			
 
				     )

			
 
				-    argparser.add_argument('-token', '--token', help='set a token')

			
 
				-    argparser.add_argument(

			
 
				-        '-repo_path', '--repo_path', help='set a repo'

			
 
				-    )  # , default=os.getcwd())

			
 
				+    argparser.add_argument('-url', '--url', help='please input a url', type=str)

			
 
				     args = argparser.parse_args()

			
 
				 

			
 
				     # remove None

			
@@ -52,8 +40,10 @@ def parser_args(overrideArguments=None):
 
				 

			
 
				     system_conf.update(user_conf)

			
 
				     system_conf.update(command_line_conf)

			
 
				-    if args.command == None and args.extractor == None:

			
 
				-        raise 'Error, please input cmd and extractor params11'

			
 
				+    app_path = get_app_path()

			
 
				+    system_conf["app_path"] = app_path

			
 
				+    # if args.command == None and args.extractor == None:

			
 
				+    #     raise 'Error, please input cmd and extractor params11'

			
 
				     return system_conf

			
 
				 

			
 
				 

			
@@ -67,7 +57,7 @@ def _read_custom_conf(config_path: str) -> OrderedDict:
 
				 

			
 
				     try:

			
 
				         with open(config_path, 'r', encoding=preferredencoding()) as f:

			
 
				-            contents = f.read()

			
 
				+            contents: str = f.read()

			
 
				             res = compat_shlex_split(contents, comments=True)

			
 
				     except Exception as e:

			
 
				         return []

			
@@ -77,7 +67,7 @@ def _read_custom_conf(config_path: str) -> OrderedDict:
 
				 def _read_user_conf() -> OrderedDict:

			
 
				     """读取用户配置文件: .env 文件"""

			
 
				     user_conf = OrderedDict()

			
 
				-    dotenv_path = '.env'

			
 
				+    dotenv_path = os.path.join(get_app_path(), '.env')

			
 
				     if os.path.exists(dotenv_path):

			
 
				         user_conf = dotenv.dotenv_values(dotenv_path)

			
 
				     return OrderedDict(user_conf)

			
--- a/crawl_yuque/yuque.py
+++ b/crawl_yuque/yuque.py
@@ -60,8 +60,9 @@ class YuQue(object):
 
				         md = ""
			
 
				         table = str.maketrans('\/:*?"<>|' + "\n\r", "___________")
			
 
				         prename = ""
			
 
				-        if (os.path.exists("download/" + str(docsjson['book']['id'])) == False):
			
 
				-            os.makedirs("download/" + str(docsjson['book']['id']))
			
 
				+        download_dir= os.path.join(self.args["app_path"], "download", str(docsjson['book']['id']))
			
 
				+        if (os.path.exists(download_dir) == False):
			
 
				+            os.makedirs(download_dir)
			
 
				         # 遍历文档
			
 
				         for doc in docsjson['book']['toc']:
			
 
				             # 创建目录
			
@@ -80,8 +81,8 @@ class YuQue(object):
 
				                     else:
			
 
				                         temp[doc['uuid']] = list[uuid]['0'].translate(table) + '/' + temp[doc['uuid']]
			
 
				                         break
			
 
				-                if ((os.path.exists("download/" + str(docsjson['book']['id']) + '/' + temp[doc['uuid']])) == False):
			
 
				-                    os.makedirs("download/" + str(docsjson['book']['id']) + '/' + temp[doc['uuid']])
			
 
				+                if ((os.path.exists(f"{download_dir}/" + temp[doc['uuid']])) == False):
			
 
				+                    os.makedirs(f"{download_dir}/" + temp[doc['uuid']])
			
 
				                 if (temp[doc['uuid']].endswith("/")):
			
 
				                     md += "## " + temp[doc['uuid']][:-1] + "\n"
			
 
				                 else:
			
@@ -96,22 +97,17 @@ class YuQue(object):
 
				                         md += "  " * temp[doc['parent_uuid']].count("/") + "* [" + doc['title'] + "](" + urllib.parse.quote(
			
 
				                             temp[doc['parent_uuid']] + "/" + doc['title'].translate(table) + '.md') + ")" + "\n"
			
 
				                     self.save_page(str(docsjson['book']['id']), doc['url'],
			
 
				-                            "download/" + str(docsjson['book']['id']) + '/' + temp[doc['parent_uuid']] + "/" + doc[
			
 
				+                            f"{download_dir}/" + temp[doc['parent_uuid']] + "/" + doc[
			
 
				                                 'title'].translate(table) + '.md')
			
 
				                 else:
			
 
				                     md += " " + "* [" + doc['title'] + "](" + urllib.parse.quote(
			
 
				                         doc['title'].translate(table) + '.md') + ")" + "\n"
			
 
				                     self.save_page(str(docsjson['book']['id']), doc['url'],
			
 
				-                            "download/" + str(docsjson['book']['id']) + "/" + doc[
			
 
				+                            f"{download_dir}/" + doc[
			
 
				                                 'title'].translate(table) + '.md')
			
 
				-        with open("download/" + str(docsjson['book']['id']) + '/' + "/SUMMARY.md", 'w', encoding='utf-8') as f:
			
 
				+        with open(f"{download_dir}" + "/SUMMARY.md", 'w', encoding='utf-8') as f:
			
 
				             f.write(md)
			
 
				 
			
 
				-    def run(self):
			
 
				-        ''' 获取文档 '''
			
 
				-        if(self.args.url != ''):
			
 
				-            url = self.args.url
			
 
				-            self.get_book(url)
			
 
				-        else:
			
 
				-            url = input("请输入语雀文档链接：")
			
 
				-            self.get_book(url=url)
			
 
				+    def pdf(self):
			
 
				+        """ 生成pdf """
			
 
				+        pass