Browse Source

refactor: update CLI commands and improve argument parsing

liuyuqi-dellpc 7 months ago
parent
commit
4512daab28
5 changed files with 63 additions and 44 deletions
  1. 2 5
      .env.example
  2. 7 2
      README.md
  3. 34 3
      crawl_yuque/__init__.py
  4. 9 19
      crawl_yuque/options.py
  5. 11 15
      crawl_yuque/yuque.py

+ 2 - 5
.env.example

@@ -1,6 +1,3 @@
-"token": "xx"
-"user_agent": "xx"
-"base_url": "https://api.yuque.com/api/v2"
-"data_path": "data"
-
+token=xx
+cookie=_yuque_session 
 

+ 7 - 2
README.md

@@ -9,13 +9,16 @@
 
 复制文档url,执行如下命令:
 ```
-python main.py -url https://www.yuque.com/burpheart/phpaudit
+python main.py markdown -url https://www.yuque.com/burpheart/phpaudit
 
 wget https://fileshare.yoqi.me/d/dl/c/Python/crawl_yuque/crawl_yuque
 chmod +x crawl_yuque
-./crawl_yuque -url https://www.yuque.com/burpheart/phpaudit
+./crawl_yuque markdown -url https://www.yuque.com/burpheart/phpaudit
+
+https://www.yuque.com/yuque/thyzgp
 ```
 
+私有文档需要先配置 .env 文件:在 Chrome 中登录语雀后获取 cookie 并填入即可,登录状态下可以看到的项目都可以获取。
 
 ## 源码分析
 
@@ -39,3 +42,5 @@ Licensed under the [Apache 2.0](LICENSE) © [liuyuqi.gov@msn.cn](https://github.
 ## Reference
 
 目前有一些其他语言,如php,node 实现的采集工具,本项目实现的主要用途针对自己的项目,导出markdown文件,方便多平台同步。
+
+- [gxr404/yuque-dl](https://github.com/gxr404/yuque-dl)

+ 34 - 3
crawl_yuque/__init__.py

@@ -8,7 +8,38 @@
 '''
 
 from crawl_yuque.yuque import YuQue
+import sys,re,os
+from crawl_yuque.options import parser_args
 
-
-def main():
-    YuQue().run()
+def main(argv=None):
+    """Main entry point of the program"""
+    try:
+        args = parser_args()
+        if args.get('version'):
+            print("0.0.1")
+            sys.exit(0)
+        command = args.get('command','')
+        if command == '':
+            # logging.error("command is empty")
+            # argparser.print_help()
+            sys.exit(1)
+        if command =="serve" or command =="server":
+            # from apps import create_app
+            # app = create_app()
+            # app.run(host='127.0.0.1', port=5000, debug=True)
+            return
+        if command == "markdown":
+            crawl = YuQue(args)
+            if(args.url != ''):
+                url = args.url
+                crawl.get_book(url=url)
+            else:
+                url = input("请输入语雀文档链接:")
+                crawl.get_book(url=url)
+        if command == "help":
+            return
+        if command == "pdf":
+            crawl = YuQue(args)
+            crawl.pdf()
+    except KeyboardInterrupt:
+        sys.exit('\nERROR: Interrupted by user')

+ 9 - 19
crawl_yuque/options.py

@@ -13,30 +13,18 @@ import shlex
 import dotenv
 from collections import OrderedDict
 from .utils.str_util import preferredencoding
-
+from crawl_yuque.utils.frozen_dir import get_app_path
 
 def parser_args(overrideArguments=None):
     """解析参数"""
 
     argparser = argparse.ArgumentParser()
-    argparser.add_argument('-c', '--config', help='config file', default='config.ini')
     argparser.add_argument(
         'command',
         help='command: ',
-        choices=['create', 'clone', 'push', 'delete', 'pull'],
-    )
-    argparser.add_argument('-d', '--debug', help='debug mode', action='store_true')
-    argparser.add_argument(
-        '-p',
-        '--platform',
-        help='set a platform',
-        choices=['github', 'gitee', 'gitlab', 'gogs', 'gitea', 'bitbucket', 'coding'],
-        default='github',
+        choices=['markdown', 'pdf', 'serve', 'version', 'help'],
     )
-    argparser.add_argument('-token', '--token', help='set a token')
-    argparser.add_argument(
-        '-repo_path', '--repo_path', help='set a repo'
-    )  # , default=os.getcwd())
+    argparser.add_argument('-url', '--url', help='please input a url', type=str)
     args = argparser.parse_args()
 
     # remove None
@@ -52,8 +40,10 @@ def parser_args(overrideArguments=None):
 
     system_conf.update(user_conf)
     system_conf.update(command_line_conf)
-    if args.command == None and args.extractor == None:
-        raise 'Error, please input cmd and extractor params11'
+    app_path = get_app_path()
+    system_conf["app_path"] = app_path
+    # if args.command == None and args.extractor == None:
+    #     raise 'Error, please input cmd and extractor params11'
     return system_conf
 
 
@@ -67,7 +57,7 @@ def _read_custom_conf(config_path: str) -> OrderedDict:
 
     try:
         with open(config_path, 'r', encoding=preferredencoding()) as f:
-            contents = f.read()
+            contents: str = f.read()
             res = compat_shlex_split(contents, comments=True)
     except Exception as e:
         return []
@@ -77,7 +67,7 @@ def _read_custom_conf(config_path: str) -> OrderedDict:
 def _read_user_conf() -> OrderedDict:
     """读取用户配置文件: .env 文件"""
     user_conf = OrderedDict()
-    dotenv_path = '.env'
+    dotenv_path = os.path.join(get_app_path(), '.env')
     if os.path.exists(dotenv_path):
         user_conf = dotenv.dotenv_values(dotenv_path)
     return OrderedDict(user_conf)

+ 11 - 15
crawl_yuque/yuque.py

@@ -60,8 +60,9 @@ class YuQue(object):
         md = ""
         table = str.maketrans('\/:*?"<>|' + "\n\r", "___________")
         prename = ""
-        if (os.path.exists("download/" + str(docsjson['book']['id'])) == False):
-            os.makedirs("download/" + str(docsjson['book']['id']))
+        download_dir= os.path.join(self.args["app_path"], "download", str(docsjson['book']['id']))
+        if (os.path.exists(download_dir) == False):
+            os.makedirs(download_dir)
         # 遍历文档
         for doc in docsjson['book']['toc']:
             # 创建目录
@@ -80,8 +81,8 @@ class YuQue(object):
                     else:
                         temp[doc['uuid']] = list[uuid]['0'].translate(table) + '/' + temp[doc['uuid']]
                         break
-                if ((os.path.exists("download/" + str(docsjson['book']['id']) + '/' + temp[doc['uuid']])) == False):
-                    os.makedirs("download/" + str(docsjson['book']['id']) + '/' + temp[doc['uuid']])
+                if ((os.path.exists(f"{download_dir}/" + temp[doc['uuid']])) == False):
+                    os.makedirs(f"{download_dir}/" + temp[doc['uuid']])
                 if (temp[doc['uuid']].endswith("/")):
                     md += "## " + temp[doc['uuid']][:-1] + "\n"
                 else:
@@ -96,22 +97,17 @@ class YuQue(object):
                         md += "  " * temp[doc['parent_uuid']].count("/") + "* [" + doc['title'] + "](" + urllib.parse.quote(
                             temp[doc['parent_uuid']] + "/" + doc['title'].translate(table) + '.md') + ")" + "\n"
                     self.save_page(str(docsjson['book']['id']), doc['url'],
-                            "download/" + str(docsjson['book']['id']) + '/' + temp[doc['parent_uuid']] + "/" + doc[
+                            f"{download_dir}/" + temp[doc['parent_uuid']] + "/" + doc[
                                 'title'].translate(table) + '.md')
                 else:
                     md += " " + "* [" + doc['title'] + "](" + urllib.parse.quote(
                         doc['title'].translate(table) + '.md') + ")" + "\n"
                     self.save_page(str(docsjson['book']['id']), doc['url'],
-                            "download/" + str(docsjson['book']['id']) + "/" + doc[
+                            f"{download_dir}/" + doc[
                                 'title'].translate(table) + '.md')
-        with open("download/" + str(docsjson['book']['id']) + '/' + "/SUMMARY.md", 'w', encoding='utf-8') as f:
+        with open(f"{download_dir}" + "/SUMMARY.md", 'w', encoding='utf-8') as f:
             f.write(md)
 
-    def run(self):
-        ''' 获取文档 '''
-        if(self.args.url != ''):
-            url = self.args.url
-            self.get_book(url)
-        else:
-            url = input("请输入语雀文档链接:")
-            self.get_book(url=url)
+    def pdf(self):
+        """ 生成pdf """
+        pass