liuyuqi-dellpc 9 months ago
parent
commit
bc243c6901

+ 27 - 0
.dockerignore

@@ -0,0 +1,27 @@
+**/__pycache__
+**/.venv
+**/.classpath
+**/.dockerignore
+**/.env
+**/.git
+**/.gitignore
+**/.project
+**/.settings
+**/.toolstarget
+**/.vs
+**/.vscode
+**/*.*proj.user
+**/*.dbmdl
+**/*.jfm
+**/bin
+**/charts
+**/docker-compose*
+**/compose*
+**/Dockerfile*
+**/node_modules
+**/npm-debug.log
+**/obj
+**/secrets.dev.yaml
+**/values.dev.yaml
+LICENSE
+README.md

+ 2 - 0
.env

@@ -0,0 +1,2 @@
+year = []
+cookie = 

+ 5 - 6
.github/workflows/build.yml

@@ -28,20 +28,19 @@ jobs:
       - name: Set Up Python
         uses: actions/setup-python@v4
         with:
-          python-version: '3.9'
+          python-version: '3.12'
           cache: pip
           cache-dependency-path: '**/requirements*.txt'
 
       - name: Install Dependencies
         run: |
-          python -m pip install --upgrade pip wheel setuptools
-          pip install -r requirements.txt
-          python -m pip install pyinstaller
+          python -m pip install --upgrade pip wheel setuptools poetry
+          poetry config virtualenvs.in-project true
+          poetry install
 
       - name: Build Package
         run: |
-          python -m PyInstaller -F -c  -i favicon.ico --name crawl_sse main.py
-
+          poetry run python -m PyInstaller crawl_sse.spec
       - name: Update to ali oss
         uses: yizhoumo/setup-ossutil@v1
         with:

+ 19 - 0
.vscode/launch.json

@@ -0,0 +1,19 @@
+{
+    "configurations": [
+        {
+            "name": "Docker: Python - General",
+            "type": "docker",
+            "request": "launch",
+            "preLaunchTask": "docker-run: debug",
+            "python": {
+                "pathMappings": [
+                    {
+                        "localRoot": "${workspaceFolder}",
+                        "remoteRoot": "/app"
+                    }
+                ],
+                "projectType": "general"
+            }
+        }
+    ]
+}

+ 26 - 0
.vscode/tasks.json

@@ -0,0 +1,26 @@
+{
+	"version": "2.0.0",
+	"tasks": [
+		{
+			"type": "docker-build",
+			"label": "docker-build",
+			"platform": "python",
+			"dockerBuild": {
+				"tag": "crawlsse:latest",
+				"dockerfile": "${workspaceFolder}/Dockerfile",
+				"context": "${workspaceFolder}",
+				"pull": true
+			}
+		},
+		{
+			"type": "docker-run",
+			"label": "docker-run: debug",
+			"dependsOn": [
+				"docker-build"
+			],
+			"python": {
+				"file": "main.py"
+			}
+		}
+	]
+}

+ 23 - 0
Dockerfile

@@ -0,0 +1,23 @@
+# For more information, please refer to https://aka.ms/vscode-docker-python
+FROM python:3-slim
+
+# Keeps Python from generating .pyc files in the container
+ENV PYTHONDONTWRITEBYTECODE=1
+
+# Turns off buffering for easier container logging
+ENV PYTHONUNBUFFERED=1
+
+# Install pip requirements
+COPY requirements.txt .
+RUN python -m pip install -r requirements.txt
+
+WORKDIR /app
+COPY . /app
+
+# Creates a non-root user with an explicit UID and adds permission to access the /app folder
+# For more info, please refer to https://aka.ms/vscode-docker-python-configure-containers
+RUN adduser -u 5678 --disabled-password --gecos "" appuser && chown -R appuser /app
+USER appuser
+
+# During debugging, this entry point will be overridden. For more information, please refer to https://aka.ms/vscode-docker-python-debug
+CMD ["python", "main.py"]

+ 13 - 6
README.md

@@ -6,17 +6,13 @@
 
 [English](./README.md) 
 
-上市公司数据分析
+上市公司数据下载,分析
 
 ## Develop
 
 下载上市公司数据:
 ```
-virtualenv .venv
-source .venv/bin/activate
-
-pip install -r requirements.txt
-
+poetry shell
 python main.py company
 
 ```
@@ -34,6 +30,17 @@ python main.py nianbao --download
 
 ```
 
+docker 打包交付运行:
+
+```
+docker run -it --rm -v /data/crawl_sse:/app jianboy/crawl_sse:1.0.1 download
+
+```
+
+
+分析年报:
+docs/上市公司分析.ipynb
+
 ## License
 
 ## Reference

+ 3 - 3
crawl_sse/cninfo.py

@@ -16,7 +16,7 @@ from concurrent.futures import ThreadPoolExecutor
 
 class Cninfo(object):
     ''' 
-    深圳证券
+    巨潮资讯
     '''
     years =[ 2010,2011,2012,2013,2014,2015,2016,2017, 2018, 2019, 2020, 2021, 2022, 2023 ]
     host = "http://www.cninfo.com.cn"
@@ -252,10 +252,10 @@ class Cninfo(object):
                 file_names = [f'data/{company_names[i]}/{years[i]}-{titles[i]}.pdf' for i in range(len(company_codes))]
                 for i in range(len(urls)):
                     # http://static.cninfo.com.cn/finalpage/2018-01-30/1204372527.PDF
-                    self.pool.submit(self.download_file, urls[i],file_names[i] )
+                    self.pool.submit(self._download_file, urls[i],file_names[i] )
                 print(f'----{year}年下载完成')
     
-    def download_file(self, url, file_path):
+    def _download_file(self, url, file_path):
         ''' download file 
         '''
         if not os.path.exists(file_path):

+ 50 - 0
crawl_sse/options.py

@@ -0,0 +1,50 @@
+#!/usr/bin/env python
+# -*- encoding: utf-8 -*-
+"""
+@Contact :   liuyuqi.gov@msn.cn
+@Time    :   2024/06/22
+@License :   Copyright © 2017-2022 liuyuqi. All Rights Reserved.
+@Desc    :   command line params or config from file
+"""
+
+import sys,os,re
+import argparse
+from collections import OrderedDict
+from .utils.frozen_dir import get_app_path
+
+def parse_args():
+    """
+    Parse the command line and return the effective configuration.
+
+    The positional ``command`` is declared with ``nargs='?'`` so that its
+    ``default`` of ``help`` is actually used when no command is given;
+    argparse ignores ``default`` on a positional that is required.
+
+    Returns:
+        OrderedDict: every parsed option whose value is not ``None``, plus
+        the key ``app_path`` holding the result of ``get_app_path()``.
+    """
+    # NOTE(review): description and option names (--input domain.txt,
+    # --domain, --keyword) look inherited from a domain-search tool, and the
+    # README invokes commands that are not in ``choices`` -- confirm.
+    parser = argparse.ArgumentParser(description='search domain')
+    parser.add_argument('command', nargs='?', help='command: generate, search', choices=['generate','search', 'help','version'] , default='help')
+    parser.add_argument('--export_all', action='store_true', help='export all domain')
+    parser.add_argument(
+        "--input", help="set input domain list file,eg: domain.txt", type=str, default="domain.txt")
+    parser.add_argument(
+        "--output", help="set output domain result list file,eg: result.txt", type=str, default="result.txt")
+
+    parser.add_argument('--lang', choices=['zh', 'en'], default='en',help='language')
+    parser.add_argument('--domain', default='com',help='input some domain, plilt with ","')
+    parser.add_argument('--keyword', default='', help='input some keyword, spilt with ","')
+    parser.add_argument('--position', default='prefix',choices=['prefix', 'suffix'], help='choose generate str positon')
+    args = parser.parse_args()
+
+    # Keep only the options that carry a value (drop None entries).
+    command_line_conf = OrderedDict(
+        {k: v for k, v in vars(args).items() if v is not None}
+    )
+    # Start from a fresh mapping owned by this call; the former chained
+    # assignment bound three names to one shared dict, two of them unused.
+    system_conf = OrderedDict()
+    system_conf.update(command_line_conf)
+
+    # Record where the application lives so callers can resolve files.
+    app_path = get_app_path()
+    system_conf["app_path"] = app_path
+    return system_conf
+
+def _read_custom_conf(config_path: str) -> OrderedDict:
+    """Read the custom config file.
+
+    Not implemented yet: ``config_path`` is accepted but not read.
+
+    Args:
+        config_path: path of the custom config file (currently unused).
+
+    Returns:
+        OrderedDict: an empty mapping, so the declared return type holds
+        and callers can merge the result without a ``None`` check.
+    """
+    return OrderedDict()
+
+def _read_user_conf() -> OrderedDict:
+    """Read the user config file.
+
+    Not implemented yet: no file is opened.
+
+    Returns:
+        OrderedDict: an empty mapping, so the declared return type holds
+        and callers can merge the result without a ``None`` check.
+    """
+    return OrderedDict()

+ 8 - 1
crawl_sse/sse.py

@@ -9,6 +9,7 @@
 http://www.sse.com.cn/assortment/stock/areatrade/area/
 
 '''
+from ast import main
 import requests
 from lxml import etree
 import csv,re,os,sys,time,random
@@ -21,6 +22,9 @@ from selenium.webdriver.support import expected_conditions as EC
 import selenium.common.exceptions
 
 class Sse(object):
+    """
+    上海证券交易所
+    """
 
     _host = r'http://www.sse.com.cn'
     _headers = {
@@ -133,4 +137,7 @@ class Sse(object):
         with open('sse_diqu.csv','w',newline='',encoding='utf-8') as f:
             writer = csv.writer(f)
             writer.writerow(['地区名称','股票代码','名称'])
-            writer.writerows(self.diqu_date)
+            writer.writerows(self.diqu_date)
+
+if __name__=='__main__':
+    # Placeholder: running this module directly performs no action.
+    pass

+ 0 - 0
crawl_sse/utils/__init__.py


+ 18 - 0
crawl_sse/utils/frozen_dir.py

@@ -0,0 +1,18 @@
+#!/usr/bin/env python
+# -*- encoding: utf-8 -*-
+"""
+@Contact :   liuyuqi.gov@msn.cn
+@Time    :   2024/04/12
+@License :   Copyright © 2017-2022 liuyuqi. All Rights Reserved.
+@Desc    :   
+"""
+import sys  
+import os  
+   
+def get_app_path() -> str:
+    """Return the directory the application is considered to run from.
+
+    When ``sys`` carries a ``frozen`` attribute (the PyInstaller case) the
+    directory of ``sys.executable`` is returned; otherwise ``sys.path[0]``.
+    """
+    if not hasattr(sys, 'frozen'):
+        # Not bundled: use the first entry of the module search path.
+        # (Alternative left disabled by the author: walking three
+        # ``os.path.dirname`` levels up from ``__file__``.)
+        return sys.path[0]
+    # Bundled with PyInstaller: the folder that contains the executable.
+    exe_dir = os.path.dirname(sys.executable)
+    return exe_dir

+ 11 - 0
docker-compose.debug.yml

@@ -0,0 +1,11 @@
+version: '3.4'
+
+services:
+  crawlsse:
+    image: crawlsse
+    build:
+      context: .
+      dockerfile: ./Dockerfile
+    command: ["sh", "-c", "pip install debugpy -t /tmp && python /tmp/debugpy --wait-for-client --listen 0.0.0.0:5678 main.py "]
+    ports:
+      - 5678:5678

+ 8 - 0
docker-compose.yml

@@ -0,0 +1,8 @@
+version: '3.4'
+
+services:
+  crawlsse:
+    image: crawlsse
+    build:
+      context: .
+      dockerfile: ./Dockerfile

+ 17 - 6
main.py

@@ -7,12 +7,23 @@
 @Desc    :   enter point
 '''
 
-# from crawl_sse import Sse
+from crawl_sse import Sse
 from crawl_sse import Cninfo
+from crawl_sse.options import parse_args
+import sys
 
 if __name__=='__main__':
-    # sse = Sse()
-    # sse.crawl()
-    cninfo =Cninfo()
-    cninfo.download()
-
+    # Dispatch on the positional ``command`` returned by parse_args().
+    args = parse_args()
+    if args['command'] == 'generate':
+        # 'generate' runs the Sse crawler.
+        sse = Sse()
+        sse.crawl()
+    elif args['command'] == 'search':
+        # 'search' runs the Cninfo download.
+        cninfo =Cninfo()
+        cninfo.download()
+    elif args['command'] == 'help':
+        # NOTE(review): 'help' is accepted but prints nothing here.
+        pass
+    elif args['command'] == 'version':
+        print('1.0.0')
+    else:
+        # Fallback for any other value: report the error and exit with 1.
+        print('command error, please use --help to get help')
+        sys.exit(1)