Browse Source

Merge branch 'tmp' of lyq/word2html into master

天问 1 year ago
parent
commit
fc4e0fca9a
8 changed files with 200 additions and 18 deletions
  1. 25 0
      .dockerignore
  2. 63 0
      .github/workflows/build.yml
  3. 19 0
      .vscode/launch.json
  4. 40 0
      .vscode/tasks.json
  5. 21 0
      Dockerfile
  6. 16 0
      README.md
  7. 6 18
      convert.go
  8. 10 0
      docker-compose.yml

+ 25 - 0
.dockerignore

@@ -0,0 +1,25 @@
+**/__pycache__
+**/.classpath
+**/.dockerignore
+**/.env
+**/.git
+**/.gitignore
+**/.project
+**/.settings
+**/.toolstarget
+**/.vs
+**/.vscode
+**/*.*proj.user
+**/*.dbmdl
+**/*.jfm
+**/bin
+**/charts
+**/docker-compose*
+**/compose*
+**/Dockerfile*
+**/node_modules
+**/npm-debug.log
+**/obj
+**/secrets.dev.yaml
+**/values.dev.yaml
+README.md

+ 63 - 0
.github/workflows/build.yml

@@ -0,0 +1,63 @@
+name: Go
+
+on:
+  push:
+    branches: [ master, tmp ]
+  pull_request:
+    branches: [ master, tmp ]
+
+jobs:
+
+  build:
+    name: Build
+    strategy:
+      matrix:
+        os: [ubuntu-latest]
+        include:
+        - os: ubuntu-latest
+          path: |
+            ~/.cache/go-build
+            ~/go/pkg/mod
+    runs-on: ubuntu-latest
+    steps:
+      - name: Set up Go 1.13
+        uses: actions/setup-go@v1
+        with:
+          go-version: 1.13
+        id: go
+
+      - name: Check out code into the Go module directory
+        uses: actions/checkout@v2
+
+      - name: cache
+        id: cache
+        uses: actions/cache@v2
+        with:
+          path: ${{ matrix.path }}
+          key: ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }}
+          restore-keys: |
+            ${{ runner.os }}-go-
+
+      - name: Get dependencies
+        run: |
+          go get
+          if [ -f Gopkg.toml ]; then
+              curl https://raw.githubusercontent.com/golang/dep/master/install.sh | sh
+              dep ensure
+          fi
+        working-directory: ./
+
+      - name: Build
+        run: go build convert.go
+        working-directory: ./
+
+      - name: docker build
+        run: |
+          docker login --username=${{ secrets.USERNAME }} --password ${{ secrets.PASSWORD }} ccr.ccs.tencentyun.com
+          docker build . -t ccr.ccs.tencentyun.com/jianboy/wordtohtml:v1.0.5
+
+      - name: docker push
+        run: |
+          docker push ccr.ccs.tencentyun.com/jianboy/wordtohtml:v1.0.5
+          docker tag $(docker images ccr.ccs.tencentyun.com/jianboy/wordtohtml:v1.0.5 -q) ccr.ccs.tencentyun.com/jianboy/wordtohtml:latest
+          docker push ccr.ccs.tencentyun.com/jianboy/wordtohtml:latest

+ 19 - 0
.vscode/launch.json

@@ -0,0 +1,19 @@
+{
+    "configurations": [
+        {
+            "name": "Docker: Python - Flask",
+            "type": "docker",
+            "request": "launch",
+            "preLaunchTask": "docker-run: debug",
+            "python": {
+                "pathMappings": [
+                    {
+                        "localRoot": "${workspaceFolder}",
+                        "remoteRoot": "/app"
+                    }
+                ],
+                "projectType": "flask"
+            }
+        }
+    ]
+}

+ 40 - 0
.vscode/tasks.json

@@ -0,0 +1,40 @@
+{
+	"version": "2.0.0",
+	"tasks": [
+		{
+			"type": "docker-build",
+			"label": "docker-build",
+			"platform": "python",
+			"dockerBuild": {
+				"tag": "wordtoxml:latest",
+				"dockerfile": "${workspaceFolder}/Dockerfile",
+				"context": "${workspaceFolder}",
+				"pull": true
+			}
+		},
+		{
+			"type": "docker-run",
+			"label": "docker-run: debug",
+			"dependsOn": [
+				"docker-build"
+			],
+			"dockerRun": {
+				"env": {
+					"FLASK_APP": "垃圾分类\\2-tf分类.py"
+				}
+			},
+			"python": {
+				"args": [
+					"run",
+					"--no-debugger",
+					"--no-reload",
+					"--host",
+					"0.0.0.0",
+					"--port",
+					"5000"
+				],
+				"module": "flask"
+			}
+		}
+	]
+}

+ 21 - 0
Dockerfile

@@ -0,0 +1,21 @@
+FROM kfwkfulq.mirror.aliyuncs.com/library/ubuntu:20.10 as dev
+LABEL Name=wordtohtml Version=1.0.5
+EXPOSE 5000
+
+ENV LANG     C.UTF-8
+# ENV LANGUAGE en_US.UTF-8
+# ENV LC_ALL   en_US.UTF-8
+
+# Install requirements
+RUN apt update && \
+    apt install -y libreoffice
+
+# FROM jianboy/libreoffice:v1.0.5 as prod
+WORKDIR /app
+RUN mkdir -p /opt/wordtohtml
+COPY convert /opt/wordtohtml/convert
+RUN adduser -u 5678 --disabled-password --gecos "" wordtohtml && chown -R wordtohtml /app && chown -R wordtohtml /opt/wordtohtml
+USER wordtohtml
+
+VOLUME [ "/app" ]
+CMD ["/opt/wordtohtml/convert"]

+ 16 - 0
README.md

@@ -13,5 +13,21 @@ sudo apt-get install poppler-utils wv unrtf tidy
 go get github.com/JalfResi/justext
 go get github.com/JalfResi/justext
 ```
 ```
 
 
+GitHub Actions 配置 docker 参数: USERNAME, PASSWORD, REGISTRY, GITHUB_RUN_NUMBER
 
 
 
 
+## 使用
+
+
+```
+useradd -u 5678 wordtohtml
+
+docker pull ccr.ccs.tencentyun.com/jianboy/wordtohtml:v1.0.5
+
+docker run -it --rm -v /data/wordtohtml:/app ccr.ccs.tencentyun.com/jianboy/wordtohtml:v1.0.5
+
+alias wordtohtml='docker run -it --rm -v /data:/app jianboy/wordtohtml'
+
+alias pdf2htmlEX='docker run -ti --rm -v `pwd`:/pdf bwits/pdf2htmlex pdf2htmlEX'
+pdf2htmlEX --zoom 1 test.pdf
+```

+ 6 - 18
convert.go

@@ -1,30 +1,18 @@
 package main
 package main
 
 
 import (
 import (
-	"fmt"
 	"os/exec"
 	"os/exec"
 )
 )
 
 
 // exec command
 // exec command
 func doCommand(cmd string) {
 func doCommand(cmd string) {
-	fmt.Println(cmd)
-	cmdRes := exec.Command("libreoffice", "--headless", "--convert-to html", "*.doc", "--outdir", "output")
-	stdout, err := cmdRes.Output()
-	if err != nil {
-		fmt.Println(err.Error())
-		return
-	}
-	fmt.Print(string(stdout))
-	cmdRes2 := exec.Command("libreoffice", "--headless", "--convert-to html", "*.docx", "--outdir", "output")
-	stdout2, err2 := cmdRes2.Output()
-	if err2 != nil {
-		fmt.Println(err2.Error())
-		return
-	}
-	fmt.Print("finish.... result is:")
-	fmt.Print(string(stdout2))
+	_ = exec.Command("sh", "-c", cmd).Run()
+	
 }
 }
 
 
 func main() {
 func main() {
-	doCommand("go get -u github.com/golang/protobuf/protoc-gen-go")
+	// if the output directory does not exist, create it
+	doCommand("mkdir -p output")
+	doCommand("libreoffice --headless --convert-to \"html:XHTML Writer File:UTF8\" *.doc --outdir output")
+	doCommand("libreoffice --headless --convert-to \"html:XHTML Writer File:UTF8\" *.docx --outdir output")
 }
 }

+ 10 - 0
docker-compose.yml

@@ -0,0 +1,10 @@
+version: '3.4'
+
+services:
+  wordtohtml:
+    image: jianboy/wordtohtml:v1.0.1
+    build:
+      context: .
+      dockerfile: ./Dockerfile
+    ports:
+      - 5000:5000