Commit f7e659eb0b by lqg (2 years ago)

+ 21 - 0
ai/.devcontainer/Dockerfile

@@ -0,0 +1,21 @@
+# See here for image contents: https://github.com/microsoft/vscode-dev-containers/tree/v0.245.2/containers/python-3/.devcontainer/base.Dockerfile
+
+# [Choice] Python version (use -bullseye variants on local arm64/Apple Silicon): 3, 3.10, 3.9, 3.8, 3.7, 3.6, 3-bullseye, 3.10-bullseye, 3.9-bullseye, 3.8-bullseye, 3.7-bullseye, 3.6-bullseye, 3-buster, 3.10-buster, 3.9-buster, 3.8-buster, 3.7-buster, 3.6-buster
+ARG VARIANT="3.10-bullseye"
+FROM mcr.microsoft.com/vscode/devcontainers/python:0-${VARIANT}
+
+# [Choice] Node.js version: none, lts/*, 16, 14, 12, 10
+ARG NODE_VERSION="none"
+RUN if [ "${NODE_VERSION}" != "none" ]; then su vscode -c "umask 0002 && . /usr/local/share/nvm/nvm.sh && nvm install ${NODE_VERSION} 2>&1"; fi
+
+# [Optional] If your pip requirements rarely change, uncomment this section to add them to the image.
+# COPY requirements.txt /tmp/pip-tmp/
+# RUN pip3 --disable-pip-version-check --no-cache-dir install -r /tmp/pip-tmp/requirements.txt \
+#    && rm -rf /tmp/pip-tmp
+
+# [Optional] Uncomment this section to install additional OS packages.
+# RUN apt-get update && export DEBIAN_FRONTEND=noninteractive \
+#     && apt-get -y install --no-install-recommends <your-package-list-here>
+
+# [Optional] Uncomment this line to install global node packages.
+# RUN su vscode -c "source /usr/local/share/nvm/nvm.sh && npm install -g <your-package-here>" 2>&1

+ 21 - 0
ai/.devcontainer/devcontainer.json

@@ -0,0 +1,21 @@
+{
+	"customizations": {
+		"vscode": {
+			"extensions": [
+				"ms-python.python",
+				"ms-toolsai.jupyter",
+				"GitHub.copilot-labs",
+				"GitHub.copilot-nightly",
+				"ms-toolsai.jupyter-keymap",
+				"ms-toolsai.jupyter-renderers",
+				"ms-vscode.makefile-tools",
+				"ms-python.vscode-pylance"
+			]
+		}
+	},
+	"features": {
+		 "ghcr.io/devcontainers/features/nvidia-cuda:1": {
+			"installCudnn": true
+		 }
+	}
+}

+ 15 - 0
ai/.github/workflows/main.yml

@@ -0,0 +1,15 @@
+name: Python application test with GitHub Actions using dev containers
+on: [push]
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/checkout@v2
+    - name: Build and run the dev container
+      uses: devcontainers/ci@v0.2
+      with:
+        runCmd: |
+          make install
+          make lint
+          make test
+          make format

+ 129 - 0
ai/.gitignore

@@ -0,0 +1,129 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+pip-wheel-metadata/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+.python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/

+ 18 - 0
ai/Makefile

@@ -0,0 +1,18 @@
+install:
+	pip install --upgrade pip &&\
+		pip install -r requirements.txt
+	# Alternative conda-based setup:
+	#   conda env create -f environment.yml
+	#   conda activate hf
+	#   conda install pytorch torchvision -c pytorch  # also needed on top of environment.yml
+
+test:
+	#python -m pytest -vv test_main.py  # disabled until test_main.py exists; `make test` is currently a no-op
+
+format:
+	black *.py
+
+lint:
+	pylint --disable=R,C *.py
+
+all: install lint test

+ 40 - 0
ai/README.md

@@ -0,0 +1,40 @@
+# ai
+
+Repo where I build AI tools on top of Hugging Face
+
+## How to do fast inference using the API
+
+* [Use the Hugging Face Inference API](https://gradio.app/using_hugging_face_integrations/#using-hugging-face-inference-api)
+* [How to use inference](https://huggingface.co/docs/huggingface_hub/how-to-inference) (a minimal sketch of a raw API call follows below)
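+
+A minimal sketch of a raw Inference API call (the model id and the `HF_API_TOKEN` environment variable are illustrative placeholders, not part of this repo):
+
+```python
+import os
+
+import requests
+
+# Hosted Inference API endpoint, pointed at a public sentiment model for illustration
+API_URL = "https://api-inference.huggingface.co/models/distilbert-base-uncased-finetuned-sst-2-english"
+headers = {"Authorization": f"Bearer {os.environ['HF_API_TOKEN']}"}
+
+# POST the input text; the JSON response contains label/score pairs
+response = requests.post(API_URL, headers=headers, json={"inputs": "This library is great!"})
+print(response.json())
+```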
+
+## Verify the GPU is working
+
+* [Reference PyTorch site](https://pytorch.org/get-started/locally/)
+* Run `numba -s | grep -i cuda` (the report capitalizes "CUDA", so grep case-insensitively); a Python-level check is sketched after this list
+* Run `python utils/verify_cuda_pytorch.py`
+* Run `nvidia-smi`; it should list at least one GPU
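+
+For a quick check from inside Python, numba's own detection helper can be used (a sketch; `cuda.detect()` prints a device summary and returns whether a usable device was found):
+
+```python
+from numba import cuda
+
+# Print a summary of detected CUDA hardware; returns True if any device is usable
+assert cuda.detect(), "no usable CUDA device found"
+```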

+ 17 - 0
ai/environment.yml

@@ -0,0 +1,17 @@
+name: hf
+dependencies:
+  - pip
+  - python=3.9
+  - ipykernel
+  - numpy=1.21
+  - pytorch
+  - cudatoolkit
+  - numba
+  - pip:
+    - geopy==2.2.0
+    - pytest==7.1.3
+    - pandas==1.4.4
+    - click==8.1.3
+    - black==22.8.0
+    - pylint==2.15.0
+    - transformers

+ 11 - 0
ai/requirements.txt

@@ -0,0 +1,11 @@
+transformers
+torch
+click==7.1.2
+tensorflow==2.9.1
+beautifulsoup4==4.11.1
+wikipedia==1.4.0
+pylint==2.15.0
+ipython==8.4.0
+gradio
+numpy==1.23.2
+sentencepiece

+ 72 - 0
ai/utils/cuda_test_numba.py

@@ -0,0 +1,72 @@
+"""
+Shared-memory matrix multiplication example, from the numba documentation:
+https://numba.pydata.org/numba-doc/latest/cuda/examples.html#matrix-multiplication
+"""
+
+from timeit import default_timer as timer
+import numpy as np
+from numba import cuda, float32
+
+# Controls threads per block and shared memory usage.
+# The computation will be done on blocks of TPBxTPB elements.
+TPB = 16
+
+@cuda.jit
+def fast_matmul(A, B, C):
+    # Define an array in the shared memory
+    # The size and type of the arrays must be known at compile time
+    sA = cuda.shared.array(shape=(TPB, TPB), dtype=float32)
+    sB = cuda.shared.array(shape=(TPB, TPB), dtype=float32)
+
+    x, y = cuda.grid(2)
+
+    tx = cuda.threadIdx.x
+    ty = cuda.threadIdx.y
+    bpg = cuda.gridDim.x    # blocks per grid
+
+    if x >= C.shape[0] or y >= C.shape[1]:
+        # Outside C's bounds ("or", not "and"); safe only because dims are exact multiples of TPB
+        return
+
+    # Each thread computes one element in the result matrix.
+    # The dot product is chunked into dot products of TPB-long vectors.
+    tmp = 0.
+    for i in range(bpg):
+        # Preload data into shared memory
+        sA[tx, ty] = A[x, ty + i * TPB]
+        sB[tx, ty] = B[tx + i * TPB, y]
+
+        # Wait until all threads finish preloading
+        cuda.syncthreads()
+
+        # Computes partial product on the shared memory
+        for j in range(TPB):
+            tmp += sA[tx, j] * sB[j, ty]
+
+        # Wait until all threads finish computing
+        cuda.syncthreads()
+
+    C[x, y] = tmp
+
+# run it
+if __name__ == '__main__':
+
+    # Initialize the data arrays
+    A = np.full((TPB*20, TPB*20), 3, np.float32)
+    B = np.full((TPB*20, TPB*20), 4, np.float32)
+
+    # Configure the blocks
+    threadsperblock = (TPB, TPB)
+    blockspergrid_x = int(np.ceil(A.shape[0] / threadsperblock[0]))
+    blockspergrid_y = int(np.ceil(B.shape[1] / threadsperblock[1]))
+    blockspergrid = (blockspergrid_x, blockspergrid_y)
+
+    # Launch the kernel; the first launch also pays the one-time JIT compilation cost
+    C = np.zeros_like(A)
+    start = timer()
+    fast_matmul[blockspergrid, threadsperblock](A, B, C)
+    cuda.synchronize()
+    print("Time taken: %f" % (timer() - start))
+
+    # Print the result; every element should be 3 * 4 * (TPB * 20) = 3840
+    print(C)

+ 12 - 0
ai/utils/verify_cuda_pytorch.py

@@ -0,0 +1,12 @@
+import torch
+
+if torch.cuda.is_available():
+    print("CUDA is available")
+    print(f"CUDA version: {torch.version.cuda}")
+    print(f"PyTorch version: {torch.__version__}")
+    print(f"cuDNN version: {torch.backends.cudnn.version()}")
+    print(f"Number of CUDA devices: {torch.cuda.device_count()}")
+    print(f"Current CUDA device: {torch.cuda.current_device()}")
+    print(f"Device name: {torch.cuda.get_device_name(torch.cuda.current_device())}")
+else:
+    print("CUDA is not available")