
Do some cleanup in the README and also add an option to show the landmarks in the webcam view.

Dat Tran · 6 years ago · commit 7436614fcd
7 changed files with 169 additions and 12 deletions

  1. .gitignore (+101 -0)
  2. README.md (+19 -10)
  3. environment.yml (+36 -0)
  4. example.gif (BIN)
  5. generate_train_data.py (+4 -0)
  6. reduce_model.py (+1 -0)
  7. run_webcam.py (+8 -2)

.gitignore (+101 -0)

@@ -0,0 +1,101 @@
+# Created by .ignore support plugin (hsz.mobi)
+### Python template
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+env/
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*,cover
+.hypothesis/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# pyenv
+.python-version
+
+# celery beat schedule file
+celerybeat-schedule
+
+# dotenv
+.env
+
+# virtualenv
+.venv/
+venv/
+ENV/
+
+# Spyder project settings
+.spyderproject
+
+# Rope project settings
+.ropeproject
+
+# Custom
+.idea/
+face2face-model*/
+face2face-reduced-model*/
+landmarks
+original
+angela_merkel_speech.mp4
+shape_predictor_68_face_landmarks.dat

README.md (+19 -10)

@@ -9,6 +9,9 @@ This is a pix2pix demo that learns from facial landmarks and translates this int
 ```
 # Clone this repo
 git clone git@github.com:datitran/face2face-demo.git
+
+# Create the conda environment from file (Mac OSX)
+conda env create -f environment.yml
 ```
 
 #### 2. Generate Training Data
@@ -21,11 +24,13 @@ Input:
 
 - `file` is the name of the video file from which you want to create the data set.
 - `num` is the number of train data to be created.
-- `landmark-model` is the facial landmark model that is used to detect the landmarks.
+- `landmark-model` is the facial landmark model that is used to detect the landmarks. A pre-trained facial landmark model is provided [here](http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2).
 
 Output:
 
-Two folders `original` and `landmarks` will be created.
+- Two folders `original` and `landmarks` will be created.
+
+If you want to download my dataset, here is also the [video file](https://u7410512.dl.dropboxusercontent.com/u/7410512/face2face-demo/angela_merkel_speech.mp4) that I used and the generated [training dataset](https://u7410512.dl.dropboxusercontent.com/u/7410512/face2face-demo/dataset.zip) (400 images already split into training and validation).
 
 #### 3. Train Model
 
@@ -87,7 +92,7 @@ For more information around training, have a look at Christopher Hesse's [pix2pi
     
     Output:
     
-    It returns a reduced model with less weights file size than the original model.
+    - It returns a reduced model with less weights file size than the original model.
 
 2. Second, we freeze the reduced model to a single file.
     ```
@@ -100,31 +105,35 @@ For more information around training, have a look at Christopher Hesse's [pix2pi
     
     Output:
     
-    It returns a frozen model file `frozen_model.pb` in the model folder.
-
+    - It returns a frozen model file `frozen_model.pb` in the model folder.
+    
+I have uploaded a pre-trained frozen model [here](https://u7410512.dl.dropboxusercontent.com/u/7410512/face2face-demo/face2face_model_epoch_200.zip). This model is trained on 400 images with epoch 200.
+    
 #### 5. Run Demo
 
 ```
-python run_webcam.py --source 0 --landmark-model shape_predictor_68_face_landmarks.dat --tf-model face2face-reduced-model/frozen_model.pb
+python run_webcam.py --source 0 --show 0 --landmark-model shape_predictor_68_face_landmarks.dat --tf-model face2face-reduced-model/frozen_model.pb
 ```
 
 Input:
 
-- `source` is the device index of the camera.
+- `source` is the device index of the camera (default=0).
+- `show` is an option to either display the normal input (0) or the facial landmark (1) alongside the generated image (default=0).
 - `landmark-model` is the facial landmark model that is used to detect the landmarks.
 - `tf-model` is the frozen model file.
 
 Example:
 
-- Add example image here
+![example](example.gif)
 
 ## Requirements
 - [Anaconda / Python 3.5](https://www.continuum.io/downloads)
-- [TensorFlow 1.0](https://www.tensorflow.org/)
+- [TensorFlow 1.2](https://www.tensorflow.org/)
 - [OpenCV 3.0](http://opencv.org/)
+- [Dlib 19.4](http://dlib.net/)
 
 ## Acknowledgments
-Kudos to [Christopher Hesse](https://github.com/christopherhesse) for his amazing pix2pix Tensorflow implementation and [Gene Kogan](http://genekogan.com/) for his inspirational workshop. 
+Kudos to [Christopher Hesse](https://github.com/christopherhesse) for his amazing pix2pix TensorFlow implementation and [Gene Kogan](http://genekogan.com/) for his inspirational workshop. 
 
 ## Copyright
 

environment.yml (+36 -0)

@@ -0,0 +1,36 @@
+name: face2face-demo
+channels: !!python/tuple
+- menpo
+- defaults
+dependencies:
+- bzip2=1.0.6=3
+- jlaura::opencv3=3.0.0=py35_0
+- jpeg=9b=0
+- libpng=1.6.27=0
+- menpo::boost=1.59.0=py35_0
+- menpo::dlib=19.4=py35_0
+- menpo::tbb=4.3_20141023=0
+- mkl=2017.0.3=0
+- numpy=1.13.0=py35_0
+- openssl=1.0.2l=0
+- pip=9.0.1=py35_1
+- python=3.5.3=1
+- readline=6.2=2
+- setuptools=27.2.0=py35_0
+- sqlite=3.13.0=0
+- tk=8.5.18=0
+- wheel=0.29.0=py35_0
+- xz=5.2.2=1
+- zlib=1.2.8=3
+- pip:
+  - backports.weakref==1.0rc1
+  - bleach==1.5.0
+  - html5lib==0.9999999
+  - imutils==0.4.3
+  - markdown==2.6.8
+  - protobuf==3.3.0
+  - six==1.10.0
+  - tensorflow==1.2.1
+  - werkzeug==0.12.2
+prefix: /Users/datitran/anaconda/envs/face2face-demo
+

example.gif (BIN)


generate_train_data.py (+4 -0)

@@ -1,3 +1,4 @@
+import os
 import cv2
 import dlib
 import time
@@ -13,6 +14,9 @@ def reshape_for_polyline(array):
 
 
 def main():
+    os.makedirs('original', exist_ok=True)
+    os.makedirs('landmarks', exist_ok=True)
+
     cap = cv2.VideoCapture(args.filename)
     fps = video.FPS().start()
 

reduce_model.py (+1 -0)

@@ -217,3 +217,4 @@ if __name__ == '__main__':
         # Export reduced model used for prediction
         saver = tf.train.Saver()
         saver.save(sess, '{}/reduced_model'.format(args.output_folder))
+        print("Model is exported to {}".format(checkpoint))
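
The added `print` reports where the reduced model was written. In TensorFlow 1.x, `tf.train.Saver.save()` returns the checkpoint path prefix, so a self-contained sketch of how such a printed value can be produced looks like this (assumptions: `checkpoint` may already be defined elsewhere in reduce_model.py; the dummy variable and output folder here exist only to make the sketch runnable):

```
import os
import tensorflow as tf

# Minimal sketch: Saver.save() returns the checkpoint path prefix,
# which is the kind of value the new print statement reports.
os.makedirs('face2face-reduced-model', exist_ok=True)
with tf.Session() as sess:
    dummy = tf.Variable(0, name='dummy')  # Saver needs at least one variable
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    checkpoint = saver.save(sess, 'face2face-reduced-model/reduced_model')
    print("Model is exported to {}".format(checkpoint))
```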

run_webcam.py (+8 -2)

@@ -91,9 +91,13 @@ def main():
         image_rgb = cv2.cvtColor(combined_image, cv2.COLOR_BGR2RGB)  # OpenCV uses BGR instead of RGB
         generated_image = sess.run(output_tensor, feed_dict={image_tensor: image_rgb})
         image_bgr = cv2.cvtColor(np.squeeze(generated_image), cv2.COLOR_RGB2BGR)
-        output_image = np.concatenate([resize(frame_resize), image_bgr], axis=1)
+        image_normal = np.concatenate([resize(frame_resize), image_bgr], axis=1)
+        image_landmark = np.concatenate([resize(black_image), image_bgr], axis=1)
 
-        cv2.imshow('frame', output_image)
+        if args.display_landmark == 0:
+            cv2.imshow('frame', image_normal)
+        else:
+            cv2.imshow('frame', image_landmark)
 
         fps.update()
         if cv2.waitKey(1) & 0xFF == ord('q'):
@@ -112,6 +116,8 @@ if __name__ == '__main__':
     parser = argparse.ArgumentParser()
     parser.add_argument('-src', '--source', dest='video_source', type=int,
                         default=0, help='Device index of the camera.')
+    parser.add_argument('--show', dest='display_landmark', type=int, default=0, choices=[0, 1],
+                        help='0 shows the normal input and 1 the facial landmark.')
     parser.add_argument('--landmark-model', dest='face_landmark_shape_file', type=str, help='Face landmark model file.')
     parser.add_argument('--tf-model', dest='frozen_model_file', type=str, help='Frozen TensorFlow model file.')
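
Taken together, the new `--show` flag and the display branch decide which left-hand panel appears next to the generated image: the raw webcam frame (`--show 0`, the default) or the black landmark image (`--show 1`). A condensed, self-contained sketch of that selection logic (the webcam capture, landmark drawing, and pix2pix inference from run_webcam.py are stubbed out with placeholder images; variable names follow the diff):

```
import argparse
import cv2
import numpy as np

parser = argparse.ArgumentParser()
parser.add_argument('--show', dest='display_landmark', type=int, default=0, choices=[0, 1],
                    help='0 shows the normal input and 1 the facial landmark.')
args = parser.parse_args()

# Placeholders standing in for the real webcam frame, the drawn landmark
# image, and the pix2pix output produced in run_webcam.py.
frame_resize = np.full((256, 256, 3), 128, dtype=np.uint8)
black_image = np.zeros((256, 256, 3), dtype=np.uint8)
image_bgr = np.full((256, 256, 3), 64, dtype=np.uint8)

image_normal = np.concatenate([frame_resize, image_bgr], axis=1)
image_landmark = np.concatenate([black_image, image_bgr], axis=1)

# Same branch as in the diff: 0 -> webcam input on the left, 1 -> landmarks.
cv2.imshow('frame', image_normal if args.display_landmark == 0 else image_landmark)
cv2.waitKey(0)
cv2.destroyAllWindows()
```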