# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Global parameters for the VGGish model.

See vggish_slim.py for more information.

This module only defines constants; it has no imports and no side effects.
"""

# Architectural constants.
NUM_FRAMES = 96  # Frames in input mel-spectrogram patch.
NUM_BANDS = 64  # Frequency bands in input mel-spectrogram patch.
EMBEDDING_SIZE = 128  # Size of embedding layer.

# Hyperparameters used in feature and example generation.
SAMPLE_RATE = 16000
STFT_WINDOW_LENGTH_SECONDS = 0.025
STFT_HOP_LENGTH_SECONDS = 0.010
# The number of mel bins is tied to the number of input frequency bands, so
# changing NUM_BANDS changes both.
NUM_MEL_BINS = NUM_BANDS
MEL_MIN_HZ = 125
MEL_MAX_HZ = 7500
LOG_OFFSET = 0.01  # Offset used for stabilized log of input mel-spectrogram.
# Window and hop are equal, so consecutive examples do not overlap.
EXAMPLE_WINDOW_SECONDS = 0.96  # Each example contains 96 10ms frames
EXAMPLE_HOP_SECONDS = 0.96  # with zero overlap.

# Parameters used for embedding postprocessing.
PCA_EIGEN_VECTORS_NAME = 'pca_eigen_vectors'
PCA_MEANS_NAME = 'pca_means'
# Lower and upper bounds used when quantizing; symmetric around zero.
QUANTIZE_MIN_VAL = -2.0
QUANTIZE_MAX_VAL = +2.0

# Hyperparameters used in training.
INIT_STDDEV = 0.01  # Standard deviation used to initialize weights.
LEARNING_RATE = 1e-4  # Learning rate for the Adam optimizer.
ADAM_EPSILON = 1e-8  # Epsilon for the Adam optimizer.

# Names of ops, tensors, and features.
# Each tensor name is its op name with ':0' appended (TensorFlow's
# '<op_name>:<output_index>' form), so it always tracks the op name.
INPUT_OP_NAME = 'vggish/input_features'
INPUT_TENSOR_NAME = INPUT_OP_NAME + ':0'
OUTPUT_OP_NAME = 'vggish/embedding'
OUTPUT_TENSOR_NAME = OUTPUT_OP_NAME + ':0'
AUDIO_EMBEDDING_FEATURE_NAME = 'audio_embedding'