Browse Source

https://huggingface.co/spaces/abidlabs/whisper-large-v2

liuyuqi-dellpc 1 year ago
parent
commit
db945cc265
3 changed files with 48 additions and 4 deletions
  1. 44 4
      app.py
  2. 1 0
      packages.txt
  3. 3 0
      requirements.txt

+ 44 - 4
app.py

@@ -1,7 +1,47 @@
+#!/usr/bin/env python
+# -*- encoding: utf-8 -*-
+'''
+@Contact :   liuyuqi.gov@msn.cn
+@Time    :   2023/05/12 18:19:33
+@License :   Copyright © 2017-2022 liuyuqi. All Rights Reserved.
+@Desc    :   whisper voice to text
+'''
+
+import torch
+from transformers import pipeline
 import gradio as gr
+# import pytube as pt
+
+# Hugging Face Hub id of the Whisper checkpoint loaded by this app.
+MODE_NAME="openai/whisper-large-v2"
+# Pipeline device: CUDA device 0 when a GPU is available, otherwise the CPU.
+device= 0 if torch.cuda.is_available() else "cpu"
+
+# Shared speech-recognition pipeline, configured with chunk_length_s=30.
+pipe = pipeline("automatic-speech-recognition", model=MODE_NAME, device=device,
+chunk_length_s=30)
+
+
+# Kept as a module-level name in case other code reads it.
+all_special_ids = pipe.tokenizer.all_special_ids
+# Resolve the task tokens by name rather than by their position in
+# all_special_ids: positional lookup silently selects the wrong token if the
+# tokenizer's special-token list is ever reordered or extended.
+transcribe_token_id = pipe.tokenizer.convert_tokens_to_ids("<|transcribe|>")
+translate_token_id = pipe.tokenizer.convert_tokens_to_ids("<|translate|>")
+
 
-def greet(name):
-    return "Hello " + name + "!!"
+def transcribe(microphone, state, task="transcribe"):
+    """Run speech recognition on one recording and append it to the transcript.
+
+    Args:
+        microphone: Audio input handed to the pipeline, or None when no
+            recording is available.
+        state: Transcript text accumulated by previous calls.
+        task: "transcribe" selects the transcribe token; any other value
+            selects the translate token.
+
+    Returns:
+        A 2-tuple holding the updated transcript twice: one copy for the
+        visible output and one to be stored as the next ``state``.
+    """
+    # Without a recording there is nothing to decode, and the pipeline would
+    # raise on None; hand the transcript back unchanged instead.
+    if microphone is None:
+        return state, state
+    task_token_id = transcribe_token_id if task == "transcribe" else translate_token_id
+    # The task is selected by forcing its token at decoder position 2 on the
+    # shared model config.
+    pipe.model.config.forced_decoder_ids = [[2, task_token_id]]
+    text = pipe(microphone)["text"]
+    transcript = state + "\n" + text
+    return transcript, transcript
 
-iface = gr.Interface(fn=greet, inputs="text", outputs="text")
-iface.launch()
+# Gradio UI wired to transcribe(): the audio component and a State value are
+# the inputs; the textbox shows the transcript and the State output feeds the
+# same transcript back into the next call.
+iface = gr.Interface(
+    fn=transcribe,
+    inputs=[
+        gr.Audio(source="microphone", type="filepath", optional=True),
+        gr.State(value=""),
+    ],
+    outputs=[
+        gr.Textbox(lines=15),
+        gr.State(),
+    ],
+    title="Speech to Text",
+    description="Transcribe speech from your microphone or from a youtube video",
+    live=True,
+    layout="horizontal",
+    theme="huggingface",
+    allow_flagging="never",
+)
+# Start the app with request queueing enabled.
+iface.launch(enable_queue=True)

+ 1 - 0
packages.txt

@@ -0,0 +1 @@
+ffmpeg

+ 3 - 0
requirements.txt

@@ -0,0 +1,3 @@
+transformers==4.29.0
+torch==2.0.1
+pytube==15.0.0