|
@@ -1,7 +1,47 @@
|
|
|
+#!/usr/bin/env python
|
|
|
+# -*- encoding: utf-8 -*-
|
|
|
+'''
|
|
|
+@Contact : liuyuqi.gov@msn.cn
|
|
|
+@Time : 2023/05/12 18:19:33
|
|
|
+@License : Copyright © 2017-2022 liuyuqi. All Rights Reserved.
|
|
|
+@Desc : whisper voice to text
|
|
|
+'''
|
|
|
+
|
|
|
+import torch
|
|
|
+from transformers import pipeline
|
|
|
import gradio as gr
|
|
|
+# import pytube as pt
|
|
|
+
|
|
|
+# Hugging Face Hub id of the Whisper checkpoint loaded by the pipeline.
|
|
|
+MODE_NAME = "openai/whisper-large-v2"
|
|
|
+# Use the first CUDA GPU when one is available, otherwise run on the CPU.
|
|
|
+device = 0 if torch.cuda.is_available() else "cpu"
|
|
|
+
|
|
|
+# chunk_length_s=30 lets the pipeline handle long recordings in 30-second chunks.
|
|
|
+pipe = pipeline(
|
|
|
+    "automatic-speech-recognition",
|
|
|
+    model=MODE_NAME,
|
|
|
+    device=device,
|
|
|
+    chunk_length_s=30,
|
|
|
+)
|
|
|
+
|
|
|
+
|
|
|
+all_special_ids = pipe.tokenizer.all_special_ids
|
|
|
+# Resolve the task tokens by name instead of by position in `all_special_ids`:
|
|
|
+# positional indices silently pick the wrong token if the tokenizer's
|
|
|
+# special-token list is ever reordered or extended.
|
|
|
+transcribe_token_id = pipe.tokenizer.convert_tokens_to_ids("<|transcribe|>")
|
|
|
+translate_token_id = pipe.tokenizer.convert_tokens_to_ids("<|translate|>")
|
|
|
+
|
|
|
|
|
|
-def greet(name):
|
|
|
- return "Hello " + name + "!!"
|
|
|
+def transcribe(microphone, state, task="transcribe"):
|
|
|
+    """Run speech recognition on a recording and append the text to the transcript.
|
|
|
+
|
|
|
+    Args:
|
|
|
+        microphone: Path of the recorded audio file, or None when no audio
|
|
|
+            was supplied (the audio input is optional).
|
|
|
+        state: Transcript accumulated by earlier calls.
|
|
|
+        task: "transcribe" selects the transcribe token; any other value
|
|
|
+            selects the translate token.
|
|
|
+
|
|
|
+    Returns:
|
|
|
+        The updated transcript twice: once for the text output and once as
|
|
|
+        the new state.
|
|
|
+    """
|
|
|
+    # Nothing was recorded: keep the transcript unchanged instead of
|
|
|
+    # handing None to the pipeline.
|
|
|
+    if microphone is None:
|
|
|
+        return state, state
|
|
|
+    task_token_id = transcribe_token_id if task == "transcribe" else translate_token_id
|
|
|
+    # Force the selected task token at decoder position 2.
|
|
|
+    pipe.model.config.forced_decoder_ids = [[2, task_token_id]]
|
|
|
+    text = pipe(microphone)["text"]
|
|
|
+    transcript = state + "\n" + text
|
|
|
+    return transcript, transcript
|
|
|
|
|
|
-iface = gr.Interface(fn=greet, inputs="text", outputs="text")
|
|
|
-iface.launch()
|
|
|
+# Live microphone demo: the State input/output pair carries the accumulated
|
|
|
+# transcript from one call of `transcribe` to the next.
|
|
|
+iface = gr.Interface(
|
|
|
+    fn=transcribe,
|
|
|
+    inputs=[
|
|
|
+        gr.Audio(source="microphone", type="filepath", optional=True),
|
|
|
+        gr.State(value=""),
|
|
|
+    ],
|
|
|
+    outputs=[
|
|
|
+        gr.Textbox(lines=15),
|
|
|
+        gr.State(),
|
|
|
+    ],
|
|
|
+    title="Speech to Text",
|
|
|
+    layout="horizontal",
|
|
|
+    theme="huggingface",
|
|
|
+    live=True,
|
|
|
+    # Only a microphone input is wired up, so the description does not
|
|
|
+    # advertise YouTube input.
|
|
|
+    description="Transcribe speech from your microphone",
|
|
|
+    allow_flagging="never",
|
|
|
+)
|
|
|
+iface.launch(enable_queue=True)
|