3 years ago · db945cc265
--- a/app.py
+++ b/app.py
@@ -1,7 +1,47 @@
 
				+#!/usr/bin/env python
			
 
				+# -*- encoding: utf-8 -*-
			
 
				+'''
			
 
				+@Contact :   liuyuqi.gov@msn.cn
			
 
				+@Time    :   2023/05/12 18:19:33
			
 
				+@License :   Copyright © 2017-2022 liuyuqi. All Rights Reserved.
			
 
				+@Desc    :   whisper voice to text
			
 
				+'''
			
 
				+
			
 
				+import torch
			
 
				+from transformers import pipeline
			
 
				 import gradio as gr
			
 
				+# import pytube as pt
			
 
				+
			
 
				+MODE_NAME="openai/whisper-large-v2"
			
 
				+device= 0 if torch.cuda.is_available() else "cpu"
			
 
				+
			
 
				+pipe = pipeline("automatic-speech-recognition", model=MODE_NAME, device=device,
			
 
				+chunk_length_s=30)
			
 
				+
			
 
				+
			
 
				+all_special_ids = pipe.tokenizer.all_special_ids
			
 
				+transcribe_token_id = all_special_ids[-5]
			
 
				+translate_token_id = all_special_ids[-6]
			
 
				+
			
 
				 
			
 
				-def greet(name):
			
 
				-    return "Hello " + name + "!!"
			
 
				+def transcribe(microphone, state, task="transcribe"):
			
 
				+    file = microphone
			
 
				+    pipe.model.config.forced_decoder_ids = [[2, transcribe_token_id if task=="transcribe" else translate_token_id]]
			
 
				+    text = pipe(file)["text"]
			
 
				+    return state + "\n" + text, state + "\n" + text
			
 
				 
			
 
				-iface = gr.Interface(fn=greet, inputs="text", outputs="text")
			
 
				-iface.launch()
			
 
				+iface = gr.Interface(fn=transcribe, 
			
 
				+    inputs=[
			
 
				+        gr.Audio(source="microphone", type="filepath", optional=True),
			
 
				+        gr.State(value="")
			
 
				+    ], outputs=[
			
 
				+        gr.Textbox(lines=15),
			
 
				+        gr.State()],
			
 
				+    title="Speech to Text",
			
 
				+    layout="horizontal",
			
 
				+    theme="huggingface",
			
 
				+    live=True,
			
 
				+    description="Transcribe speech from your microphone or from a youtube video",
			
 
				+    allow_flagging="never",
			
 
				+    )
			
 
				+iface.launch(enable_queue=True)
			
--- a/packages.txt
+++ b/packages.txt
@@ -0,0 +1 @@
 
				+ffmpeg
			
--- a/requirements.txt
+++ b/requirements.txt
@@ -0,0 +1,3 @@
 
				+transformers==4.29.2
			
 
				+torch==2.0.1
			
 
				+pytube==15.0.0