1 year ago · 2b1a9485e4
--- a/README.md
+++ b/README.md
@@ -13,4 +13,22 @@ pip install -U openai-whisper
 
				 # mp3音频
			
 
				 whisper audio.flac audio.mp3 audio.wav --model medium
			
 
				 
			
 
				-```
			
 
				+```
			
 
				+支持模型：
			
 
				+- tiny
			
 
				+- tiny.en
			
 
				+- base
			
 
				+- base.en
			
 
				+- small
			
 
				+- small.en
			
 
				+- medium
			
 
				+- medium.en
			
 
				+- large
			
 
				+- large-v2
			
 
				+
			
 
				+
			
 
				+## Reference
			
 
				+
			
 
				+https://github.com/openai/whisper
			
 
				+
			
 
				+
			
--- a/loader_model.py
+++ b/loader_model.py
@@ -0,0 +1,19 @@
 
				+#!/usr/bin/env python
			
 
				+# -*- encoding: utf-8 -*-
			
 
				+'''
			
 
				+@Contact :   liuyuqi.gov@msn.cn
			
 
				+@Time    :   2024/09/26 17:33:18
			
 
				+@License :   Copyright © 2017-2022 liuyuqi. All Rights Reserved.
			
 
				+@Desc    :   下载模型，加载模型
			
 
				+
			
 
				+~/.cache/whisper
			
 
				+
			
 
				+'''
			
 
				+
			
 
				+import whisper
			
 
				+
			
 
				+# 加载 medium 模型
			
 
				+model = whisper.load_model("medium")
			
 
				+
			
 
				+# 加载成功后模型文件会被保存在本地缓存中
			
 
				+print("模型加载完成")
			
--- a/main.py
+++ b/main.py
@@ -0,0 +1,83 @@
 
				+#!/usr/bin/env python
			
 
				+# -*- encoding: utf-8 -*-
			
 
				+'''
			
 
				+@Contact :   liuyuqi.gov@msn.cn
			
 
				+@Time    :   2024/09/26 17:12:46
			
 
				+@License :   Copyright © 2017-2022 liuyuqi. All Rights Reserved.
			
 
				+@Desc    :   entry point
			
 
				+
			
 
				+音频转文本
			
 
				+'''
			
 
				+import whisper
			
 
				+import time
			
 
				+
			
 
				+def openai_whisper_model_result(file_path, device, model_name,model_path,prompt, temp, vad, lang, beam_size, min_vad):
			
 
				+    """ 
			
 
				+    file_path: 音频文件的路径。
			
 
				+    device: 运行设备类型（例如，cuda 表示使用 GPU，cpu 表示使用 CPU）。
			
 
				+    model_name: 要加载的 Whisper 模型名称（如 tiny，base，medium，large 等）。
			
 
				+    model_path: 模型所在的目录路径。
			
 
				+    prompt: 初始文本提示，用于引导转录结果。
			
 
				+    temp: 温度参数，用于控制转录输出的随机性。
			
 
				+    vad: 语音活动检测 (VAD) 选项，用于辅助识别音频中的语音部分。
			
 
				+    lang: 语言设置（虽然代码中并没有显式使用它）。
			
 
				+    beam_size: 用于波束搜索的波束数量（影响转录的质量和速度）。
			
 
				+    min_vad: 在代码中没有明确使用的参数。
			
 
				+    """
			
 
				+    start_time = time.time()  # 记录开始时间
			
 
				+    if model_name not in ['tiny', 'tiny.en', 'base', 'base.en', 'small', 'small.en', 'medium', 'medium.en', 'large-v1',
			
 
				+                          'large-v2', 'large-v3', 'large']:
			
 
				+        print("\n*** Faster Whisper 本地模型加载模式 ***\n")
			
 
				+    else:
			
 
				+        print("\n*** Faster Whisper 调用模式 ***\n")
			
 
				+    print(f"- 运行模型：{model_name}")
			
 
				+    print(f"- 运行模型路径：{model_path}")
			
 
				+    print(f"- 运行方式：{device}")
			
 
				+    print(f"- VAD辅助：{vad}")
			
 
				+
			
 
				+    try:
			
 
				+        file_path.split('.')
			
 
				+        file_path = open(file_path, "rb")
			
 
				+    except:
			
 
				+        file_path = open(file_path + "/output.mp3", "rb")
			
 
				+    # 加载音频文件为 NumPy 数组
			
 
				+    #audio = whisper.load_audio(file_path)
			
 
				+    model = whisper.load_model(model_name,device,model_path,in_memory=True)
			
 
				+    #result = model.transcribe(audio,initial_prompt=prompt,beam_size=beam_size,temperature=temp)
			
 
				+    result = model.transcribe("D:\\tmp\\output.mp3",initial_prompt=prompt)
			
 
				+    print(f"- whisper识别内容：\n{result['segments']}\n")
			
 
				+    # 获取 segments
			
 
				+    segments = result["segments"]
			
 
				+    segments_dict =openai_whisper_result_dict(segments)
			
 
				+    print(f"- whisper2识别内容：\n{segments_dict}\n")
			
 
				+    end_time = time.time()  # 记录结束时间
			
 
				+    execution_time = end_time - start_time
			
 
				+    print(f"- 方法执行时间：{execution_time:.2f} 秒")
			
 
				+    return segments_dict
			
 
				+
			
 
				+def openai_whisper_result_dict(segments):
			
 
				+    """ 
			
 
				+    segments: Whisper 模型识别结果的分段列表。
			
 
				+    """
			
 
				+    # 将分段列表转换为字典
			
 
				+    segments_dict = {
			
 
				+        'text': ' '.join([segment['text'] for segment in segments]),
			
 
				+        'segments': [{
			
 
				+            'id': segment['id'],
			
 
				+            'seek': segment['seek'],
			
 
				+            'start': segment['start'],
			
 
				+            'end': segment['end'],
			
 
				+            'text': segment['text'],
			
 
				+            'tokens': segment['tokens'],
			
 
				+            'temperature': segment.get('temperature', None),
			
 
				+            'avg_logprob': segment['avg_logprob'],
			
 
				+            'compression_ratio': segment['compression_ratio'],
			
 
				+            'no_speech_prob': segment['no_speech_prob']}
			
 
				+            for segment in segments
			
 
				+        ]
			
 
				+    }
			
 
				+    return segments_dict
			
 
				+
			
 
				+if __name__=='__main__':
			
 
				+
			
 
				+    openai_whisper_model_result("D:\\tmp","cuda","medium","E:\\AI\\whisper-medium","Don’t make each line too long.","0.8",False,"自动识别",5,500)
			
--- a/requirements.txt
+++ b/requirements.txt
@@ -0,0 +1 @@
 
				+openai-whisper