Python 从麦克风获取音频并进行语音识别
打开麦克风音频流
# Open an input (capture) stream using the recording parameters.
stream = audio.open(
    format=FORMAT,
    channels=CHANNELS,
    rate=RATE,
    input=True,  # input stream, i.e. capture rather than playback
    frames_per_buffer=CHUNK,
)
设置参数
# Recording parameters

# 16-bit sample resolution
FORMAT = pyaudio.paInt16
# Mono
CHANNELS = 1
# Sample rate: 16 kHz
RATE = 16000
# Size of each data chunk read from the stream
CHUNK = 1024
# Recording duration, in seconds
RECORD_SECONDS = 5
WAVE_OUTPUT_FILENAME = "output.wav"
读取数据块
# Read fixed-size chunks from the stream until roughly RECORD_SECONDS of
# audio has been collected.
frames = []  # raw audio chunks, in capture order
chunk_count = int(RATE / CHUNK * RECORD_SECONDS)
for _ in range(chunk_count):
    data = stream.read(CHUNK)
    frames.append(data)
阿里语音识别模型加载
# Local directories of the models passed to AutoModel below: the main
# recognition model plus the VAD, punctuation and speaker models.
paraformer_path = "./iic/speech_paraformer-large-vad-punc-spk_asr_nat-zh-cn"
fsmn_path = "./iic/speech_fsmn_vad_zh-cn-16k-common-pytorch"
ct_punc_path = "./iic/punc_ct-transformer_cn-en-common-vocab471067-large"
cam_path = "./iic/speech_campplus_sv_zh-cn_16k-common"

# Build the combined model; inference runs on the CPU.
model = AutoModel(
    model=paraformer_path,
    model_revision="v2.0.4",
    vad_model=fsmn_path,
    vad_model_revision="v2.0.4",
    punc_model=ct_punc_path,
    punc_model_revision="v2.0.4",
    spk_model=cam_path,
    spk_model_revision="v2.0.2",
    device="cpu",
)
阿里语音识别
# Run recognition on the recorded WAV file, passing a hotword to the model.
res = model.generate(
    input=WAVE_OUTPUT_FILENAME,
    batch_size_s=16000,
    hotword='魔搭',
)
整体代码
import pyaudio
import wave
import threading
import keyboard
from funasr import AutoModel
# --- Recording parameters ---
FORMAT = pyaudio.paInt16  # 16-bit sample resolution
CHANNELS = 1  # mono
RATE = 16000  # sample rate: 16 kHz
CHUNK = 1024  # size of each data chunk read from the stream
WAVE_OUTPUT_FILENAME = "./wav_data/output.wav"

# --- Shared recording state ---
audio = pyaudio.PyAudio()  # PyAudio instance used to open streams
frames = []  # raw audio chunks appended by record_audio()
stream = None  # set by start_recording() when a stream is opened
recording = False  # True while a recording is in progress

# --- Speech recognition model ---
# Local directories of the models passed to AutoModel below: the main
# recognition model plus the VAD, punctuation and speaker models.
paraformer_path = "./iic/speech_paraformer-large-vad-punc-spk_asr_nat-zh-cn"
fsmn_path = "./iic/speech_fsmn_vad_zh-cn-16k-common-pytorch"
ct_punc_path = "./iic/punc_ct-transformer_cn-en-common-vocab471067-large"
cam_path = "./iic/speech_campplus_sv_zh-cn_16k-common"

# Build the combined model once at start-up; inference runs on the CPU.
model = AutoModel(
    model=paraformer_path,
    model_revision="v2.0.4",
    vad_model=fsmn_path,
    vad_model_revision="v2.0.4",
    punc_model=ct_punc_path,
    punc_model_revision="v2.0.4",
    spk_model=cam_path,
    spk_model_revision="v2.0.2",
    device="cpu",
)
print("加载模型完成!!!")
def start_recording():
    """Open the microphone stream and start the background capture thread.

    Does nothing when a recording is already in progress.

    Raises:
        Whatever ``audio.open`` raises when the input stream cannot be
        opened; in that case ``recording`` is left False.
    """
    global stream, recording
    if recording:
        return

    print("开始录音...")
    # Drop any chunks left in the shared buffer so the saved file contains
    # only the take that is about to start.
    frames.clear()
    # Open the stream BEFORE setting the flag: if opening fails, the
    # exception propagates with `recording` still False, instead of leaving
    # the program marked as recording with no usable stream.
    stream = audio.open(
        format=FORMAT,
        channels=CHANNELS,
        rate=RATE,
        input=True,
        frames_per_buffer=CHUNK,
    )
    recording = True
    # Capture runs on its own thread so this hotkey callback returns at once.
    record_thread = threading.Thread(target=record_audio)
    record_thread.start()
def stop_recording():
    """Stop the current recording, save it to disk and run recognition.

    Does nothing when no recording is in progress. The recognition result
    is printed, not returned.
    """
    global stream, recording
    if not recording:
        return

    print("录音结束.")
    recording = False  # tells the record_audio() loop to stop
    stream.stop_stream()
    stream.close()
    save_audio()
    # The shared PyAudio instance is intentionally NOT terminated here:
    # the script keeps listening for hotkeys, and a terminated instance
    # cannot open another stream for the next recording.
    # Empty the buffer now that it is saved, so a later recording does not
    # get this take's audio prepended.
    frames.clear()

    print("开始识别!!!")
    res = model.generate(
        input=WAVE_OUTPUT_FILENAME,
        batch_size_s=16000,
        hotword='魔搭',
    )
    print("识别结束!!!")
    print("识别结果:", res)
def record_audio():
    """Append audio chunks from ``stream`` to ``frames`` while recording.

    Runs on the background thread started by start_recording() and exits
    once the module-level ``recording`` flag becomes False.

    Raises:
        OSError: if reading fails while a recording is still in progress.
    """
    while recording:
        try:
            # Do not raise on input overflow: a few dropped frames should
            # not abort the whole take.
            data = stream.read(CHUNK, exception_on_overflow=False)
        except OSError:
            if recording:
                # Still supposed to be recording: a genuine device error.
                raise
            # The stream was stopped/closed by stop_recording() while this
            # read was in flight; treat it as the normal end of the take.
            break
        frames.append(data)
def save_audio():
    """Write the chunks collected in ``frames`` to WAVE_OUTPUT_FILENAME.

    The file is written as a WAV file using the module's CHANNELS, FORMAT
    and RATE settings. The output directory is created if it is missing.
    """
    import os  # local import: only this function needs it

    # wave.open() does not create missing directories, so make sure the
    # target directory exists first.
    out_dir = os.path.dirname(WAVE_OUTPUT_FILENAME)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    # The context manager closes the file even if a write fails.
    with wave.open(WAVE_OUTPUT_FILENAME, 'wb') as wf:
        wf.setnchannels(CHANNELS)
        wf.setsampwidth(audio.get_sample_size(FORMAT))
        wf.setframerate(RATE)
        wf.writeframes(b''.join(frames))
# Register the hotkeys: Ctrl starts a recording, Alt stops it.
for hotkey, handler in (("ctrl", start_recording), ("alt", stop_recording)):
    keyboard.add_hotkey(hotkey, handler)

print("按 Ctrl 开始录音,按 Alt 结束录音")
keyboard.wait()  # keep the program running
原文地址:https://blog.csdn.net/yangzheng_520/article/details/144283362
免责声明:本站文章内容转载自网络资源,如本站内容侵犯了原著者的合法权益,可联系本站删除。更多内容请关注自学内容网(zxcms.com)!