利用阿里百炼大模型 DashScope,实现实时麦克风语音转文本输出。
pip install pyaudio dashscope
import pyaudio
import dashscope
from dashscope.audio.asr import (Recognition, RecognitionCallback,
RecognitionResult)
import os

# Prefer a key supplied through the DASHSCOPE_API_KEY environment variable so
# that a real credential never has to be committed with the script. The
# literal is kept only as a fallback so existing behaviour is unchanged when
# the variable is not set.
dashscope.api_key = os.environ.get('DASHSCOPE_API_KEY',
                                   'sk-d90a2c8cb92344bbb431d......')

# Shared between the recognition callback (which opens and closes the
# microphone) and the capture loop (which reads from it). Both stay None
# until the callback's on_open has run.
mic = None
stream = None
class Callback(RecognitionCallback):
    """Recognition callback that owns the PyAudio microphone stream.

    The microphone is opened in ``on_open`` and released in ``on_close``.
    The handles live in the module-level ``mic`` and ``stream`` globals,
    which is how the capture loop in this script reaches the stream.
    """

    def on_open(self) -> None:
        """Open a PyAudio input stream: mono, 16-bit samples, 16000 Hz."""
        global mic
        global stream
        print('RecognitionCallback open.')
        mic = pyaudio.PyAudio()
        stream = mic.open(format=pyaudio.paInt16,
                          channels=1,
                          rate=16000,
                          input=True)

    def on_close(self) -> None:
        """Release the microphone stream and the PyAudio instance.

        Safe to call when ``on_open`` never completed or when it has
        already run once: each handle is only released if it is set.
        """
        global mic
        global stream
        print('RecognitionCallback close.')
        # Clear the globals before releasing anything, so the capture loop
        # sees ``stream`` as None and stops reading instead of picking up a
        # stream that is in the middle of being closed.
        closing_stream, stream = stream, None
        closing_mic, mic = mic, None
        if closing_stream is not None:
            closing_stream.stop_stream()
            closing_stream.close()
        if closing_mic is not None:
            closing_mic.terminate()

    def on_event(self, result: RecognitionResult) -> None:
        """Print the sentence carried by a recognition result."""
        print('RecognitionCallback sentence: ', result.get_sentence())
# Create the streaming recognition session. The audio format declared here
# (raw PCM at 16000 Hz) has to match what the microphone stream produces.
callback = Callback()
recognition = Recognition(model='paraformer-realtime-v1',
                          format='pcm',
                          sample_rate=16000,
                          callback=callback)
recognition.start()

try:
    while True:
        # Take one snapshot of the global: the callback resets it to None
        # when the session closes, possibly between the check and the read.
        active_stream = stream
        if active_stream is None:
            break
        # 3200 frames per read; overflow errors are suppressed so a slow
        # iteration drops audio instead of raising.
        data = active_stream.read(3200, exception_on_overflow=False)
        recognition.send_audio_frame(data)
except KeyboardInterrupt:
    # Ctrl+C is the normal way to end the capture; fall through to cleanup.
    pass
finally:
    # Always stop the session, including on Ctrl+C or a read error, so the
    # connection is not left open when the script exits.
    recognition.stop()
import sounddevice as sd
import numpy as np
import dashscope
from dashscope.audio.asr import Recognition, RecognitionCallback, RecognitionResult
import os

# Prefer a key supplied through the DASHSCOPE_API_KEY environment variable so
# that a real credential never has to be committed with the script. The
# literal is kept only as a fallback so existing behaviour is unchanged when
# the variable is not set.
dashscope.api_key = os.environ.get("DASHSCOPE_API_KEY",
                                   "sk-**********************")

# NOTE(review): `mic` is not referenced by the rest of the code visible in
# this script, and `stream` is rebound to the sounddevice input stream
# further down. Both are kept so the module still exposes the same names.
mic = None
stream = None
class Callback(RecognitionCallback):
    """Recognition callback that prints only finished sentences."""

    def on_open(self) -> None:
        """Log that the recognition session has opened."""
        print("RecognitionCallback open.")

    def on_close(self) -> None:
        """Log that the recognition session has closed."""
        print("RecognitionCallback close.")

    def on_event(self, result: RecognitionResult) -> None:
        """Print the sentence text once its ``end_time`` is set.

        Results whose sentence has no ``end_time`` yet are skipped, so only
        one line is printed per sentence instead of every intermediate
        update.
        """
        sentence = result.get_sentence()
        # Skip results that carry no sentence mapping or no "end_time" key
        # instead of raising inside the SDK's callback.
        if not isinstance(sentence, dict):
            return
        if sentence.get("end_time") is not None:
            print("RecognitionCallback sentence: ", sentence['text'])
# Build the streaming recognition session: raw PCM at 16000 Hz, with results
# delivered to the Callback instance. Started here, before any audio is
# captured, so the audio callback can push frames into it straight away.
callback = Callback()
recognition = Recognition(
    model="paraformer-realtime-v1",
    format="pcm",
    sample_rate=16000,
    callback=callback,
)
recognition.start()
def audio_callback(indata, frames, time, status):
    """Forward one captured audio block to the recognition session.

    Passed as ``callback`` to the ``sd.InputStream`` created in this script.

    Args:
        indata: Captured samples; serialised with ``tobytes()`` before
            being sent.
        frames: Unused here.
        time: Unused here.
        status: Printed when truthy, so stream problems reported by the
            audio backend show up on the console.
    """
    if status:
        print(status)
    pcm_chunk = indata.tobytes()
    recognition.send_audio_frame(pcm_chunk)
# List the available audio devices so the right input index can be chosen.
devices = sd.query_devices()
print(devices)

try:
    # NOTE(review): device=2 is an index into the list printed above and is
    # specific to the machine this was written on; adjust it to the
    # microphone you want to capture from.
    stream = sd.InputStream(blocksize=3200, callback=audio_callback,
                            channels=1, samplerate=16000, dtype=np.int16,
                            device=2)
    # The context manager starts the stream on entry and stops and closes
    # it on exit, so the device is released however the loop ends.
    with stream:
        while True:
            # Audio is delivered through audio_callback; this thread only
            # has to stay alive. Sleep in short slices rather than spinning
            # so the wait uses no CPU and Ctrl+C is still noticed promptly.
            sd.sleep(100)
except KeyboardInterrupt:
    # Ctrl+C is the normal way to end the capture; fall through to cleanup.
    pass
finally:
    # Always stop the recognition session, including when opening the input
    # device fails.
    recognition.stop()
输出结果: