AI发展越来越成熟,像ChatGPT可以语音聊天,还可以带"眼睛"(视觉)的功能,所以本博文是参照ChatGPT功能实现的。已实现功能:(1)语音聊天;(2)打开摄像头(视频数据已传入后台,未实现视频数据识别,后面再加)
说明:本例子APP端是使用uniapp写的一个h5页面(可以打包成APP),来模仿APP端,将APP端的语音数据、视频数据通过webrtc推流技术,推送到python后端。后端收到语音数据之后,进行语音识别转文字(使用阿里的sensevoice,本地部署),文字传给大模型(使用通义千问,本地部署),大模型推理之后的结果数据再转为语音(文字转语音使用微软的edge-tts技术,也可以使用其它的),然后将语音使用webrtc推流给APP端播放,整个流程结束
具体使用的技术如下:
uniapp:写APP端
webrtc:前后端音视频推流
fastapi:WEB框架
langchain: 集成大语言模型通义千问qwen
sensevoice:语音识别
ollama:部署qwen
qwen大模型
edge-tts:文字转语音
redis:保存用户上下文信息,用户信息记忆功能
一、先看演示效果
一、AI语音聊天机器人APP(使用webrtc、语音识别、TTS、langchain、大语言模型、uniapp)
二、AI语音聊天机器人APP(使用webrtc、语音识别、TTS、langchain、大语言模型、uniapp)
三、AI语音聊天机器人APP(使用webrtc、语音识别、TTS、langchain、大语言模型、uniapp)
二、环境准备
1、准备senseVoice语音识别(我是虚拟机centos7)
senseVoice对应的docker镜像在后面下载压缩包中
docker load -i sensevoice.tar
docker run -d --name sensevoice --gpus all -p 7860:7860 sensevoice:1 # 有nvidia显卡
docker run -d --name sensevoice -p 7860:7860 sensevoice:1 # 没有nvidia显卡使用cpu运算
2、安装ollama环境(我是windows安装)
https://ollama.com/download/windows
直接安装后,使用cmd命令,进入windows命令行,执行下载通义千问模型
ollama pull qwen2.5:0.5b
3、准备redis环境(我是虚拟机centos7)
docker pull redis:5.0.10
配置文件redis.conf
requirepass 123456
appendonly yes
docker启动(相关路径,修改为你自己的)
docker run -d --restart always --name redis \
-p 6379:6379 \
-v /home/soft/redis/redis.conf:/etc/redis/redis.conf \
-v /home/soft/redis/data:/data \
redis:5.0.10 redis-server /etc/redis/redis.conf
三、代码运行
压缩包后面可下载,压缩包中有APP端代码(ai-voice-app)、服务端代码(ai-voice-server)
1、运行APP端代码
使用HBuilder X软件打开,然后访问 http://localhost:8090/ ,然后谷歌浏览器调成手机模式
2、运行后端代码
(1)、使用anaconda创建python环境
conda create -n aiapp python=3.10
conda activate aiapp
cd 后端路径/ai-voice-server
pip install -r requirements.txt
然后pycharm配置conda环境
(2)、修改配置文件settings.py
# Directory containing this settings file (project root).
CURRENT_DIR = os.path.dirname(__file__)
# Directory where received/generated audio files are stored.
STORAGE_DIR = "D:/temp/"
# Redis connection URL: empty user, password 123456, logical DB 3.
# NOTE(review): 127.127.0.1 is inside the 127.0.0.0/8 loopback range so it
# does resolve locally, but it is unusual — confirm it is not a typo for
# 127.0.0.1.
REDIS_URL = "redis://'':123456@127.127.0.1:6379/3"
# Name of the LLM served by ollama (must match `ollama pull`).
LLM_MODEL= "qwen2.5:0.5b"
# Base URL of the SenseVoice speech-recognition service (docker, port 7860).
# NOTE(review): same 127.127.0.1 remark as above.
SENSE_VOICE="http://127.127.0.1:7860/"
(3)运行main.py
四、相关源代码解释
1、前端代码
this.pc = new RTCPeerConnection();
let constraints = {audio: true,video: this.openVidoStream};
await navigator.mediaDevices.getUserMedia(constraints).then((stream) => {
console.log('发送多媒体流')
this.localStream = stream
stream.getTracks().forEach((track) => {
// console.log('trace类型', track)
// const level = this.localStream.getAudioLevel();
// console.log('level', level);
this.pc.addTrack(track, stream);//发送
});
}, (err) => {
alert('Could not acquire media: ' + err);
});
this.pc.addEventListener('track', (evt) => {
if (evt.track.kind == 'video'){
if(this.videoObj==null){
const video = document.createElement('video');
video.autoplay = true;
video.playsInline = true; // 对于iOS很重要,确保视频在页面内播放
document.getElementById('videoContainer').appendChild(video);
this.videoObj = video
}
this.videoObj.srcObject = evt.streams[0];
}else{
console.log('进入到audio中***********************')
if(this.audioObj==null){
const audio = document.createElement('audio');
audio.autoplay = true;
audio.playsInline = true; // 对于iOS很重要,确保视频在页面内播放
document.getElementById('audioContainer').appendChild(audio);
this.audioObj = audio
const audioStream = evt.streams[0];
const audioTrack = audioStream.getAudioTracks()[0];
console.log('音频轨道信息',audioTrack); // 查看音频轨道信息
}
this.audioObj.srcObject = evt.streams[0];
}
});
// 监听数据通道
this.pc.ondatachannel = (event) => {
const receiver = event.channel;
receiver.onmessage = (event) => {
console.log('', event.data);
};
receiver.onopen = () => {
console.log('Data channel is open');
};
receiver.onclose = () => {
console.log('Data channel is closed');
};
};
var parameters = {"ordered": true}
let dc = this.pc.createDataChannel('chat', parameters);
dc.addEventListener('close', () => {
clearInterval(this.dcInterval);
});
dc.addEventListener('open', () => {
this.dcInterval = setInterval(() => {
// console.log('心跳******')
let data = 'ping ' + this.currentStamp();
let obj = {"header":{"accountId":123456},"target":'TS',"method": "heartBeat","data":data}
let objStr = JSON.stringify(obj)
// console.log('发送数据:',objStr)
dc.send(objStr);
}, 1000);
});
dc.addEventListener('message', (evt) => {
// console.log('收到消息:',evt.data) //主要是心跳数据
});
this.dc=dc
2、后端代码:
# Answer the client's SDP offer and wire up media/data handling.
# NOTE(review): this fragment runs inside an async request handler; the
# surrounding function and the `data`, `request`, `pcs`, `deal_data` names
# are defined elsewhere in the project and are not visible here.
offer = RTCSessionDescription(sdp=data.sdp, type=data.type)
pc = RTCPeerConnection()
pcs.add(pc)  # keep a strong reference so the connection is not garbage-collected
# Channel used to push text messages to the client.
data_channel = pc.createDataChannel("chat")
# prepare local media
# player = MediaPlayer(os.path.join(ROOT, "demo-instruct.wav"))
video_relay = MediaRelay()
# Per-user storage path; the incoming video stream is recorded to an mp4 file.
dir_name = public_utils.mk_user_dir_file(request)
file_name = dir_name+'.mp4'
video_recorder = MediaRecorder(file=file_name)  # records the incoming video
video_track = VideoTransformTrack()  # video snapshot/processing track
audio_track = AudioTransformTrack()
# await audio_track.init_audio("test.mp3")
# Start the outgoing audio track with silence until a TTS reply is ready.
# NOTE(review): "silense" looks like a typo for "silence", but the method is
# defined elsewhere — the call must match that definition.
await audio_track.init_silense()

@pc.on("datachannel")
def on_datachannel(channel):
    # Messages from the app (heartbeats, commands) arrive here and are
    # dispatched to the project-level handler.
    @channel.on("message")
    async def on_message(res):
        # print('entered message event', res)
        await deal_data(channel, res)

@pc.on("connectionstatechange")
async def on_connectionstatechange():
    print('connectionstatechange事件:', pc.connectionState)
    if pc.connectionState == "failed":
        await pc.close()
        pcs.discard(pc)
    elif pc.connectionState == "connected":
        print("连接成功")
    elif pc.connectionState == "closed":
        print("连接关闭")
        # Flush and finalize the mp4 recording when the peer goes away.
        # NOTE(review): the "failed" branch closes the pc without stopping
        # the recorder — confirm whether that is intentional.
        await video_recorder.stop()

@pc.on("track")
def on_track(track):
    if track.kind == "audio":
        print('进入了audio事件********', track)
        # pc.addTrack(player.audio)
        # Feed the incoming microphone audio into the transform track and
        # send the transformed (TTS reply) audio back to the client.
        audio_track.set_frame(track)
        pc.addTrack(audio_track)
    elif track.kind == "video":
        print('进入了video事件********', track)
        # MediaRelay lets one source track be consumed twice: once for the
        # processed stream echoed to the client, once for the recorder.
        video_track.set_frame(video_relay.subscribe(track))
        pc.addTrack(video_track)
        video_recorder.addTrack(video_relay.subscribe(track))

    @track.on("ended")
    async def on_ended():
        print('进入了ended事件********')
        pcs.discard(pc)
        await pc.close()
        await video_recorder.stop()
五、代码下载,因文件过大,所以放在网盘