阿里云实时语音识别

发布于:2025-07-07 ⋅ 阅读:(21) ⋅ 点赞:(0)

前端:

<script setup>
import { ref, onMounted, onBeforeUnmount } from 'vue'

// --- recording state ---
const isRecording = ref(false)   // true while the mic is being captured
const transcript = ref('')       // latest recognition text shown in the UI
let mediaRecorder = null         // MediaRecorder for the active mic stream
let ws = null                    // WebSocket to the local transcription proxy

onMounted(() => {
  ws = new WebSocket('ws://localhost:3002')
  ws.onopen = () => {
    console.log('WebSocket连接成功')
  }
  ws.onerror = (event) => {
    console.error('WebSocket连接错误:', event)
  }
  ws.onmessage = (event) => {
    console.log("接收到消息:", event.data)
    // The backend sends JSON like {type: 'changed'|'completed', content: '...'}.
    // Parse it and display only the recognized text; previously the raw JSON
    // string was shown. Fall back to the raw payload if parsing fails.
    try {
      const msg = JSON.parse(event.data)
      transcript.value = msg.content ?? event.data
    } catch {
      transcript.value = event.data
    }
  }
})

onBeforeUnmount(() => {
  // Close the socket when the component is torn down.
  if (ws) ws.close()
})

/**
 * Start capturing microphone audio and stream it to the backend.
 * Captures webm/opus chunks (NOT WAV — the server transcodes to 16kHz PCM
 * via ffmpeg) and forwards each 500ms chunk over the WebSocket.
 */
const startRecording = async () => {
  if (isRecording.value) return
  transcript.value = ''

  let stream
  try {
    // May reject with NotAllowedError / NotFoundError when mic access fails;
    // only flip isRecording AFTER we actually have a stream, so a denied
    // permission no longer leaves the UI stuck in the "recording" state.
    stream = await navigator.mediaDevices.getUserMedia({ audio: true })
  } catch (err) {
    console.error('获取麦克风失败:', err)
    return
  }
  isRecording.value = true

  mediaRecorder = new MediaRecorder(stream, { mimeType: 'audio/webm;codecs=opus' })

  mediaRecorder.ondataavailable = (e) => {
    // Forward only non-empty chunks, and only while the socket is OPEN.
    if (e.data.size > 0 && ws && ws.readyState === WebSocket.OPEN) {
      e.data.arrayBuffer().then((buffer) => {
        ws.send(buffer)
      })
    }
  }
  mediaRecorder.onstop = () => {
    console.log("停止录音")
  }
  mediaRecorder.start(500) // emit a data chunk every 500ms
}

// Stop the active recording session and release the microphone tracks.
const stopRecording = () => {
  if (!isRecording.value) return
  isRecording.value = false
  if (!mediaRecorder) return
  mediaRecorder.stop()
  for (const track of mediaRecorder.stream.getTracks()) {
    track.stop()
  }
}
</script>

<template>
  <div style="padding: 40px; max-width: 600px; margin: auto;">
    <!-- Single toggle button: starts recording when idle, stops when active -->
    <el-button type="primary" @click="isRecording ? stopRecording() : startRecording()">
      {{ isRecording ? '停止录音' : '开始录音' }}
    </el-button>
    <div style="margin-top: 30px;">
      <!-- Live transcription text pushed from the backend over the WebSocket -->
      <el-card>
        <div>识别文本:</div>
        <div style="min-height: 40px; color: #333;width: 600px;">{{ transcript }}</div>
      </el-card>
    </div>
  </div>
</template>

<style scoped>
/* NOTE(review): these .logo rules look like leftover Vite starter-template
   boilerplate — no element in this component's template uses the .logo
   class. Confirm before removing. */
.logo {
  height: 6em;
  padding: 1.5em;
  will-change: filter;
  transition: filter 300ms;
}
.logo:hover {
  filter: drop-shadow(0 0 2em #646cffaa);
}
.logo.vue:hover {
  filter: drop-shadow(0 0 2em #42b883aa);
}
</style>

后端:

// --- HTTP + WebSocket server setup -----------------------------------------
const express = require('express');
const http = require('http');
const WebSocket = require('ws');
// Aliyun intelligent-speech SDK (realtime transcription over the NLS gateway).
const Nls = require('alibabacloud-nls');
const { PassThrough } = require('stream');
// Bundled ffmpeg binary + fluent wrapper: used to transcode the browser's
// webm/opus audio into 16kHz mono s16le PCM for the NLS service.
const ffmpegPath = require('@ffmpeg-installer/ffmpeg').path;
const ffmpeg = require('fluent-ffmpeg');

const app = express();
const server = http.createServer(app);
// WebSocket endpoint shares the same HTTP server (port 3002, see listen below).
const wss = new WebSocket.Server({ server });

// Aliyun credentials — both must be filled in before recognition can start.
const ALI_APP_KEY = '';// TODO: fill in a valid ALI_APP_KEY
const ALI_TOKEN = ''; // TODO: fill in a valid token

ffmpeg.setFfmpegPath(ffmpegPath);

// Per-connection transcription pipeline.
// The transcriber / ffmpeg streams are kept LOCAL to each WebSocket
// connection: previously they were module-level, so a second concurrent
// client clobbered the first client's pipeline, and any close event tore
// down every session.
wss.on('connection', (ws) => {
  console.log('新用户连接WebSocket成功')

  let transcriber = null;   // Aliyun NLS realtime transcriber for this client
  let ffmpegStream = null;  // PCM output stream produced by ffmpeg
  let inputStream = null;   // raw webm/opus bytes received from the browser

  // Tear down this connection's pipeline; safe to call multiple times.
  const stopTranscription = () => {
    if (inputStream) {
      inputStream.end();
      inputStream = null;
    }
    if (ffmpegStream) {
      ffmpegStream.end();
      ffmpegStream = null;
    }
    if (transcriber) {
      transcriber.shutdown();
      transcriber = null;
    }
  };

  // Send to the browser only while its socket is still open, so a late
  // NLS callback cannot throw on a closed socket.
  const safeSend = (payload) => {
    if (ws.readyState === WebSocket.OPEN) ws.send(payload);
  };

  ws.on('message', async (data) => {
    if (!ffmpegStream) {
      // Lazily build the pipeline on the first audio chunk:
      // browser webm/opus -> ffmpeg -> 16kHz mono s16le PCM -> Aliyun NLS.
      inputStream = new PassThrough();
      ffmpegStream = ffmpeg()
        .input(inputStream)
        .inputFormat('webm')
        .inputOptions('-fflags +genpts')
        .audioCodec('pcm_s16le')
        .audioChannels(1)
        .audioFrequency(16000)
        .format('s16le')
        .outputOptions('-f s16le')
        .outputOptions('-acodec pcm_s16le')
        .outputOptions('-ar 16000')
        .outputOptions('-ac 1')
        .pipe();

      transcriber = new Nls.SpeechTranscription({
        url: 'wss://nls-gateway.cn-shanghai.aliyuncs.com/ws/v1',
        appkey: ALI_APP_KEY,
        token: ALI_TOKEN
      });

      transcriber.on('started', (msg) => {
        console.log('开始识别:', msg);
      });
      transcriber.on('changed', (msg) => {
        // Renamed from `data` to avoid shadowing the message parameter.
        const result = JSON.parse(msg);
        console.log('changed:', result);
        safeSend(JSON.stringify({ type: 'changed', content: result.payload?.result || '' }));
      });
      transcriber.on('completed', (msg) => {
        const result = JSON.parse(msg);
        console.log("completed:", result);
        safeSend(JSON.stringify({ type: 'completed', content: result.payload?.result || '' }));
      });
      transcriber.on('failed', (msg) => {
        console.log('识别失败:', msg);
        stopTranscription();
      });
      transcriber.on('closed', () => {
        console.log('连接关闭');
      });

      // Start recognition; on failure tear down the half-built pipeline so
      // the next audio chunk can retry cleanly.
      try {
        await transcriber.start(transcriber.defaultStartParams(), true, 6000);
      } catch (err) {
        console.log('[识别启动失败] ' + err);
        stopTranscription();
        return;
      }

      ffmpegStream.on('data', (pcmChunk) => {
        if (transcriber) transcriber.sendAudio(pcmChunk);
      });
      ffmpegStream.on('error', (err) => {
        // Previously swallowed silently — log before cleanup.
        console.error('[ffmpeg错误]', err);
        stopTranscription();
      });
      ffmpegStream.on('end', () => {
        stopTranscription();
      });
    }
    // Feed the browser's webm chunk into the transcoding pipeline.
    if (inputStream) inputStream.write(Buffer.from(data));
  });

  ws.on('close', () => {
    stopTranscription();
  });
});
// Release every piece of the module-level transcription pipeline and clear
// the references. Tolerates partially-initialized state: each resource is
// closed independently, so a missing piece never blocks the rest.
const stopAliyunTranscription = () => {
  inputStream?.end();
  inputStream = null;
  ffmpegStream?.end();
  ffmpegStream = null;
  transcriber?.shutdown();
  transcriber = null;
};
// Start the shared HTTP/WebSocket server; the frontend connects to this port.
server.listen(3002, () => {
  console.log('WebSocket server running on ws://localhost:3002');
});