Unity 接入火山模型平台文字转语音 API

发布于:2025-06-25 ⋅ 阅读:(21) ⋅ 点赞:(0)

在这里插入图片描述

在这里插入图片描述

using System;
using System.Collections;
using System.Collections.Generic;
using System.Text;
using System.Threading.Tasks;
using UnityEngine;
using UnityEngine.Networking;
using System.IO;
using System.Net.WebSockets;
using System.Threading;
using LitJson; // 引入LitJson命名空间

/// <summary>
/// Singleton component that sends text to the configured text-to-speech HTTP or
/// WebSocket endpoint and hands the synthesized audio back as an <see cref="AudioClip"/>.
/// Results are reported through <see cref="OnAudioReceived"/> / <see cref="OnError"/>
/// and through the optional per-call callback (which receives null on failure).
/// </summary>
public class TTSManager : MonoBehaviour
{
    // API configuration (field names are kept because Unity serializes by name).
    [SerializeField] private string appId = "your_app_id";
    [SerializeField] private string secretKey = "your_secret_key"; // used by ApiKey / BasicAuth
    [SerializeField] private string accessToken = "your_access_token";
    [SerializeField] private string voiceType = "zh_male_M392_conversation_wvae_bigtts";
    [SerializeField] private string encoding = "mp3";
    [SerializeField] private float speedRatio = 1.0f;

    // Endpoint configuration.
    [SerializeField] private string httpApiUrl = "https://openspeech.bytedance.com/api/v1/tts"; // HTTP API
    [SerializeField] private string websocketApiUrl = "wss://openspeech.bytedance.com/api/v1/tts/ws_binary"; // WebSocket API

    // Which authentication header is attached to outgoing requests.
    [SerializeField] private AuthType authType = AuthType.BearerToken;

    /// <summary>Supported ways of authenticating a request.</summary>
    public enum AuthType
    {
        BearerToken,  // Authorization header carrying accessToken
        ApiKey,       // X-API-Key header carrying secretKey
        BasicAuth     // Authorization: Basic base64(appId:secretKey)
    }

    // Response code the HTTP path treats as success.
    private const int SuccessCode = 3000;

    // Message types found in the high nibble of the second header byte of a server frame.
    private const int MessageTypeAudioOnly = 0x0B;
    private const int MessageTypeError = 0x0F;

    // Fixed part of the binary header, in bytes.
    private const int BaseHeaderLength = 4;

    // Bytes that precede the payload inside an audio/error frame body
    // (4-byte sequence number or error code, then 4-byte payload size).
    // NOTE(review): layout follows the vendor's published binary protocol - confirm against current docs.
    private const int BodyPrefixLength = 8;

    // Event callbacks.
    public delegate void AudioReceivedCallback(AudioClip audioClip);
    public event AudioReceivedCallback OnAudioReceived;

    public delegate void ErrorCallback(string errorMessage);
    public event ErrorCallback OnError;

    // Singleton storage.
    private static TTSManager instance;

    /// <summary>
    /// Returns the shared manager, looking one up in the scene first and creating a
    /// new GameObject named "TTSManager" when none exists.
    /// </summary>
    public static TTSManager Instance
    {
        get
        {
            if (instance == null)
            {
                instance = FindObjectOfType<TTSManager>();
                if (instance == null)
                {
                    GameObject obj = new GameObject("TTSManager");
                    instance = obj.AddComponent<TTSManager>();
                }
            }
            return instance;
        }
    }

    private void Awake()
    {
        // Keep exactly one instance alive across scene loads; destroy late duplicates.
        if (instance != null && instance != this)
        {
            Destroy(gameObject);
        }
        else
        {
            instance = this;
            DontDestroyOnLoad(gameObject);
        }
    }

    /// <summary>
    /// Coroutine: requests speech for <paramref name="text"/> over HTTP (non-streaming),
    /// decodes the Base64 audio from the JSON response and loads it as an AudioClip.
    /// </summary>
    /// <param name="text">Text to synthesize; must not be null or empty.</param>
    /// <param name="callback">Optional; receives the clip, or null on any failure.</param>
    public IEnumerator TextToSpeechHTTP(string text, Action<AudioClip> callback = null)
    {
        if (!ValidateRequest(text, callback))
        {
            yield break;
        }

        string reqId = Guid.NewGuid().ToString();
        string jsonData = BuildRequestJson(text, reqId, "query");

        // The request body contains the access token, so only the request id is logged.
        Debug.Log($"请求已构建, reqid: {reqId}");

        using (UnityWebRequest www = new UnityWebRequest(httpApiUrl, "POST"))
        {
            www.uploadHandler = new UploadHandlerRaw(Encoding.UTF8.GetBytes(jsonData));
            www.downloadHandler = new DownloadHandlerBuffer();
            www.SetRequestHeader("Content-Type", "application/json");

            string headerName;
            string headerValue;
            if (TryGetAuthHeader(out headerName, out headerValue))
            {
                www.SetRequestHeader(headerName, headerValue);
            }

            yield return www.SendWebRequest();

            // Anything other than Success (including DataProcessingError) is a failure.
            if (www.result != UnityWebRequest.Result.Success)
            {
                Debug.LogError($"HTTP错误: {www.error}");
                Debug.LogError($"响应文本: {www.downloadHandler.text}");
                NotifyFailure(www.error, callback);
                yield break;
            }

            string responseText = www.downloadHandler.text;
            Debug.Log($"响应文本: {responseText}");

            byte[] audioData;
            string failure;
            if (!TryDecodeHttpResponse(responseText, out audioData, out failure))
            {
                NotifyFailure(failure, callback);
                yield break;
            }

            string tempPath = BuildTempPath(reqId);
            if (!TryWriteTempFile(tempPath, audioData, out failure))
            {
                NotifyFailure(failure, callback);
                yield break;
            }

            try
            {
                using (UnityWebRequest audioRequest = UnityWebRequestMultimedia.GetAudioClip(ToFileUri(tempPath), GetAudioType(encoding)))
                {
                    yield return audioRequest.SendWebRequest();

                    if (audioRequest.result != UnityWebRequest.Result.Success)
                    {
                        Debug.LogError($"音频加载错误: {audioRequest.error}");
                        NotifyFailure(audioRequest.error, callback);
                        yield break;
                    }

                    AudioClip clip = DownloadHandlerAudioClip.GetContent(audioRequest);
                    OnAudioReceived?.Invoke(clip);
                    callback?.Invoke(clip);
                }
            }
            finally
            {
                // The clip has been read by now; do not let temp files pile up in the cache.
                TryDeleteFile(tempPath);
            }
        }
    }

    /// <summary>
    /// Requests speech for <paramref name="text"/> over the binary WebSocket endpoint
    /// (streaming), concatenates the received audio frames and loads them as an AudioClip.
    /// Exceptions are caught and reported through OnError / the callback.
    /// </summary>
    /// <param name="text">Text to synthesize; must not be null or empty.</param>
    /// <param name="callback">Optional; receives the clip, or null on any failure.</param>
    public async Task TextToSpeechWebSocket(string text, Action<AudioClip> callback = null)
    {
        if (!ValidateRequest(text, callback))
        {
            return;
        }

        string reqId = Guid.NewGuid().ToString();
        ClientWebSocket client = new ClientWebSocket();

        try
        {
            string headerName;
            string headerValue;
            if (TryGetAuthHeader(out headerName, out headerValue))
            {
                client.Options.SetRequestHeader(headerName, headerValue);
            }

            await client.ConnectAsync(new Uri(websocketApiUrl), CancellationToken.None);

            byte[] jsonBytes = Encoding.UTF8.GetBytes(BuildRequestJson(text, reqId, "submit"));
            byte[] requestFrame = BuildClientRequestFrame(jsonBytes);
            await client.SendAsync(new ArraySegment<byte>(requestFrame), WebSocketMessageType.Binary, true, CancellationToken.None);

            byte[] audioBytes;
            bool isComplete = false;
            bool failed = false;
            byte[] buffer = new byte[4096];

            using (MemoryStream audioStream = new MemoryStream())
            using (MemoryStream frameStream = new MemoryStream())
            {
                while (!isComplete && !failed && client.State == WebSocketState.Open)
                {
                    // One protocol frame may arrive in several WebSocket fragments when it is
                    // larger than the receive buffer, so collect until EndOfMessage.
                    frameStream.SetLength(0);
                    frameStream.Position = 0;

                    WebSocketReceiveResult result;
                    do
                    {
                        result = await client.ReceiveAsync(new ArraySegment<byte>(buffer), CancellationToken.None);
                        if (result.MessageType == WebSocketMessageType.Close)
                        {
                            break;
                        }
                        frameStream.Write(buffer, 0, result.Count);
                    }
                    while (!result.EndOfMessage);

                    if (result.MessageType == WebSocketMessageType.Close)
                    {
                        // Server closed the connection: treat as end of stream.
                        // The closing handshake is completed in the finally block.
                        isComplete = true;
                        break;
                    }

                    if (result.MessageType != WebSocketMessageType.Binary)
                    {
                        continue;
                    }

                    byte[] frame = frameStream.ToArray();
                    int messageType;
                    int flags;
                    int headerLength;
                    if (!TryReadFrameHeader(frame, out messageType, out flags, out headerLength))
                    {
                        continue; // too short or inconsistent header: ignore the frame
                    }

                    if (messageType == MessageTypeAudioOnly)
                    {
                        // flags == 0 is an acknowledgement without audio.
                        if (flags != 0)
                        {
                            int payloadStart = headerLength + BodyPrefixLength;
                            if (frame.Length > payloadStart)
                            {
                                audioStream.Write(frame, payloadStart, frame.Length - payloadStart);
                            }

                            // Flags 0x02 / 0x03 mark the last frame (negative sequence number).
                            if (flags == 0x02 || flags == 0x03)
                            {
                                isComplete = true;
                            }
                        }
                    }
                    else if (messageType == MessageTypeError)
                    {
                        // Skip error code + message size; the remainder is the message text.
                        // NOTE(review): assumes the server does not compress error payloads
                        // because the request declares no compression - confirm.
                        int textStart = Math.Min(headerLength + BodyPrefixLength, frame.Length);
                        string errorMsg = Encoding.UTF8.GetString(frame, textStart, frame.Length - textStart);
                        Debug.LogError($"WebSocket错误: {errorMsg}");
                        NotifyFailure(errorMsg, callback);
                        failed = true;
                    }
                }

                if (failed)
                {
                    return;
                }

                if (!isComplete || audioStream.Length == 0)
                {
                    // Without this the caller would never hear back at all.
                    Debug.LogError("WebSocket错误: 未收到完整的音频数据");
                    NotifyFailure("未收到完整的音频数据", callback);
                    return;
                }

                audioBytes = audioStream.ToArray();
            }

            string tempPath = BuildTempPath(reqId);
            File.WriteAllBytes(tempPath, audioBytes);

            try
            {
                using (UnityWebRequest audioRequest = UnityWebRequestMultimedia.GetAudioClip(ToFileUri(tempPath), GetAudioType(encoding)))
                {
                    // Bridge the Unity async operation to a Task so it can be awaited.
                    TaskCompletionSource<bool> tcs = new TaskCompletionSource<bool>();
                    audioRequest.SendWebRequest().completed += _ => tcs.TrySetResult(true);
                    await tcs.Task;

                    if (audioRequest.result != UnityWebRequest.Result.Success)
                    {
                        Debug.LogError($"音频加载错误: {audioRequest.error}");
                        NotifyFailure(audioRequest.error, callback);
                    }
                    else
                    {
                        AudioClip clip = DownloadHandlerAudioClip.GetContent(audioRequest);
                        OnAudioReceived?.Invoke(clip);
                        callback?.Invoke(clip);
                    }
                }
            }
            finally
            {
                TryDeleteFile(tempPath);
            }
        }
        catch (Exception ex)
        {
            Debug.LogError($"WebSocket异常: {ex.Message}");
            NotifyFailure(ex.Message, callback);
        }
        finally
        {
            await CloseQuietlyAsync(client);
        }
    }

    // Runs the configuration check plus a text check and reports a failure to the caller.
    private bool ValidateRequest(string text, Action<AudioClip> callback)
    {
        if (!ValidateParameters())
        {
            NotifyFailure("参数验证失败,请检查配置", callback);
            return false;
        }

        if (string.IsNullOrEmpty(text))
        {
            Debug.LogError("text不能为空");
            NotifyFailure("text不能为空", callback);
            return false;
        }

        return true;
    }

    // Raises OnError and hands null to the per-call callback.
    private void NotifyFailure(string message, Action<AudioClip> callback)
    {
        OnError?.Invoke(message);
        callback?.Invoke(null);
    }

    // Builds the JSON request body shared by the HTTP ("query") and WebSocket ("submit") paths.
    private string BuildRequestJson(string text, string reqId, string operation)
    {
        var requestData = new Dictionary<string, object>
        {
            ["app"] = new Dictionary<string, object>
            {
                ["appid"] = appId,
                ["token"] = accessToken,
                ["cluster"] = "volcano_tts"
            },
            ["user"] = new Dictionary<string, object>
            {
                ["uid"] = "unity_user"
            },
            ["audio"] = new Dictionary<string, object>
            {
                ["voice_type"] = voiceType,
                ["encoding"] = encoding,
                // Widened to double (rounded to hide float noise such as 1.2000000477).
                // NOTE(review): some LitJson versions have no writer for System.Single - confirm for the bundled one.
                ["speed_ratio"] = Math.Round((double)speedRatio, 2)
            },
            ["request"] = new Dictionary<string, object>
            {
                ["reqid"] = reqId,
                ["text"] = text,
                ["operation"] = operation
            }
        };

        return JsonMapper.ToJson(requestData);
    }

    // Wraps a JSON payload in the binary "full client request" frame:
    // 4 header bytes, 4-byte big-endian payload size, then the payload.
    private static byte[] BuildClientRequestFrame(byte[] payload)
    {
        byte[] frame = new byte[BaseHeaderLength + 4 + payload.Length];
        frame[0] = 0x11; // protocol version 1, header size 1 (in 4-byte units)
        frame[1] = 0x10; // message type 1 (full client request), no flags
        frame[2] = 0x10; // serialization 1 (JSON), no compression
        frame[3] = 0x00; // reserved

        int size = payload.Length;
        frame[4] = (byte)((size >> 24) & 0xFF);
        frame[5] = (byte)((size >> 16) & 0xFF);
        frame[6] = (byte)((size >> 8) & 0xFF);
        frame[7] = (byte)(size & 0xFF);

        Buffer.BlockCopy(payload, 0, frame, BaseHeaderLength + 4, payload.Length);
        return frame;
    }

    // Reads message type, flags and header length from a server frame.
    // Returns false when the frame is shorter than its own header.
    private static bool TryReadFrameHeader(byte[] frame, out int messageType, out int flags, out int headerLength)
    {
        messageType = 0;
        flags = 0;
        headerLength = 0;

        if (frame == null || frame.Length < BaseHeaderLength)
        {
            return false;
        }

        headerLength = (frame[0] & 0x0F) * 4; // low nibble = header size in 4-byte units
        if (headerLength < BaseHeaderLength || headerLength > frame.Length)
        {
            return false;
        }

        messageType = (frame[1] >> 4) & 0x0F;
        flags = frame[1] & 0x0F;
        return true;
    }

    // Parses the HTTP JSON response and decodes its Base64 audio.
    // On failure logs the reason and returns it in 'failure'.
    private bool TryDecodeHttpResponse(string responseText, out byte[] audioData, out string failure)
    {
        audioData = null;
        failure = null;

        try
        {
            TTSResponse response = JsonMapper.ToObject<TTSResponse>(responseText);
            if (response == null)
            {
                failure = "响应解析失败: 响应为空";
                Debug.LogError(failure);
                return false;
            }

            if (response.code != SuccessCode)
            {
                Debug.LogError($"API错误: {response.message}");
                failure = response.message;
                return false;
            }

            if (string.IsNullOrEmpty(response.data))
            {
                failure = "响应解析失败: 音频数据为空";
                Debug.LogError(failure);
                return false;
            }

            audioData = Convert.FromBase64String(response.data);
            return true;
        }
        catch (Exception ex)
        {
            // Malformed JSON or invalid Base64 must not kill the coroutine silently.
            failure = $"响应解析失败: {ex.Message}";
            Debug.LogError(failure);
            return false;
        }
    }

    // Location of the temporary audio file for one request.
    private string BuildTempPath(string reqId)
    {
        return Path.Combine(Application.temporaryCachePath, $"tts_{reqId}.{encoding}");
    }

    // Writes the audio bytes to disk; returns false (with the reason) instead of throwing.
    private static bool TryWriteTempFile(string path, byte[] bytes, out string failure)
    {
        try
        {
            File.WriteAllBytes(path, bytes);
            failure = null;
            return true;
        }
        catch (Exception ex)
        {
            failure = $"音频文件写入失败: {ex.Message}";
            Debug.LogError(failure);
            return false;
        }
    }

    // Best-effort removal of a temp file; failures are only logged.
    private static void TryDeleteFile(string path)
    {
        try
        {
            if (File.Exists(path))
            {
                File.Delete(path);
            }
        }
        catch (Exception ex)
        {
            Debug.LogWarning($"临时文件删除失败: {ex.Message}");
        }
    }

    // Converts a local path to a properly escaped file:// URI.
    private static string ToFileUri(string path)
    {
        return new Uri(path).AbsoluteUri;
    }

    // Completes the closing handshake when the socket is still able to, then disposes it.
    // CloseAsync is only valid in Open / CloseReceived; other states (e.g. a failed
    // connect or an aborted socket) would throw.
    private static async Task CloseQuietlyAsync(ClientWebSocket client)
    {
        if (client == null)
        {
            return;
        }

        try
        {
            if (client.State == WebSocketState.Open || client.State == WebSocketState.CloseReceived)
            {
                await client.CloseAsync(WebSocketCloseStatus.NormalClosure, "", CancellationToken.None);
            }
        }
        catch (Exception ex)
        {
            Debug.LogWarning($"WebSocket关闭异常: {ex.Message}");
        }
        finally
        {
            client.Dispose();
        }
    }

    // Produces the authentication header for the configured auth type.
    // Shared by the HTTP and WebSocket paths; the value is never logged because it is a secret.
    private bool TryGetAuthHeader(out string name, out string value)
    {
        switch (authType)
        {
            case AuthType.BearerToken:
                name = "Authorization";
                // The semicolon after "Bearer" is kept exactly as this file has always sent it
                // (vendor-specific format - verify against the vendor's auth docs before changing).
                value = $"Bearer; {accessToken}";
                return true;

            case AuthType.ApiKey:
                name = "X-API-Key";
                value = secretKey;
                return true;

            case AuthType.BasicAuth:
                string credentials = $"{appId}:{secretKey}";
                // UTF-8 yields the same bytes as ASCII for ASCII input and does not
                // replace other characters with '?'.
                string encoded = Convert.ToBase64String(Encoding.UTF8.GetBytes(credentials));
                name = "Authorization";
                value = $"Basic {encoded}";
                return true;

            default:
                name = null;
                value = null;
                return false;
        }
    }

    // Maps the configured encoding name to Unity's AudioType.
    private AudioType GetAudioType(string encoding)
    {
        // Invariant lowering: the comparison must not depend on the device culture.
        switch (encoding.ToLowerInvariant())
        {
            case "mp3": return AudioType.MPEG;
            case "wav": return AudioType.WAV;
            case "ogg": return AudioType.OGGVORBIS;
            default: return AudioType.UNKNOWN;
        }
    }

    // Checks that every configuration value needed for a request is present.
    // Logs the first missing value and returns false.
    private bool ValidateParameters()
    {
        if (string.IsNullOrEmpty(appId))
        {
            Debug.LogError("appId不能为空");
            return false;
        }

        if (string.IsNullOrEmpty(accessToken) && authType == AuthType.BearerToken)
        {
            Debug.LogError("accessToken不能为空");
            return false;
        }

        if (string.IsNullOrEmpty(secretKey) && (authType == AuthType.ApiKey || authType == AuthType.BasicAuth))
        {
            Debug.LogError("secretKey不能为空");
            return false;
        }

        if (string.IsNullOrEmpty(voiceType))
        {
            Debug.LogError("voiceType不能为空");
            return false;
        }

        if (string.IsNullOrEmpty(encoding))
        {
            Debug.LogError("encoding不能为空");
            return false;
        }

        if (string.IsNullOrEmpty(httpApiUrl))
        {
            Debug.LogError("httpApiUrl不能为空");
            return false;
        }

        if (string.IsNullOrEmpty(websocketApiUrl))
        {
            Debug.LogError("websocketApiUrl不能为空");
            return false;
        }

        return true;
    }

    // Shape of the HTTP JSON response as read by LitJson.
    [Serializable]
    private class TTSResponse
    {
        public string reqid;
        public int code;
        public string message;
        public int sequence;
        public string data; // Base64-encoded audio
        public Dictionary<string, object> addition;
    }
}
using System.Collections;
using System.Collections.Generic;
using UnityEngine;
using UnityEngine.UI;

/// <summary>
/// Sample UI glue: sends the text typed into <see cref="textInput"/> to the TTS manager
/// over HTTP or WebSocket and plays the returned clip on <see cref="audioSource"/>.
/// </summary>
public class TTSExample : MonoBehaviour
{
    public InputField textInput;
    public Button httpButton;
    public Button websocketButton;
    public AudioSource audioSource;
    public Text statusText;

    // Cached in Start so OnDestroy never goes through TTSManager.Instance, whose getter
    // creates a brand-new GameObject when no manager exists (e.g. while the scene or
    // the application is shutting down).
    private TTSManager manager;

    private void Start()
    {
        httpButton.onClick.AddListener(OnHTTPButtonClick);
        websocketButton.onClick.AddListener(OnWebSocketButtonClick);

        // Subscribe to the manager's result events.
        manager = TTSManager.Instance;
        manager.OnAudioReceived += OnAudioReceived;
        manager.OnError += OnError;
    }

    private void OnDestroy()
    {
        // Detach from the buttons so they do not keep calling into a destroyed component.
        if (httpButton != null)
        {
            httpButton.onClick.RemoveListener(OnHTTPButtonClick);
        }

        if (websocketButton != null)
        {
            websocketButton.onClick.RemoveListener(OnWebSocketButtonClick);
        }

        // Unsubscribe only when the manager we subscribed to still exists.
        if (manager != null)
        {
            manager.OnAudioReceived -= OnAudioReceived;
            manager.OnError -= OnError;
        }

        manager = null;
    }

    // Starts the HTTP request as a coroutine; the result arrives through the events.
    private void OnHTTPButtonClick()
    {
        if (string.IsNullOrEmpty(textInput.text))
        {
            statusText.text = "请输入文本";
            return;
        }

        statusText.text = "正在请求音频...";
        StartCoroutine(manager.TextToSpeechHTTP(textInput.text));
    }

    // UI event handler, hence async void; exceptions are caught here because nothing
    // upstream can observe them.
    private async void OnWebSocketButtonClick()
    {
        if (string.IsNullOrEmpty(textInput.text))
        {
            statusText.text = "请输入文本";
            return;
        }

        statusText.text = "正在请求音频...";

        try
        {
            await manager.TextToSpeechWebSocket(textInput.text);
        }
        catch (System.Exception ex)
        {
            // This component may have been destroyed while the request was in flight.
            if (this != null)
            {
                OnError(ex.Message);
            }
            else
            {
                Debug.LogError(ex.Message);
            }
        }
    }

    // Plays the received clip and updates the status label.
    private void OnAudioReceived(AudioClip clip)
    {
        if (clip != null)
        {
            audioSource.clip = clip;
            audioSource.Play();
            statusText.text = "音频播放中...";
        }
    }

    // Shows and logs an error reported by the manager.
    private void OnError(string errorMessage)
    {
        statusText.text = $"错误: {errorMessage}";
        Debug.LogError(errorMessage);
    }
}


网站公告

今日签到

点亮在社区的每一天
去签到