一、创建自己的应用
百度智能云控制台网址:https://console.bce.baidu.com/
1、创建应用
2、获取APIKey和SecretKey
3、API调试
调试网址:https://console.bce.baidu.com/support/?timestamp=1750317430400#/api?product=AI&project=%E8%AF%AD%E9%9F%B3%E6%8A%80%E6%9C%AF&parent=%E9%89%B4%E6%9D%83%E8%AE%A4%E8%AF%81%E6%9C%BA%E5%88%B6&api=oauth%2F2.0%2Ftoken&method=post
二、在Unity中进行调用
1、相关参数说明
(1)短文本个性化语音生成相关参数
(2)长文本个性化语音生成相关参数
2、完整代码
using Newtonsoft.Json;
using System;
using System.Collections;
using System.Collections.Generic;
using System.IO;
using System.Text;
using UnityEngine;
using UnityEngine.Networking;
using UnityEngine.UI;
public class TTS : MonoBehaviour
{
#region 相关参数
// Baidu AI Cloud application credentials. Set them in the Inspector (or inject them
// at runtime); real keys are deliberately not hard-coded so they never end up in
// version control.
[Header("鉴权相关参数")]
[SerializeField] private string apiKey = "";
[SerializeField] private string secretKey = "";
// Written by GetAccessToken() when the OAuth request succeeds.
[SerializeField] private string accessToken = null;
[Space]
// The fields below are serialized as-is into the JSON body of the long-text
// "create task" request (see CreateTTSTask).
[Header("长文本语音合成参数设置")]
[SerializeField] private string format = "mp3-16k"; // or "wav"
[SerializeField] private int voice = 0; // voice id: 0 = female, 1 = male, etc.
[SerializeField] private string lang = "zh";
[SerializeField] private int speed = 5; // 0~15
[SerializeField] private int pitch = 5; // 0~15
[SerializeField] private int volume = 5; // 0~15
[SerializeField] private int enable_subtitle = 0;
[Space]
// The fields below are sent as-is as query parameters of the short-text request
// (see ShortTTS); each field name is the parameter name.
[Header("短文本语音合成参数设置")]
[SerializeField] private string cuid = "240a906f2b88794fd0426442c4136a5a57bf5c01";
[SerializeField] private string ctp = "1";
[SerializeField] private string lan = "zh";
[SerializeField] private string spd = "5";
[SerializeField] private string pit = "5";
[SerializeField] private string vol = "10";
[SerializeField] private string per = "1";
[SerializeField] private string aue = "3"; // "3" requests MP3 output
[Space]
[Header("UI界面相关")]
public InputField inputFieldText; // text to synthesize
public Button buttonStartTTS; // starts synthesis
public Button buttonPlay; // plays the synthesized clip
public AudioSource audioSource; // receives and plays the synthesized clip
#endregion
// Unity lifecycle hook: starts authentication and wires up the two UI buttons.
void Start()
{
    // Request the access token immediately; both synthesis paths read it.
    StartCoroutine(GetAccessToken());

    // Synthesis button: synthesize whatever is currently in the input field.
    buttonStartTTS.onClick.AddListener(HandleStartTtsClicked);

    // Play button: play the clip currently assigned to the audio source.
    buttonPlay.onClick.AddListener(HandlePlayClicked);
}

// Click handler for the synthesis button.
private void HandleStartTtsClicked()
{
    StartTTS(inputFieldText.text, audioSource);
}

// Click handler for the play button; does nothing until a clip has been assigned.
private void HandlePlayClicked()
{
    if (audioSource.clip == null)
    {
        return;
    }

    audioSource.Play();
}
/// <summary>
/// Synthesizes <paramref name="text"/> and assigns the resulting clip to
/// <paramref name="audioSource"/>. Texts shorter than 60 characters use the short-text
/// endpoint; everything else goes through the long-text task API.
/// </summary>
/// <param name="text">Text to synthesize. Null, empty or whitespace-only text is ignored.</param>
/// <param name="audioSource">Audio source whose clip is replaced when synthesis succeeds.</param>
public void StartTTS(string text, AudioSource audioSource)
{
    if (string.IsNullOrWhiteSpace(text))
    {
        Debug.LogWarning("[TTS] 合成文本为空,已忽略本次请求");
        return;
    }

    if (audioSource == null)
    {
        Debug.LogError("[TTS] AudioSource 未设置,无法保存合成结果");
        return;
    }

    if (text.Length < 60)
    {
        print("开始短文本语音合成");
        StartCoroutine(ShortTTS(text, response =>
        {
            // ShortTTS reports failures through this same callback with a null clip.
            // Keep the previous clip in that case instead of overwriting it with null
            // and announcing success.
            if (response == null || response.clip == null)
            {
                Debug.LogError("[TTS] 短文本语音合成失败: " + response?.error_msg);
                return;
            }

            audioSource.clip = response.clip;
            print("短文本语音合成结束,请播放");
        }));
    }
    else
    {
        print("开始长文本语音合成");
        StartCoroutine(LongTTS(text, clip =>
        {
            audioSource.clip = clip;
            print("长文本语音合成结束,请播放");
        }));
    }
}
#region 鉴权相关
/// <summary>
/// Requests an OAuth access token from Baidu with the client-credentials grant and
/// stores it in the <c>accessToken</c> field. Every failure is logged and leaves the
/// field unchanged.
/// </summary>
/// <returns>Coroutine enumerator; run it with <c>StartCoroutine</c>.</returns>
public IEnumerator GetAccessToken()
{
    // Without credentials the request can only fail; say so explicitly.
    if (string.IsNullOrEmpty(apiKey) || string.IsNullOrEmpty(secretKey))
    {
        Debug.LogError("❌ 获取 语音合成AccessToken 失败: apiKey 或 secretKey 未配置");
        yield break;
    }

    const string url = "https://aip.baidubce.com/oauth/2.0/token";
    WWWForm form = new WWWForm();
    form.AddField("grant_type", "client_credentials");
    form.AddField("client_id", apiKey);
    form.AddField("client_secret", secretKey);

    using (UnityWebRequest request = UnityWebRequest.Post(url, form))
    {
        yield return request.SendWebRequest();

        if (request.result != UnityWebRequest.Result.Success)
        {
            Debug.LogError("❌ 获取 语音合成AccessToken 失败: " + request.error);
            yield break;
        }

        string token = null;
        bool parseFailed = false;
        try
        {
            TokenResponse tokenResponse = JsonConvert.DeserializeObject<TokenResponse>(request.downloadHandler.text);
            // DeserializeObject returns null for an empty or "null" body.
            token = tokenResponse?.access_token;
        }
        catch (Exception ex)
        {
            parseFailed = true;
            Debug.LogError("❌ 语音合成AccessToken 解析失败: " + ex.Message);
        }

        if (parseFailed)
        {
            yield break;
        }

        if (string.IsNullOrEmpty(token))
        {
            // A well-formed body without access_token is a failure, not a success.
            Debug.LogError("❌ 语音合成AccessToken 解析失败: 响应中缺少 access_token");
            yield break;
        }

        accessToken = token;
        // The token is a bearer credential, so its value is not written to the log.
        Debug.Log("✅ 获取语音合成 AccessToken 成功");
    }
}
#endregion
#region 短文本语音合成相关
/// <summary>
/// Synthesizes a short text through Baidu's <c>text2audio</c> endpoint and reports the
/// outcome through <paramref name="callback"/>.
/// </summary>
/// <param name="text">Text to synthesize.</param>
/// <param name="callback">
/// Invoked exactly once. On success <c>clip</c> holds the decoded audio. On failure
/// <c>clip</c> is null, <c>error_msg</c> describes the problem and <c>error_index</c> is
/// -1 (transport error), -2 (non-audio response or undecodable audio) or
/// -3 (request not sent because the text or the access token is missing).
/// </param>
/// <returns>Coroutine enumerator; run it with <c>StartCoroutine</c>.</returns>
public IEnumerator ShortTTS(string text, Action<TtsResponse> callback)
{
    if (string.IsNullOrEmpty(text))
    {
        Debug.LogError("[TTS] 请求失败: 合成文本为空");
        callback?.Invoke(new TtsResponse { error_index = -3, error_msg = "合成文本为空" });
        yield break;
    }

    if (string.IsNullOrEmpty(accessToken))
    {
        Debug.LogError("[TTS] 请求失败: AccessToken 尚未获取");
        callback?.Invoke(new TtsResponse { error_index = -3, error_msg = "AccessToken 尚未获取" });
        yield break;
    }

    var param = new Dictionary<string, string>
    {
        { "tex", text },
        { "tok", accessToken },
        { "cuid", cuid },
        { "ctp", ctp },
        { "lan", lan },
        { "spd", spd },
        { "pit", pit },
        { "vol", vol },
        { "per", per },
        { "aue", aue }
    };

    // HTTPS, because the query string carries the access token.
    var urlBuilder = new StringBuilder("https://tsn.baidu.com/text2audio");
    char separator = '?';
    foreach (KeyValuePair<string, string> pair in param)
    {
        urlBuilder.Append(separator)
                  .Append(pair.Key)
                  .Append('=')
                  .Append(UnityWebRequest.EscapeURL(pair.Value ?? string.Empty));
        separator = '&';
    }

    // aue "6" asks the service for WAV; every other value keeps the original MPEG decoder.
    AudioType audioType = aue == "6" ? AudioType.WAV : AudioType.MPEG;

    using (UnityWebRequest www = UnityWebRequestMultimedia.GetAudioClip(urlBuilder.ToString(), audioType))
    {
        yield return www.SendWebRequest();

        if (www.result != UnityWebRequest.Result.Success)
        {
            Debug.LogError("[TTS] 请求失败: " + www.error);
            callback?.Invoke(new TtsResponse
            {
                error_index = -1,
                error_msg = www.error
            });
            yield break;
        }

        // The original code treats any non-audio Content-Type as an error payload.
        string type = www.GetResponseHeader("Content-Type");
        if (!string.IsNullOrEmpty(type) && type.Contains("audio"))
        {
            AudioClip clip = DownloadHandlerAudioClip.GetContent(www);
            if (clip != null)
            {
                callback?.Invoke(new TtsResponse { clip = clip });
            }
            else
            {
                // Audio that cannot be decoded must not be reported as success.
                Debug.LogError("[TTS] 文本响应错误: 音频解码失败");
                callback?.Invoke(new TtsResponse
                {
                    error_index = -2,
                    error_msg = "音频解码失败"
                });
            }
        }
        else
        {
            byte[] raw = www.downloadHandler.data;
            string errorText = raw != null ? Encoding.UTF8.GetString(raw) : string.Empty;
            Debug.LogError("[TTS] 文本响应错误: " + errorText);
            callback?.Invoke(new TtsResponse
            {
                error_index = -2,
                error_msg = errorText
            });
        }
    }
}
/// <summary>
/// Result object handed to the <c>ShortTTS</c> callback.
/// </summary>
public class TtsResponse
{
    /// <summary>0 when no error was recorded; ShortTTS sets a negative value on failure.</summary>
    public int error_index;

    /// <summary>Error description; left unset on success.</summary>
    public string error_msg;

    // Not assigned anywhere in this file.
    public string sn;

    // Not assigned anywhere in this file.
    public int idx;

    /// <summary>True when <see cref="error_index"/> is 0.</summary>
    public bool Success
    {
        get { return error_index == 0; }
    }

    /// <summary>Decoded audio; null when synthesis failed.</summary>
    public AudioClip clip;
}
#endregion
#region 长文本语音合成相关
/// <summary>
/// Runs the long-text synthesis pipeline in order: create the task, poll it until it
/// finishes, then download the resulting audio.
/// </summary>
/// <param name="text">Text to synthesize.</param>
/// <param name="callback">
/// Invoked with the downloaded clip on success only. Failures at any step are logged
/// and end the coroutine without invoking the callback.
/// </param>
/// <returns>Coroutine enumerator; run it with <c>StartCoroutine</c>.</returns>
private IEnumerator LongTTS(string text, Action<AudioClip> callback)
{
    // Both REST calls need the token, so check it before creating a task that could
    // never be queried. The snapshot keeps the query on the same token as the create.
    string token = accessToken;
    if (string.IsNullOrEmpty(token))
    {
        Debug.LogError("❌ 合成失败: AccessToken 尚未获取");
        yield break;
    }

    // Step 1: create the synthesis task.
    string taskId = null;
    yield return StartCoroutine(CreateTTSTask(
        text,
        id => { taskId = id; },
        errorMsg => { Debug.LogError("❌ 合成失败: " + errorMsg); }));
    if (string.IsNullOrEmpty(taskId))
    {
        yield break;
    }

    // Step 2: poll until the service returns the download URL of the audio.
    string audioUrl = null;
    yield return StartCoroutine(QueryTTSTaskStatus(
        token,
        taskId,
        address => { audioUrl = address; },
        errorMsg => { Debug.LogError("❌ 查询失败:" + errorMsg); }));
    if (string.IsNullOrEmpty(audioUrl))
    {
        yield break;
    }

    // Step 3: download the audio and hand the clip to the caller.
    yield return StartCoroutine(DownloadAudio(audioUrl, clip =>
    {
        if (clip != null)
        {
            callback?.Invoke(clip);
        }
        else
        {
            Debug.LogError("下载的音频 Clip 为 null");
        }
    }));
}
/// <summary>
/// Creates a long-text synthesis task and reports the task id or an error message.
/// Exactly one of the two callbacks is invoked.
/// </summary>
/// <param name="text">Text to synthesize.</param>
/// <param name="onSuccess">Receives the non-empty task id returned by the service.</param>
/// <param name="onError">Receives a description of the failure.</param>
/// <returns>Coroutine enumerator; run it with <c>StartCoroutine</c>.</returns>
public IEnumerator CreateTTSTask(string text, Action<string> onSuccess, Action<string> onError)
{
    if (string.IsNullOrEmpty(text))
    {
        onError?.Invoke("合成文本为空");
        yield break;
    }

    if (string.IsNullOrEmpty(accessToken))
    {
        onError?.Invoke("AccessToken 尚未获取");
        yield break;
    }

    string url = $"https://aip.baidubce.com/rpc/2.0/tts/v1/create?access_token={accessToken}";
    var bodyObj = new
    {
        text = text,
        format = format,
        voice = voice,
        lang = lang,
        speed = speed,
        pitch = pitch,
        volume = volume,
        enable_subtitle = enable_subtitle
    };
    byte[] bodyRaw = Encoding.UTF8.GetBytes(JsonConvert.SerializeObject(bodyObj));

    using (UnityWebRequest request = new UnityWebRequest(url, "POST"))
    {
        request.uploadHandler = new UploadHandlerRaw(bodyRaw);
        request.downloadHandler = new DownloadHandlerBuffer();
        request.SetRequestHeader("Content-Type", "application/json");
        request.SetRequestHeader("Accept", "application/json");

        yield return request.SendWebRequest();

        if (request.result != UnityWebRequest.Result.Success)
        {
            Debug.LogError("❌ 网络请求失败:" + request.error);
            onError?.Invoke(request.error);
            yield break;
        }

        string responseText = request.downloadHandler.text ?? string.Empty;
        Debug.Log("✅ 创建语音任务返回:" + responseText);

        // Parse inside try/catch: an exception escaping a nested coroutine would stop
        // it without resuming the coroutine that is waiting on it.
        string taskId = null;
        string failure = null;
        try
        {
            if (responseText.Contains("task_id"))
            {
                var response = JsonConvert.DeserializeObject<TTSTaskSuccessResponse>(responseText);
                taskId = response?.TaskId;
                if (string.IsNullOrEmpty(taskId))
                {
                    failure = "返回内容中缺少 task_id";
                }
            }
            else if (responseText.Contains("error_code"))
            {
                var error = JsonConvert.DeserializeObject<TTSTaskErrorResponse>(responseText);
                failure = error?.ErrorMsg ?? "未知错误";
            }
            else
            {
                failure = "无法识别的返回内容";
            }
        }
        catch (JsonException ex)
        {
            failure = "返回内容解析失败: " + ex.Message;
        }

        // Callbacks run outside the try so their own exceptions are not misreported
        // as parse failures.
        if (failure != null)
        {
            onError?.Invoke(failure);
        }
        else
        {
            onSuccess?.Invoke(taskId);
        }
    }
}
/// <summary>
/// Polls the long-text synthesis task every two seconds until it leaves the
/// "Running" state. Exactly one of the two callbacks is invoked.
/// </summary>
/// <param name="accessToken">Access token appended to the query URL.</param>
/// <param name="taskId">Id of the task to query.</param>
/// <param name="onSuccess">Receives the download URL of the synthesized audio.</param>
/// <param name="onError">Receives a description of the failure.</param>
/// <param name="timeoutSeconds">
/// Maximum real time to keep polling. 0 or a negative value (the default) polls until
/// the task finishes, which matches the behaviour before this parameter existed.
/// </param>
/// <returns>Coroutine enumerator; run it with <c>StartCoroutine</c>.</returns>
public IEnumerator QueryTTSTaskStatus(string accessToken, string taskId, Action<string> onSuccess, Action<string> onError, float timeoutSeconds = 0f)
{
    if (string.IsNullOrEmpty(accessToken) || string.IsNullOrEmpty(taskId))
    {
        onError?.Invoke("accessToken 或 taskId 为空");
        yield break;
    }

    const float pollIntervalSeconds = 2f;
    string url = $"https://aip.baidubce.com/rpc/2.0/tts/v1/query?access_token={accessToken}";
    // The body is identical for every poll, so encode it once.
    byte[] body = Encoding.UTF8.GetBytes(JsonConvert.SerializeObject(new { task_ids = new string[] { taskId } }));
    float startedAt = Time.realtimeSinceStartup;

    while (true)
    {
        TTSQueryTaskInfo task = null;
        string failure = null;

        // The request is scoped to one poll so it is disposed before the wait below.
        using (UnityWebRequest request = new UnityWebRequest(url, "POST"))
        {
            request.uploadHandler = new UploadHandlerRaw(body);
            request.downloadHandler = new DownloadHandlerBuffer();
            request.SetRequestHeader("Content-Type", "application/json");
            request.SetRequestHeader("Accept", "application/json");

            yield return request.SendWebRequest();

            if (request.result != UnityWebRequest.Result.Success)
            {
                failure = "网络错误:" + request.error;
            }
            else
            {
                // Parse inside try/catch: an exception escaping this coroutine would
                // leave the coroutine that is waiting on it suspended.
                try
                {
                    var root = JsonConvert.DeserializeObject<TTSQueryResponse>(request.downloadHandler.text);
                    if (root?.TasksInfo != null && root.TasksInfo.Count > 0)
                    {
                        task = root.TasksInfo[0];
                    }

                    if (task == null)
                    {
                        failure = "未找到任务信息";
                    }
                }
                catch (JsonException ex)
                {
                    failure = "查询结果解析失败: " + ex.Message;
                }
            }
        }

        if (failure != null)
        {
            onError?.Invoke(failure);
            yield break;
        }

        switch (task.TaskStatus)
        {
            case "Success":
                if (!string.IsNullOrEmpty(task.TaskResult?.SpeechUrl))
                {
                    onSuccess?.Invoke(task.TaskResult.SpeechUrl);
                }
                else
                {
                    onError?.Invoke("合成成功但未返回语音地址");
                }
                yield break;

            case "Failure":
                onError?.Invoke(task.TaskResult?.ErrMsg ?? "未知错误");
                yield break;

            case "Running":
                Debug.Log("🎙 正在合成...");
                break;

            default:
                onError?.Invoke("未知状态:" + task.TaskStatus);
                yield break;
        }

        if (timeoutSeconds > 0f && Time.realtimeSinceStartup - startedAt >= timeoutSeconds)
        {
            onError?.Invoke("查询超时");
            yield break;
        }

        // Realtime wait: polling a web service should not stall while Time.timeScale is 0.
        yield return new WaitForSecondsRealtime(pollIntervalSeconds);
    }
}
/// <summary>
/// Downloads the synthesized audio and passes the decoded clip to
/// <paramref name="onComplete"/>.
/// </summary>
/// <param name="url">Download URL of the audio.</param>
/// <param name="onComplete">
/// Invoked exactly once: with the decoded clip on success, or with null when the URL is
/// empty, the download fails or the audio cannot be decoded.
/// </param>
/// <returns>Coroutine enumerator; run it with <c>StartCoroutine</c>.</returns>
public IEnumerator DownloadAudio(string url, Action<AudioClip> onComplete)
{
    if (string.IsNullOrEmpty(url))
    {
        Debug.LogError("❌ 下载音频失败:下载地址为空");
        onComplete?.Invoke(null);
        yield break;
    }

    using (UnityWebRequest request = UnityWebRequestMultimedia.GetAudioClip(url, ResolveDownloadAudioType(url)))
    {
        yield return request.SendWebRequest();

        if (request.result != UnityWebRequest.Result.Success)
        {
            Debug.LogError("❌ 下载音频失败:" + request.error);
            onComplete?.Invoke(null);
            yield break;
        }

        AudioClip clip = DownloadHandlerAudioClip.GetContent(request);
        if (clip != null)
        {
            Debug.Log("✅ 音频合成结束,等待播放");
            onComplete?.Invoke(clip);
        }
        else
        {
            Debug.LogError("❌ 无法解析音频 Clip");
            onComplete?.Invoke(null);
        }
    }
}

// Picks the decoder for a downloaded file: WAV when the configured long-text format
// or the URL path says so, otherwise MPEG (the decoder the original code always used).
private AudioType ResolveDownloadAudioType(string url)
{
    bool formatIsWav = !string.IsNullOrEmpty(format)
        && format.StartsWith("wav", StringComparison.OrdinalIgnoreCase);

    // Compare the path only, so a query string after the file name does not hide the extension.
    string path = Uri.TryCreate(url, UriKind.Absolute, out Uri parsed) ? parsed.AbsolutePath : url;
    bool urlIsWav = path.EndsWith(".wav", StringComparison.OrdinalIgnoreCase);

    return formatIsWav || urlIsWav ? AudioType.WAV : AudioType.MPEG;
}
/// <summary>
/// JSON shape of the authentication response that GetAccessToken deserializes.
/// </summary>
[Serializable]
public class TokenResponse
{
/// <summary>
/// Access token; GetAccessToken copies it into the accessToken field.
/// </summary>
public string access_token;
// Not read anywhere in this file; presumably the token lifetime in seconds — TODO confirm against the Baidu OAuth documentation.
public int expires_in;
}
/// <summary>
/// JSON shape of a successful "create synthesis task" response; CreateTTSTask
/// deserializes it when the response body contains "task_id".
/// </summary>
[Serializable]
public class TTSTaskSuccessResponse
{
// Value of the "log_id" field; not read anywhere in this file.
[JsonProperty("log_id")]
public long LogId { get; set; }
// Task id that CreateTTSTask passes to its onSuccess callback.
[JsonProperty("task_id")]
public string TaskId { get; set; }
[JsonProperty("task_status")]
public string TaskStatus { get; set; } // "Running"
}
/// <summary>
/// JSON shape of a failed "create synthesis task" response; CreateTTSTask
/// deserializes it when the response body contains "error_code".
/// </summary>
[Serializable]
public class TTSTaskErrorResponse
{
// Value of the "error_code" field; not read anywhere in this file.
[JsonProperty("error_code")]
public int ErrorCode { get; set; }
// Error message that CreateTTSTask passes to its onError callback.
[JsonProperty("error_msg")]
public string ErrorMsg { get; set; }
// Value of the "log_id" field; not read anywhere in this file.
[JsonProperty("log_id")]
public long LogId { get; set; }
}
/// <summary>
/// JSON shape of the "query synthesis task" response that QueryTTSTaskStatus deserializes.
/// </summary>
[Serializable]
public class TTSQueryResponse
{
// Value of the "log_id" field; not read anywhere in this file.
[JsonProperty("log_id")]
public long LogId { get; set; }
// Per-task entries; QueryTTSTaskStatus only inspects the first one.
[JsonProperty("tasks_info")]
public List<TTSQueryTaskInfo> TasksInfo { get; set; }
}
/// <summary>
/// One entry of the "tasks_info" array in the query response.
/// </summary>
[Serializable]
public class TTSQueryTaskInfo
{
// Value of the "task_id" field; not read anywhere in this file.
[JsonProperty("task_id")]
public string TaskId { get; set; }
// QueryTTSTaskStatus handles "Success", "Failure" and "Running"; any other value is reported as an unknown state.
[JsonProperty("task_status")]
public string TaskStatus { get; set; }
// Carries the download URL on success and the error message on failure.
[JsonProperty("task_result")]
public TTSQueryTaskResult TaskResult { get; set; }
}
/// <summary>
/// The "task_result" object of a queried synthesis task.
/// </summary>
[Serializable]
public class TTSQueryTaskResult
{
// Download URL of the synthesized audio; QueryTTSTaskStatus passes it to onSuccess.
[JsonProperty("speech_url")]
public string SpeechUrl { get; set; }
// Value of the "err_no" field; not read anywhere in this file.
[JsonProperty("err_no")]
public int ErrNo { get; set; }
// Error message that QueryTTSTaskStatus passes to onError when the status is "Failure".
[JsonProperty("err_msg")]
public string ErrMsg { get; set; }
// Value of the "sn" field; not read anywhere in this file.
[JsonProperty("sn")]
public string Sn { get; set; }
}
#endregion
}