首先去微软申请一下 Azure 语音服务的 API 密钥。
不同的语音风格可参考此处:
https://learn.microsoft.com/zh-cn/azure/ai-services/speech-service/language-support?tabs=tts
pip install azure-cognitiveservices-speech
import azure.cognitiveservices.speech as speechsdk
import os
# Azure Speech service subscription key and region.
# Both are read from the SPEECH_KEY / SPEECH_REGION environment variables when
# those are set, so the real key does not have to be written into this file.
# The original placeholder values are kept as fallbacks.
speech_key = os.environ.get("SPEECH_KEY", "xxx")
service_region = os.environ.get("SPEECH_REGION", "eastasia")
def synthesize_speech(text):
    """Synthesize ``text`` to an MP3 file under ``./speech`` and return its path.

    The file is named after ``text``. Characters that are path separators or
    are not allowed in file names are replaced with ``_`` so the file always
    lands inside ``./speech``; text without such characters produces exactly
    the same path as before.

    Args:
        text: The text to speak. It is also used to build the file name.

    Returns:
        The path of the target file, ``./speech/<name>.mp3``. The path is
        returned even when synthesis is canceled; in that case the failure
        reason is printed and the file may be missing or incomplete.
    """
    output_dir = "./speech"
    os.makedirs(output_dir, exist_ok=True)

    # Build a file name that cannot escape output_dir or be rejected by the OS.
    unsafe_chars = '\\/:*?"<>|'
    file_stem = "".join(
        "_" if ch in unsafe_chars or ord(ch) < 32 else ch for ch in text
    )
    # One path is used both for writing and as the return value, so the two
    # can never disagree. Forward slashes keep the result identical to the
    # previously returned "./speech/<text>.mp3".
    output_file = f"{output_dir}/{file_stem}.mp3"

    # Speech configuration: credentials plus the voice to use.
    speech_config = speechsdk.SpeechConfig(subscription=speech_key, region=service_region)
    speech_config.speech_synthesis_voice_name = "zh-CN-YunjianNeural"  # pick a different voice here
    # Request MP3 explicitly: without this the SDK writes its default RIFF/WAV
    # stream, which would not match the ".mp3" extension.
    speech_config.set_speech_synthesis_output_format(
        speechsdk.SpeechSynthesisOutputFormat.Audio16Khz32KBitRateMonoMp3
    )

    # Send the synthesized audio to the output file.
    audio_output = speechsdk.audio.AudioOutputConfig(filename=output_file)

    # Create the synthesizer and run the synthesis, blocking until it is done.
    speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config, audio_config=audio_output)
    result = speech_synthesizer.speak_text_async(text).get()

    # Report the outcome.
    if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
        print(f"语音合成成功,已保存至 {output_file}")
    elif result.reason == speechsdk.ResultReason.Canceled:
        cancellation_details = result.cancellation_details
        print(f"语音合成失败: {cancellation_details.reason}")
        if cancellation_details.reason == speechsdk.CancellationReason.Error:
            print(f"错误详情: {cancellation_details.error_details}")

    return output_file
这样就生成了 mp3 文件。
可用于manim, janim等视频制作