Introduction
A one-stop experience that combines multimodal input, a knowledge base, and a digital human.
Open Avatar Chat is a modular, real-time digital-human conversation system open-sourced by Alibaba; the complete pipeline can run on a single PC.
It supports low-latency real-time conversation (average response latency of roughly 2.2 seconds) and works with multimodal language models, covering text, audio, and video interaction. Thanks to its modular design, users can swap components as needed to build different feature combinations, making it an efficient and flexible digital-human conversation solution for developers and researchers.
Results after the integration
Agent invocation log
Agent configuration
Integration code in OpenAvatarChat
In the configuration file, add the Dify settings under the LLM_Bailian section:

```yaml
dify_chat_messages: "https://xxxxxxx/v1/chat-messages"  # Dify chat-messages (streaming) endpoint
dify_code: "xxxxxx"                                     # Dify API key
dify_upload: "https://xxxxxxx/v1/files/upload"          # Dify file upload endpoint
```
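For orientation, the three keys are indented under the existing LLM_Bailian entry of the YAML config. A minimal sketch; the surrounding keys are placeholders and depend on which chat_with_*.yaml you start from:

```yaml
LLM_Bailian:
  model_name: "qwen-plus"   # existing settings, shown as placeholders
  api_url: "..."
  api_key: "..."
  # new Dify settings
  dify_chat_messages: "https://xxxxxxx/v1/chat-messages"
  dify_code: "xxxxxx"
  dify_upload: "https://xxxxxxx/v1/files/upload"
```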
Refactoring the LLM handler code
In the LLMConfig configuration class, add the three new fields:

```python
dify_chat_messages: str = Field(default=None)
dify_code: str = Field(default=None)
dify_upload: str = Field(default=None)
```
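For orientation, after the change the configuration class might look roughly like this; the base class and the pre-existing fields shown here are illustrative placeholders and may differ in your version of llm_handler_openai_compatible:

```python
from pydantic import BaseModel, Field

class LLMConfig(BaseModel):  # base class illustrative; keep whatever the original uses
    # Pre-existing fields (illustrative placeholders):
    model_name: str = Field(default="qwen-plus")
    api_url: str = Field(default=None)
    api_key: str = Field(default=None)
    enable_video_input: bool = Field(default=False)
    # New Dify fields from this article:
    dify_chat_messages: str = Field(default=None)
    dify_code: str = Field(default=None)
    dify_upload: str = Field(default=None)
```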
In the __init__ method of the LLMContext context class, add:

```python
self.conversation_id = None   # Dify conversation id; keeps multi-turn context across requests
self.dify_chat_messages = None
self.dify_code = None
self.dify_upload = None
```
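One step the snippets above do not show: the values from LLMConfig still need to be copied into the LLMContext when the handler creates it, otherwise the three dify_* attributes stay None at request time. A minimal sketch, assuming the handler copies config values into the context the way the original handler does when it builds the context (the variable name handler_config is approximate):

```python
# Inside the handler's context-creation method, next to where the existing
# config values (model_name, api_url, ...) are already copied over:
context.dify_chat_messages = handler_config.dify_chat_messages
context.dify_code = handler_config.dify_code
context.dify_upload = handler_config.dify_upload
```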
Finally, the core part: rewrite the handle method so that it calls Dify instead of the OpenAI-compatible client:
```python
# NOTE: this rewrite uses re, json, base64 and requests; make sure they are
# imported at the top of llm_handler_openai_compatible.py.
output_definition = output_definitions.get(ChatDataType.AVATAR_TEXT).definition
context = cast(LLMContext, context)

text = None
if inputs.type == ChatDataType.CAMERA_VIDEO and context.enable_video_input:
    # Cache the latest camera frame; it is uploaded together with the next text input.
    context.current_image = inputs.data.get_main_data()
    return
elif inputs.type == ChatDataType.HUMAN_TEXT:
    text = inputs.data.get_main_data()
else:
    return

speech_id = inputs.data.get_meta("speech_id")
if speech_id is None:
    speech_id = context.session_id

if text is not None:
    context.input_texts += text

text_end = inputs.data.get_meta("human_text_end", False)
if not text_end:
    return

chat_text = context.input_texts
chat_text = re.sub(r"<\|.*?\|>", "", chat_text)
if len(chat_text) < 1:
    return

logger.info(f'llm input {context.model_name} {chat_text} ')
# Build the next chat messages from history; only the commented-out
# OpenAI-compatible call below consumes this directly.
current_content = context.history.generate_next_messages(
    chat_text,
    [context.current_image] if context.current_image is not None else [])
# logger.info(f'llm input {context.model_name} {current_content} ')

# Original OpenAI-compatible call, kept for reference:
# completion = context.client.chat.completions.create(
#     model=context.model_name,
#     messages=[
#         context.system_prompt,
#     ] + current_content,
#     stream=True,
#     stream_options={"include_usage": True}
# )

# Dify chat-messages request body (streaming mode).
request_data = {
    "inputs": {},
    "query": chat_text,
    "response_mode": "streaming",
    "conversation_id": context.conversation_id or "",
    "user": "user",
    "files": []
}

# If a camera frame was cached, upload it to Dify first and attach it to the message.
if context.current_image is not None:
    try:
        for image in [context.current_image]:
            if isinstance(image, bytes):
                binary_image = image
            else:
                base64image = ImageUtils.format_image(image)
                if isinstance(base64image, str) and base64image.startswith('data:image'):
                    image_data = base64image.split(',')[1]
                    binary_image = base64.b64decode(image_data)
                else:
                    # Skip the upload if the frame could not be converted to binary.
                    logger.error("Unsupported image format, skipping upload")
                    continue
            files = {
                'file': ('image.jpg', binary_image, 'image/jpeg')
            }
            data = {
                'user': 'user'
            }
            upload_url = context.dify_upload
            try:
                upload_response = requests.post(
                    upload_url,
                    headers={
                        'Authorization': f'Bearer {context.dify_code}'
                    },
                    files=files,
                    data=data,
                    timeout=(30, 120)
                )
                if upload_response.status_code in [200, 201]:
                    file_info = upload_response.json()
                    request_data["files"].append({
                        "type": "image",
                        "transfer_method": "local_file",
                        "upload_file_id": file_info['id']
                    })
                    logger.info(f"upload image. Status code: {upload_response.status_code}")
                else:
                    logger.error(f"Failed to upload image. Status code: {upload_response.status_code}")
                    logger.error(f"Response: {upload_response.text}")
            except requests.exceptions.RequestException as e:
                logger.error(f"Error uploading image: {str(e)}")
    except Exception as e:
        logger.error(f"Unexpected error handling image upload: {str(e)}")
        # Continue with a text-only request if the image upload fails.

logger.info(f"Sending chat message request with data: {json.dumps(request_data, ensure_ascii=False)}")
try:
    response = requests.post(
        context.dify_chat_messages,
        headers={
            'Authorization': f'Bearer {context.dify_code}',
            'Content-Type': 'application/json'
        },
        json=request_data,
        stream=True
    )
    logger.info(f"Chat message response received. Status code: {response.status_code}")
    if response.status_code != 200:
        logger.error(f"Chat message request failed. Response: {response.text}")
        # Do not try to parse an error body as an SSE stream.
        return
except requests.exceptions.RequestException as e:
    logger.error(f"Failed to send chat message: {str(e)}")
    return

context.current_image = None
context.input_texts = ''
context.output_texts = ''

# Original streaming loop over the OpenAI-compatible completion, kept for reference:
# for chunk in completion:
#     if (chunk and chunk.choices and chunk.choices[0] and chunk.choices[0].delta.content):
#         output_text = chunk.choices[0].delta.content
#         context.output_texts += output_text
#         logger.info(output_text)
#         # build the output data bundle
#         output = DataBundle(output_definition)
#         output.set_main_data(output_text)
#         output.add_meta("avatar_text_end", False)
#         output.add_meta("speech_id", speech_id)
#         yield output

# Parse the Dify SSE stream: each line looks like 'data: {...}' with an 'event' field.
for line in response.iter_lines():
    if line:
        try:
            line_str = line.decode('utf-8')
            if line_str.startswith('data: '):
                line_str = line_str[6:]
            json_response = json.loads(line_str)
            if json_response.get('event') == 'message':
                output_text = json_response.get('answer', '')
                if output_text:
                    context.output_texts += output_text
                    logger.info(f"Received message: {output_text}")
                    output = DataBundle(output_definition)
                    output.set_main_data(output_text)
                    output.add_meta("avatar_text_end", False)
                    output.add_meta("speech_id", speech_id)
                    yield output
            elif json_response.get('event') == 'message_end':
                logger.info("Message stream ended")
                # Remember the conversation id so later turns stay in the same Dify conversation.
                context.conversation_id = json_response.get('conversation_id')
                if 'metadata' in json_response:
                    logger.info(f"Message metadata: {json_response['metadata']}")
        except json.JSONDecodeError as e:
            logger.error(f"Failed to parse JSON: {e}")
            continue
        except Exception as e:
            logger.error(f"Error processing message: {e}")
            continue

context.history.add_message(HistoryMessage(role="avatar", content=context.output_texts))
context.output_texts = ''
logger.info('avatar text end')
end_output = DataBundle(output_definition)
end_output.set_main_data('')
end_output.add_meta("avatar_text_end", True)
end_output.add_meta("speech_id", speech_id)
yield end_output
```
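Before plugging everything into OpenAvatarChat, it can be handy to verify the Dify endpoint and key on their own. The following standalone sketch reuses the same request body and stream parsing as the handler above; the URL and key are hypothetical placeholders and should be taken from your configuration file:

```python
import json
import requests

# Hypothetical placeholders: use the dify_chat_messages URL and dify_code key
# from your configuration file.
DIFY_CHAT_MESSAGES = "https://xxxxxxx/v1/chat-messages"
DIFY_CODE = "xxxxxx"

def ask_dify(query: str, conversation_id: str = "") -> str:
    """Send one streaming chat message to Dify and return the concatenated answer."""
    response = requests.post(
        DIFY_CHAT_MESSAGES,
        headers={
            "Authorization": f"Bearer {DIFY_CODE}",
            "Content-Type": "application/json",
        },
        json={
            "inputs": {},
            "query": query,
            "response_mode": "streaming",
            "conversation_id": conversation_id,
            "user": "user",
            "files": [],
        },
        stream=True,
        timeout=(30, 120),
    )
    response.raise_for_status()

    answer = ""
    for line in response.iter_lines():
        if not line:
            continue
        line_str = line.decode("utf-8")
        if line_str.startswith("data: "):
            line_str = line_str[6:]
        try:
            event = json.loads(line_str)
        except json.JSONDecodeError:
            continue  # skip keep-alive / non-JSON lines
        if event.get("event") == "message":
            chunk = event.get("answer", "")
            print(chunk, end="", flush=True)
            answer += chunk
        elif event.get("event") == "message_end":
            # Reuse this id on the next call to stay in the same Dify conversation.
            print(f"\nconversation_id = {event.get('conversation_id')}")
    return answer

if __name__ == "__main__":
    ask_dify("Hello, please introduce yourself.")
```

If this script streams an answer and prints a conversation_id, the same endpoints and key will work inside the rewritten handle method.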
Finally
If you need custom Dify development, feel free to get in touch.