Skip to content

合并chat和chat_with_function_calling #1366

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
May 24, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion main/manager-web/src/components/HeaderBar.vue
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@
字典管理
</el-dropdown-item>
<el-dropdown-item @click.native="goProviderManagement">
供应器管理
字段管理
</el-dropdown-item>
<el-dropdown-item @click.native="goServerSideManagement">
服务端管理
Expand Down
2 changes: 1 addition & 1 deletion main/manager-web/src/views/ProviderManagement.vue
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
<HeaderBar />

<div class="operation-bar">
<h2 class="page-title">供应器管理</h2>
<h2 class="page-title">字段管理</h2>
<div class="right-operations">
<el-dropdown trigger="click" @command="handleSelectModelType" @visible-change="handleDropdownVisibleChange">
<el-button class="category-btn">
Expand Down
203 changes: 64 additions & 139 deletions main/xiaozhi-server/core/connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
from typing import Dict, Any
from plugins_func.loadplugins import auto_import_modules
from config.logger import setup_logging
from config.config_loader import get_project_dir
from core.utils import p3
from core.utils.dialogue import Message, Dialogue
from core.handle.textHandle import handleTextMessage
from core.utils.util import (
Expand Down Expand Up @@ -506,106 +508,20 @@ def change_system_prompt(self, prompt):
# 更新系统prompt至上下文
self.dialogue.update_system_message(self.prompt)

def chat(self, query):

self.dialogue.put(Message(role="user", content=query))

response_message = []
processed_chars = 0 # 跟踪已处理的字符位置
try:
# 使用带记忆的对话
memory_str = None
if self.memory is not None:
future = asyncio.run_coroutine_threadsafe(
self.memory.query_memory(query), self.loop
)
memory_str = future.result()

self.logger.bind(tag=TAG).debug(f"记忆内容: {memory_str}")
llm_responses = self.llm.response(
self.session_id, self.dialogue.get_llm_dialogue_with_memory(memory_str)
)
except Exception as e:
self.logger.bind(tag=TAG).error(f"LLM 处理出错 {query}: {e}")
return None

self.llm_finish_task = False
text_index = 0
for content in llm_responses:
response_message.append(content)
if self.client_abort:
break

# 合并当前全部文本并处理未分割部分
full_text = "".join(response_message)
current_text = full_text[processed_chars:] # 从未处理的位置开始

# 查找最后一个有效标点
punctuations = ("。", ".", "?", "?", "!", "!", ";", ";", ":")
last_punct_pos = -1
number_flag = True
for punct in punctuations:
pos = current_text.rfind(punct)
prev_char = current_text[pos - 1] if pos - 1 >= 0 else ""
# 如果.前面是数字统一判断为小数
if prev_char.isdigit() and punct == ".":
number_flag = False
if pos > last_punct_pos and number_flag:
last_punct_pos = pos

# 找到分割点则处理
if last_punct_pos != -1:
segment_text_raw = current_text[: last_punct_pos + 1]
segment_text = get_string_no_punctuation_or_emoji(segment_text_raw)
if segment_text:
# 强制设置空字符,测试TTS出错返回语音的健壮性
# if text_index % 2 == 0:
# segment_text = " "
text_index += 1
self.recode_first_last_text(segment_text, text_index)
future = self.executor.submit(
self.speak_and_play, segment_text, text_index
)
self.tts_queue.put((future, text_index))
processed_chars += len(segment_text_raw) # 更新已处理字符位置

# 处理最后剩余的文本
full_text = "".join(response_message)
remaining_text = full_text[processed_chars:]
if remaining_text:
segment_text = get_string_no_punctuation_or_emoji(remaining_text)
if segment_text:
text_index += 1
self.recode_first_last_text(segment_text, text_index)
future = self.executor.submit(
self.speak_and_play, segment_text, text_index
)
self.tts_queue.put((future, text_index))

self.llm_finish_task = True
self.dialogue.put(Message(role="assistant", content="".join(response_message)))
self.logger.bind(tag=TAG).debug(
json.dumps(self.dialogue.get_llm_dialogue(), indent=4, ensure_ascii=False)
)
return True

def chat_with_function_calling(self, query, tool_call=False):
self.logger.bind(tag=TAG).debug(f"Chat with function calling start: {query}")
"""Chat with function calling for intent detection using streaming"""
def chat(self, query, tool_call=False):
self.logger.bind(tag=TAG).debug(f"Chat: {query}")

if not tool_call:
self.dialogue.put(Message(role="user", content=query))

# Define intent functions
functions = None
if hasattr(self, "func_handler"):
if self.intent_type == "function_call" and hasattr(self, "func_handler"):
functions = self.func_handler.get_functions()
response_message = []
processed_chars = 0 # 跟踪已处理的字符位置

try:
start_time = time.time()

# 使用带记忆的对话
memory_str = None
if self.memory is not None:
Expand All @@ -614,14 +530,18 @@ def chat_with_function_calling(self, query, tool_call=False):
)
memory_str = future.result()

# self.logger.bind(tag=TAG).info(f"对话记录: {self.dialogue.get_llm_dialogue_with_memory(memory_str)}")

# 使用支持functions的streaming接口
llm_responses = self.llm.response_with_functions(
self.session_id,
self.dialogue.get_llm_dialogue_with_memory(memory_str),
functions=functions,
)
if functions is not None:
# 使用支持functions的streaming接口
llm_responses = self.llm.response_with_functions(
self.session_id,
self.dialogue.get_llm_dialogue_with_memory(memory_str),
functions=functions,
)
else:
llm_responses = self.llm.response(
self.session_id,
self.dialogue.get_llm_dialogue_with_memory(memory_str),
)
except Exception as e:
self.logger.bind(tag=TAG).error(f"LLM 处理出错 {query}: {e}")
return None
Expand All @@ -637,27 +557,28 @@ def chat_with_function_calling(self, query, tool_call=False):
content_arguments = ""

for response in llm_responses:
content, tools_call = response

if "content" in response:
content = response["content"]
tools_call = None
if content is not None and len(content) > 0:
content_arguments += content

if not tool_call_flag and content_arguments.startswith("<tool_call>"):
# print("content_arguments", content_arguments)
tool_call_flag = True

if tools_call is not None:
tool_call_flag = True
if tools_call[0].id is not None:
function_id = tools_call[0].id
if tools_call[0].function.name is not None:
function_name = tools_call[0].function.name
if tools_call[0].function.arguments is not None:
function_arguments += tools_call[0].function.arguments

if self.intent_type == "function_call":
content, tools_call = response
if "content" in response:
content = response["content"]
tools_call = None
if content is not None and len(content) > 0:
content_arguments += content

if not tool_call_flag and content_arguments.startswith("<tool_call>"):
# print("content_arguments", content_arguments)
tool_call_flag = True

if tools_call is not None:
tool_call_flag = True
if tools_call[0].id is not None:
function_id = tools_call[0].id
if tools_call[0].function.name is not None:
function_name = tools_call[0].function.name
if tools_call[0].function.arguments is not None:
function_arguments += tools_call[0].function.arguments
else:
content = response
if content is not None and len(content) > 0:
if not tool_call_flag:
response_message.append(content)
Expand Down Expand Up @@ -696,7 +617,7 @@ def chat_with_function_calling(self, query, tool_call=False):
text_index += 1
self.recode_first_last_text(segment_text, text_index)
future = self.executor.submit(
self.speak_and_play, segment_text, text_index
self.speak_and_play, None, segment_text, text_index
)
self.tts_queue.put((future, text_index))
# 更新已处理字符位置
Expand Down Expand Up @@ -755,7 +676,7 @@ def chat_with_function_calling(self, query, tool_call=False):
text_index += 1
self.recode_first_last_text(segment_text, text_index)
future = self.executor.submit(
self.speak_and_play, segment_text, text_index
self.speak_and_play, None, segment_text, text_index
)
self.tts_queue.put((future, text_index))

Expand Down Expand Up @@ -818,7 +739,7 @@ def _handle_function_result(self, result, function_call_data, text_index):
if result.action == Action.RESPONSE: # 直接回复前端
text = result.response
self.recode_first_last_text(text, text_index)
future = self.executor.submit(self.speak_and_play, text, text_index)
future = self.executor.submit(self.speak_and_play, None, text, text_index)
self.tts_queue.put((future, text_index))
self.dialogue.put(Message(role="assistant", content=text))
elif result.action == Action.REQLLM: # 调用函数后再请求llm生成回复
Expand Down Expand Up @@ -853,11 +774,11 @@ def _handle_function_result(self, result, function_call_data, text_index):
content=text,
)
)
self.chat_with_function_calling(text, tool_call=True)
self.chat(text, tool_call=True)
elif result.action == Action.NOTFOUND or result.action == Action.ERROR:
text = result.result
self.recode_first_last_text(text, text_index)
future = self.executor.submit(self.speak_and_play, text, text_index)
future = self.executor.submit(self.speak_and_play, None, text, text_index)
self.tts_queue.put((future, text_index))
self.dialogue.put(Message(role="assistant", content=text))
else:
Expand All @@ -884,11 +805,7 @@ def _tts_priority_thread(self):
self.logger.bind(tag=TAG).debug("正在处理TTS任务...")
tts_timeout = int(self.config.get("tts_timeout", 10))
tts_file, text, _ = future.result(timeout=tts_timeout)
if text is None or len(text) <= 0:
self.logger.bind(tag=TAG).error(
f"TTS出错:{text_index}: tts text is empty"
)
elif tts_file is None:
if tts_file is None:
self.logger.bind(tag=TAG).error(
f"TTS出错: file is empty: {text_index}: {text}"
)
Expand All @@ -897,12 +814,16 @@ def _tts_priority_thread(self):
f"TTS生成:文件路径: {tts_file}"
)
if os.path.exists(tts_file):
if self.audio_format == "pcm":
if tts_file.endswith(".p3"):
audio_datas, _ = p3.decode_opus_from_file(tts_file)
elif self.audio_format == "pcm":
audio_datas, _ = self.tts.audio_to_pcm_data(tts_file)
else:
audio_datas, _ = self.tts.audio_to_opus_data(tts_file)
# 在这里上报TTS数据
enqueue_tts_report(self, text, audio_datas)
enqueue_tts_report(
self, tts_file if text is None else text, audio_datas
)
else:
self.logger.bind(tag=TAG).error(
f"TTS出错:文件不存在{tts_file}"
Expand All @@ -918,6 +839,7 @@ def _tts_priority_thread(self):
self.tts.delete_audio_file
and tts_file is not None
and os.path.exists(tts_file)
and tts_file.startswith(self.tts.output_file)
):
os.remove(tts_file)
except Exception as e:
Expand Down Expand Up @@ -984,18 +906,21 @@ def _report_worker(self):

self.logger.bind(tag=TAG).info("聊天记录上报线程已退出")

def speak_and_play(self, text, text_index=0):
if text is None or len(text) <= 0:
self.logger.bind(tag=TAG).info(f"无需tts转换,query为空,{text}")
return None, text, text_index
tts_file = self.tts.to_tts(text)
def speak_and_play(self, file_path, content, text_index=0):
if file_path is not None:
self.logger.bind(tag=TAG).info(f"无需tts转换: 从文件播放,{file_path}")
return file_path, content, text_index
if content is None or len(content) <= 0:
self.logger.bind(tag=TAG).info(f"无需tts转换,query为空,{content}")
return None, content, text_index
tts_file = self.tts.to_tts(content)
if tts_file is None:
self.logger.bind(tag=TAG).error(f"tts转换失败,{text}")
return None, text, text_index
self.logger.bind(tag=TAG).error(f"tts转换失败,{content}")
return None, content, text_index
self.logger.bind(tag=TAG).debug(f"TTS 文件生成完毕: {tts_file}")
if self.max_output_size > 0:
add_device_output(self.headers.get("device-id"), len(text))
return tts_file, text, text_index
add_device_output(self.headers.get("device-id"), len(content))
return tts_file, content, text_index

def clearSpeakStatus(self):
self.logger.bind(tag=TAG).debug(f"清除服务端讲话状态")
Expand Down
12 changes: 6 additions & 6 deletions main/xiaozhi-server/core/handle/intentHandler.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,29 +109,29 @@ def process_function_call():
if result.action == Action.RESPONSE: # 直接回复前端
text = result.response
if text is not None:
speak_and_play(conn, text)
speak_txt(conn, text)
elif result.action == Action.REQLLM: # 调用函数后再请求llm生成回复
text = result.result
conn.dialogue.put(Message(role="tool", content=text))
llm_result = conn.intent.replyResult(text, original_text)
if llm_result is None:
llm_result = text
speak_and_play(conn, llm_result)
speak_txt(conn, llm_result)
elif (
result.action == Action.NOTFOUND
or result.action == Action.ERROR
):
text = result.result
if text is not None:
speak_and_play(conn, text)
speak_txt(conn, text)
elif function_name != "play_music":
# For backward compatibility with original code
# 获取当前最新的文本索引
text = result.response
if text is None:
text = result.result
if text is not None:
speak_and_play(conn, text)
speak_txt(conn, text)

# 将函数执行放在线程池中
conn.executor.submit(process_function_call)
Expand All @@ -142,12 +142,12 @@ def process_function_call():
return False


def speak_and_play(conn, text):
def speak_txt(conn, text):
text_index = (
conn.tts_last_text_index + 1 if hasattr(conn, "tts_last_text_index") else 0
)
conn.recode_first_last_text(text, text_index)
future = conn.executor.submit(conn.speak_and_play, text, text_index)
future = conn.executor.submit(conn.speak_and_play, None, text, text_index)
conn.llm_finish_task = True
conn.tts_queue.put((future, text_index))
conn.dialogue.put(Message(role="assistant", content=text))
10 changes: 4 additions & 6 deletions main/xiaozhi-server/core/handle/receiveAudioHandle.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,9 @@ async def handleAudioMessage(conn, audio):
if len(conn.asr_audio) < 15:
conn.asr_server_receive = True
else:
raw_text, _ = await conn.asr.speech_to_text(conn.asr_audio, conn.session_id) # 确保ASR模块返回原始文本
raw_text, _ = await conn.asr.speech_to_text(
conn.asr_audio, conn.session_id
) # 确保ASR模块返回原始文本
conn.logger.bind(tag=TAG).info(f"识别文本: {raw_text}")
text_len, _ = remove_punctuation_and_length(raw_text)
if text_len > 0:
Expand Down Expand Up @@ -76,11 +78,7 @@ async def startToChat(conn, text):

# 意图未被处理,继续常规聊天流程
await send_stt_message(conn, text)
if conn.intent_type == "function_call":
# 使用支持function calling的聊天方法
conn.executor.submit(conn.chat_with_function_calling, text)
else:
conn.executor.submit(conn.chat, text)
conn.executor.submit(conn.chat, text)


async def no_voice_close_connect(conn):
Expand Down
Loading