-
供应器管理
+
字段管理
diff --git a/main/xiaozhi-server/core/connection.py b/main/xiaozhi-server/core/connection.py
index 74b612fe9..ea0be02d3 100644
--- a/main/xiaozhi-server/core/connection.py
+++ b/main/xiaozhi-server/core/connection.py
@@ -14,6 +14,8 @@
from typing import Dict, Any
from plugins_func.loadplugins import auto_import_modules
from config.logger import setup_logging
+from config.config_loader import get_project_dir
+from core.utils import p3
from core.utils.dialogue import Message, Dialogue
from core.handle.textHandle import handleTextMessage
from core.utils.util import (
@@ -506,106 +508,20 @@ def change_system_prompt(self, prompt):
# 更新系统prompt至上下文
self.dialogue.update_system_message(self.prompt)
- def chat(self, query):
-
- self.dialogue.put(Message(role="user", content=query))
-
- response_message = []
- processed_chars = 0 # 跟踪已处理的字符位置
- try:
- # 使用带记忆的对话
- memory_str = None
- if self.memory is not None:
- future = asyncio.run_coroutine_threadsafe(
- self.memory.query_memory(query), self.loop
- )
- memory_str = future.result()
-
- self.logger.bind(tag=TAG).debug(f"记忆内容: {memory_str}")
- llm_responses = self.llm.response(
- self.session_id, self.dialogue.get_llm_dialogue_with_memory(memory_str)
- )
- except Exception as e:
- self.logger.bind(tag=TAG).error(f"LLM 处理出错 {query}: {e}")
- return None
-
- self.llm_finish_task = False
- text_index = 0
- for content in llm_responses:
- response_message.append(content)
- if self.client_abort:
- break
-
- # 合并当前全部文本并处理未分割部分
- full_text = "".join(response_message)
- current_text = full_text[processed_chars:] # 从未处理的位置开始
-
- # 查找最后一个有效标点
- punctuations = ("。", ".", "?", "?", "!", "!", ";", ";", ":")
- last_punct_pos = -1
- number_flag = True
- for punct in punctuations:
- pos = current_text.rfind(punct)
- prev_char = current_text[pos - 1] if pos - 1 >= 0 else ""
- # 如果.前面是数字统一判断为小数
- if prev_char.isdigit() and punct == ".":
- number_flag = False
- if pos > last_punct_pos and number_flag:
- last_punct_pos = pos
-
- # 找到分割点则处理
- if last_punct_pos != -1:
- segment_text_raw = current_text[: last_punct_pos + 1]
- segment_text = get_string_no_punctuation_or_emoji(segment_text_raw)
- if segment_text:
- # 强制设置空字符,测试TTS出错返回语音的健壮性
- # if text_index % 2 == 0:
- # segment_text = " "
- text_index += 1
- self.recode_first_last_text(segment_text, text_index)
- future = self.executor.submit(
- self.speak_and_play, segment_text, text_index
- )
- self.tts_queue.put((future, text_index))
- processed_chars += len(segment_text_raw) # 更新已处理字符位置
-
- # 处理最后剩余的文本
- full_text = "".join(response_message)
- remaining_text = full_text[processed_chars:]
- if remaining_text:
- segment_text = get_string_no_punctuation_or_emoji(remaining_text)
- if segment_text:
- text_index += 1
- self.recode_first_last_text(segment_text, text_index)
- future = self.executor.submit(
- self.speak_and_play, segment_text, text_index
- )
- self.tts_queue.put((future, text_index))
-
- self.llm_finish_task = True
- self.dialogue.put(Message(role="assistant", content="".join(response_message)))
- self.logger.bind(tag=TAG).debug(
- json.dumps(self.dialogue.get_llm_dialogue(), indent=4, ensure_ascii=False)
- )
- return True
-
- def chat_with_function_calling(self, query, tool_call=False):
- self.logger.bind(tag=TAG).debug(f"Chat with function calling start: {query}")
- """Chat with function calling for intent detection using streaming"""
+ def chat(self, query, tool_call=False):
+ self.logger.bind(tag=TAG).debug(f"Chat: {query}")
if not tool_call:
self.dialogue.put(Message(role="user", content=query))
# Define intent functions
functions = None
- if hasattr(self, "func_handler"):
+ if self.intent_type == "function_call" and hasattr(self, "func_handler"):
functions = self.func_handler.get_functions()
response_message = []
processed_chars = 0 # 跟踪已处理的字符位置
try:
- start_time = time.time()
-
# 使用带记忆的对话
memory_str = None
if self.memory is not None:
@@ -614,14 +530,18 @@ def chat_with_function_calling(self, query, tool_call=False):
)
memory_str = future.result()
- # self.logger.bind(tag=TAG).info(f"对话记录: {self.dialogue.get_llm_dialogue_with_memory(memory_str)}")
-
- # 使用支持functions的streaming接口
- llm_responses = self.llm.response_with_functions(
- self.session_id,
- self.dialogue.get_llm_dialogue_with_memory(memory_str),
- functions=functions,
- )
+ if functions is not None:
+ # 使用支持functions的streaming接口
+ llm_responses = self.llm.response_with_functions(
+ self.session_id,
+ self.dialogue.get_llm_dialogue_with_memory(memory_str),
+ functions=functions,
+ )
+ else:
+ llm_responses = self.llm.response(
+ self.session_id,
+ self.dialogue.get_llm_dialogue_with_memory(memory_str),
+ )
except Exception as e:
self.logger.bind(tag=TAG).error(f"LLM 处理出错 {query}: {e}")
return None
@@ -637,27 +557,28 @@ def chat_with_function_calling(self, query, tool_call=False):
content_arguments = ""
for response in llm_responses:
- content, tools_call = response
-
- if "content" in response:
- content = response["content"]
- tools_call = None
- if content is not None and len(content) > 0:
- content_arguments += content
-
- if not tool_call_flag and content_arguments.startswith(""):
- # print("content_arguments", content_arguments)
- tool_call_flag = True
-
- if tools_call is not None:
- tool_call_flag = True
- if tools_call[0].id is not None:
- function_id = tools_call[0].id
- if tools_call[0].function.name is not None:
- function_name = tools_call[0].function.name
- if tools_call[0].function.arguments is not None:
- function_arguments += tools_call[0].function.arguments
-
+ if self.intent_type == "function_call":
+ content, tools_call = response
+ if "content" in response:
+ content = response["content"]
+ tools_call = None
+ if content is not None and len(content) > 0:
+ content_arguments += content
+
+ if not tool_call_flag and content_arguments.startswith(""):
+ # print("content_arguments", content_arguments)
+ tool_call_flag = True
+
+ if tools_call is not None:
+ tool_call_flag = True
+ if tools_call[0].id is not None:
+ function_id = tools_call[0].id
+ if tools_call[0].function.name is not None:
+ function_name = tools_call[0].function.name
+ if tools_call[0].function.arguments is not None:
+ function_arguments += tools_call[0].function.arguments
+ else:
+ content = response
if content is not None and len(content) > 0:
if not tool_call_flag:
response_message.append(content)
@@ -696,7 +617,7 @@ def chat_with_function_calling(self, query, tool_call=False):
text_index += 1
self.recode_first_last_text(segment_text, text_index)
future = self.executor.submit(
- self.speak_and_play, segment_text, text_index
+ self.speak_and_play, None, segment_text, text_index
)
self.tts_queue.put((future, text_index))
# 更新已处理字符位置
@@ -755,7 +676,7 @@ def chat_with_function_calling(self, query, tool_call=False):
text_index += 1
self.recode_first_last_text(segment_text, text_index)
future = self.executor.submit(
- self.speak_and_play, segment_text, text_index
+ self.speak_and_play, None, segment_text, text_index
)
self.tts_queue.put((future, text_index))
@@ -818,7 +739,7 @@ def _handle_function_result(self, result, function_call_data, text_index):
if result.action == Action.RESPONSE: # 直接回复前端
text = result.response
self.recode_first_last_text(text, text_index)
- future = self.executor.submit(self.speak_and_play, text, text_index)
+ future = self.executor.submit(self.speak_and_play, None, text, text_index)
self.tts_queue.put((future, text_index))
self.dialogue.put(Message(role="assistant", content=text))
elif result.action == Action.REQLLM: # 调用函数后再请求llm生成回复
@@ -853,11 +774,11 @@ def _handle_function_result(self, result, function_call_data, text_index):
content=text,
)
)
- self.chat_with_function_calling(text, tool_call=True)
+ self.chat(text, tool_call=True)
elif result.action == Action.NOTFOUND or result.action == Action.ERROR:
text = result.result
self.recode_first_last_text(text, text_index)
- future = self.executor.submit(self.speak_and_play, text, text_index)
+ future = self.executor.submit(self.speak_and_play, None, text, text_index)
self.tts_queue.put((future, text_index))
self.dialogue.put(Message(role="assistant", content=text))
else:
@@ -884,11 +805,7 @@ def _tts_priority_thread(self):
self.logger.bind(tag=TAG).debug("正在处理TTS任务...")
tts_timeout = int(self.config.get("tts_timeout", 10))
tts_file, text, _ = future.result(timeout=tts_timeout)
- if text is None or len(text) <= 0:
- self.logger.bind(tag=TAG).error(
- f"TTS出错:{text_index}: tts text is empty"
- )
- elif tts_file is None:
+ if tts_file is None:
self.logger.bind(tag=TAG).error(
f"TTS出错: file is empty: {text_index}: {text}"
)
@@ -897,12 +814,16 @@ def _tts_priority_thread(self):
f"TTS生成:文件路径: {tts_file}"
)
if os.path.exists(tts_file):
- if self.audio_format == "pcm":
+ if tts_file.endswith(".p3"):
+ audio_datas, _ = p3.decode_opus_from_file(tts_file)
+ elif self.audio_format == "pcm":
audio_datas, _ = self.tts.audio_to_pcm_data(tts_file)
else:
audio_datas, _ = self.tts.audio_to_opus_data(tts_file)
# 在这里上报TTS数据
- enqueue_tts_report(self, text, audio_datas)
+ enqueue_tts_report(
+ self, tts_file if text is None else text, audio_datas
+ )
else:
self.logger.bind(tag=TAG).error(
f"TTS出错:文件不存在{tts_file}"
@@ -918,6 +839,7 @@ def _tts_priority_thread(self):
self.tts.delete_audio_file
and tts_file is not None
and os.path.exists(tts_file)
+ and tts_file.startswith(self.tts.output_file)
):
os.remove(tts_file)
except Exception as e:
@@ -984,18 +906,21 @@ def _report_worker(self):
self.logger.bind(tag=TAG).info("聊天记录上报线程已退出")
- def speak_and_play(self, text, text_index=0):
- if text is None or len(text) <= 0:
- self.logger.bind(tag=TAG).info(f"无需tts转换,query为空,{text}")
- return None, text, text_index
- tts_file = self.tts.to_tts(text)
+ def speak_and_play(self, file_path, content, text_index=0):
+ if file_path is not None:
+ self.logger.bind(tag=TAG).info(f"无需tts转换: 从文件播放,{file_path}")
+ return file_path, content, text_index
+ if content is None or len(content) <= 0:
+ self.logger.bind(tag=TAG).info(f"无需tts转换,query为空,{content}")
+ return None, content, text_index
+ tts_file = self.tts.to_tts(content)
if tts_file is None:
- self.logger.bind(tag=TAG).error(f"tts转换失败,{text}")
- return None, text, text_index
+ self.logger.bind(tag=TAG).error(f"tts转换失败,{content}")
+ return None, content, text_index
self.logger.bind(tag=TAG).debug(f"TTS 文件生成完毕: {tts_file}")
if self.max_output_size > 0:
- add_device_output(self.headers.get("device-id"), len(text))
- return tts_file, text, text_index
+ add_device_output(self.headers.get("device-id"), len(content))
+ return tts_file, content, text_index
def clearSpeakStatus(self):
self.logger.bind(tag=TAG).debug(f"清除服务端讲话状态")
diff --git a/main/xiaozhi-server/core/handle/intentHandler.py b/main/xiaozhi-server/core/handle/intentHandler.py
index 495da96f5..e61b1265c 100644
--- a/main/xiaozhi-server/core/handle/intentHandler.py
+++ b/main/xiaozhi-server/core/handle/intentHandler.py
@@ -109,21 +109,21 @@ def process_function_call():
if result.action == Action.RESPONSE: # 直接回复前端
text = result.response
if text is not None:
- speak_and_play(conn, text)
+ speak_txt(conn, text)
elif result.action == Action.REQLLM: # 调用函数后再请求llm生成回复
text = result.result
conn.dialogue.put(Message(role="tool", content=text))
llm_result = conn.intent.replyResult(text, original_text)
if llm_result is None:
llm_result = text
- speak_and_play(conn, llm_result)
+ speak_txt(conn, llm_result)
elif (
result.action == Action.NOTFOUND
or result.action == Action.ERROR
):
text = result.result
if text is not None:
- speak_and_play(conn, text)
+ speak_txt(conn, text)
elif function_name != "play_music":
# For backward compatibility with original code
# 获取当前最新的文本索引
@@ -131,7 +131,7 @@ def process_function_call():
if text is None:
text = result.result
if text is not None:
- speak_and_play(conn, text)
+ speak_txt(conn, text)
# 将函数执行放在线程池中
conn.executor.submit(process_function_call)
@@ -142,12 +142,12 @@ def process_function_call():
return False
-def speak_and_play(conn, text):
+def speak_txt(conn, text):
text_index = (
conn.tts_last_text_index + 1 if hasattr(conn, "tts_last_text_index") else 0
)
conn.recode_first_last_text(text, text_index)
- future = conn.executor.submit(conn.speak_and_play, text, text_index)
+ future = conn.executor.submit(conn.speak_and_play, None, text, text_index)
conn.llm_finish_task = True
conn.tts_queue.put((future, text_index))
conn.dialogue.put(Message(role="assistant", content=text))
diff --git a/main/xiaozhi-server/core/handle/receiveAudioHandle.py b/main/xiaozhi-server/core/handle/receiveAudioHandle.py
index 9c693751d..2584ab83f 100644
--- a/main/xiaozhi-server/core/handle/receiveAudioHandle.py
+++ b/main/xiaozhi-server/core/handle/receiveAudioHandle.py
@@ -39,7 +39,9 @@ async def handleAudioMessage(conn, audio):
if len(conn.asr_audio) < 15:
conn.asr_server_receive = True
else:
- raw_text, _ = await conn.asr.speech_to_text(conn.asr_audio, conn.session_id) # 确保ASR模块返回原始文本
+ raw_text, _ = await conn.asr.speech_to_text(
+ conn.asr_audio, conn.session_id
+ ) # 确保ASR模块返回原始文本
conn.logger.bind(tag=TAG).info(f"识别文本: {raw_text}")
text_len, _ = remove_punctuation_and_length(raw_text)
if text_len > 0:
@@ -76,11 +78,7 @@ async def startToChat(conn, text):
# 意图未被处理,继续常规聊天流程
await send_stt_message(conn, text)
- if conn.intent_type == "function_call":
- # 使用支持function calling的聊天方法
- conn.executor.submit(conn.chat_with_function_calling, text)
- else:
- conn.executor.submit(conn.chat, text)
+ conn.executor.submit(conn.chat, text)
async def no_voice_close_connect(conn):
diff --git a/main/xiaozhi-server/plugins_func/functions/play_music.py b/main/xiaozhi-server/plugins_func/functions/play_music.py
index 3de19d764..76465641e 100644
--- a/main/xiaozhi-server/plugins_func/functions/play_music.py
+++ b/main/xiaozhi-server/plugins_func/functions/play_music.py
@@ -216,24 +216,16 @@ async def play_local_music(conn, specific_file=None):
text = _get_random_play_prompt(selected_music)
await send_stt_message(conn, text)
conn.dialogue.put(Message(role="assistant", content=text))
- conn.tts_first_text_index = 0
- conn.tts_last_text_index = 0
- tts_file = await asyncio.to_thread(conn.tts.to_tts, text)
- if tts_file is not None and os.path.exists(tts_file):
- conn.tts_last_text_index = 1
- opus_packets, _ = conn.tts.audio_to_opus_data(tts_file)
- conn.audio_play_queue.put((opus_packets, None, 0))
- os.remove(tts_file)
+ conn.recode_first_last_text(text, 0)
+ future = conn.executor.submit(conn.speak_and_play, None, text, 0)
+ conn.tts_queue.put((future, 0))
+ conn.recode_first_last_text(text, 1)
+ future = conn.executor.submit(conn.speak_and_play, music_path, None, 1)
+ conn.tts_queue.put((future, 1))
conn.llm_finish_task = True
- if music_path.endswith(".p3"):
- opus_packets, _ = p3.decode_opus_from_file(music_path)
- else:
- opus_packets, _ = conn.tts.audio_to_opus_data(music_path)
- conn.audio_play_queue.put((opus_packets, None, conn.tts_last_text_index))
-
except Exception as e:
conn.logger.bind(tag=TAG).error(f"播放音乐失败: {str(e)}")
conn.logger.bind(tag=TAG).error(f"详细错误: {traceback.format_exc()}")