diff --git a/main/manager-web/src/components/HeaderBar.vue b/main/manager-web/src/components/HeaderBar.vue index 70b64f610..d0a36acb5 100644 --- a/main/manager-web/src/components/HeaderBar.vue +++ b/main/manager-web/src/components/HeaderBar.vue @@ -51,7 +51,7 @@ 字典管理 - 供应器管理 + 字段管理 服务端管理 diff --git a/main/manager-web/src/views/ProviderManagement.vue b/main/manager-web/src/views/ProviderManagement.vue index 7634b511d..5af816f6d 100644 --- a/main/manager-web/src/views/ProviderManagement.vue +++ b/main/manager-web/src/views/ProviderManagement.vue @@ -3,7 +3,7 @@
-

供应器管理

+

字段管理

diff --git a/main/xiaozhi-server/core/connection.py b/main/xiaozhi-server/core/connection.py index 74b612fe9..ea0be02d3 100644 --- a/main/xiaozhi-server/core/connection.py +++ b/main/xiaozhi-server/core/connection.py @@ -14,6 +14,8 @@ from typing import Dict, Any from plugins_func.loadplugins import auto_import_modules from config.logger import setup_logging +from config.config_loader import get_project_dir +from core.utils import p3 from core.utils.dialogue import Message, Dialogue from core.handle.textHandle import handleTextMessage from core.utils.util import ( @@ -506,106 +508,20 @@ def change_system_prompt(self, prompt): # 更新系统prompt至上下文 self.dialogue.update_system_message(self.prompt) - def chat(self, query): - - self.dialogue.put(Message(role="user", content=query)) - - response_message = [] - processed_chars = 0 # 跟踪已处理的字符位置 - try: - # 使用带记忆的对话 - memory_str = None - if self.memory is not None: - future = asyncio.run_coroutine_threadsafe( - self.memory.query_memory(query), self.loop - ) - memory_str = future.result() - - self.logger.bind(tag=TAG).debug(f"记忆内容: {memory_str}") - llm_responses = self.llm.response( - self.session_id, self.dialogue.get_llm_dialogue_with_memory(memory_str) - ) - except Exception as e: - self.logger.bind(tag=TAG).error(f"LLM 处理出错 {query}: {e}") - return None - - self.llm_finish_task = False - text_index = 0 - for content in llm_responses: - response_message.append(content) - if self.client_abort: - break - - # 合并当前全部文本并处理未分割部分 - full_text = "".join(response_message) - current_text = full_text[processed_chars:] # 从未处理的位置开始 - - # 查找最后一个有效标点 - punctuations = ("。", ".", "?", "?", "!", "!", ";", ";", ":") - last_punct_pos = -1 - number_flag = True - for punct in punctuations: - pos = current_text.rfind(punct) - prev_char = current_text[pos - 1] if pos - 1 >= 0 else "" - # 如果.前面是数字统一判断为小数 - if prev_char.isdigit() and punct == ".": - number_flag = False - if pos > last_punct_pos and number_flag: - last_punct_pos = pos - - # 找到分割点则处理 - if last_punct_pos != -1: - segment_text_raw = current_text[: last_punct_pos + 1] - segment_text = get_string_no_punctuation_or_emoji(segment_text_raw) - if segment_text: - # 强制设置空字符,测试TTS出错返回语音的健壮性 - # if text_index % 2 == 0: - # segment_text = " " - text_index += 1 - self.recode_first_last_text(segment_text, text_index) - future = self.executor.submit( - self.speak_and_play, segment_text, text_index - ) - self.tts_queue.put((future, text_index)) - processed_chars += len(segment_text_raw) # 更新已处理字符位置 - - # 处理最后剩余的文本 - full_text = "".join(response_message) - remaining_text = full_text[processed_chars:] - if remaining_text: - segment_text = get_string_no_punctuation_or_emoji(remaining_text) - if segment_text: - text_index += 1 - self.recode_first_last_text(segment_text, text_index) - future = self.executor.submit( - self.speak_and_play, segment_text, text_index - ) - self.tts_queue.put((future, text_index)) - - self.llm_finish_task = True - self.dialogue.put(Message(role="assistant", content="".join(response_message))) - self.logger.bind(tag=TAG).debug( - json.dumps(self.dialogue.get_llm_dialogue(), indent=4, ensure_ascii=False) - ) - return True - - def chat_with_function_calling(self, query, tool_call=False): - self.logger.bind(tag=TAG).debug(f"Chat with function calling start: {query}") - """Chat with function calling for intent detection using streaming""" + def chat(self, query, tool_call=False): + self.logger.bind(tag=TAG).debug(f"Chat: {query}") if not tool_call: self.dialogue.put(Message(role="user", content=query)) # Define intent functions functions = None - if hasattr(self, "func_handler"): + if self.intent_type == "function_call" and hasattr(self, "func_handler"): functions = self.func_handler.get_functions() response_message = [] processed_chars = 0 # 跟踪已处理的字符位置 try: - start_time = time.time() - # 使用带记忆的对话 memory_str = None if self.memory is not None: @@ -614,14 +530,18 @@ def chat_with_function_calling(self, query, tool_call=False): ) memory_str = future.result() - # self.logger.bind(tag=TAG).info(f"对话记录: {self.dialogue.get_llm_dialogue_with_memory(memory_str)}") - - # 使用支持functions的streaming接口 - llm_responses = self.llm.response_with_functions( - self.session_id, - self.dialogue.get_llm_dialogue_with_memory(memory_str), - functions=functions, - ) + if functions is not None: + # 使用支持functions的streaming接口 + llm_responses = self.llm.response_with_functions( + self.session_id, + self.dialogue.get_llm_dialogue_with_memory(memory_str), + functions=functions, + ) + else: + llm_responses = self.llm.response( + self.session_id, + self.dialogue.get_llm_dialogue_with_memory(memory_str), + ) except Exception as e: self.logger.bind(tag=TAG).error(f"LLM 处理出错 {query}: {e}") return None @@ -637,27 +557,28 @@ def chat_with_function_calling(self, query, tool_call=False): content_arguments = "" for response in llm_responses: - content, tools_call = response - - if "content" in response: - content = response["content"] - tools_call = None - if content is not None and len(content) > 0: - content_arguments += content - - if not tool_call_flag and content_arguments.startswith(""): - # print("content_arguments", content_arguments) - tool_call_flag = True - - if tools_call is not None: - tool_call_flag = True - if tools_call[0].id is not None: - function_id = tools_call[0].id - if tools_call[0].function.name is not None: - function_name = tools_call[0].function.name - if tools_call[0].function.arguments is not None: - function_arguments += tools_call[0].function.arguments - + if self.intent_type == "function_call": + content, tools_call = response + if "content" in response: + content = response["content"] + tools_call = None + if content is not None and len(content) > 0: + content_arguments += content + + if not tool_call_flag and content_arguments.startswith(""): + # print("content_arguments", content_arguments) + tool_call_flag = True + + if tools_call is not None: + tool_call_flag = True + if tools_call[0].id is not None: + function_id = tools_call[0].id + if tools_call[0].function.name is not None: + function_name = tools_call[0].function.name + if tools_call[0].function.arguments is not None: + function_arguments += tools_call[0].function.arguments + else: + content = response if content is not None and len(content) > 0: if not tool_call_flag: response_message.append(content) @@ -696,7 +617,7 @@ def chat_with_function_calling(self, query, tool_call=False): text_index += 1 self.recode_first_last_text(segment_text, text_index) future = self.executor.submit( - self.speak_and_play, segment_text, text_index + self.speak_and_play, None, segment_text, text_index ) self.tts_queue.put((future, text_index)) # 更新已处理字符位置 @@ -755,7 +676,7 @@ def chat_with_function_calling(self, query, tool_call=False): text_index += 1 self.recode_first_last_text(segment_text, text_index) future = self.executor.submit( - self.speak_and_play, segment_text, text_index + self.speak_and_play, None, segment_text, text_index ) self.tts_queue.put((future, text_index)) @@ -818,7 +739,7 @@ def _handle_function_result(self, result, function_call_data, text_index): if result.action == Action.RESPONSE: # 直接回复前端 text = result.response self.recode_first_last_text(text, text_index) - future = self.executor.submit(self.speak_and_play, text, text_index) + future = self.executor.submit(self.speak_and_play, None, text, text_index) self.tts_queue.put((future, text_index)) self.dialogue.put(Message(role="assistant", content=text)) elif result.action == Action.REQLLM: # 调用函数后再请求llm生成回复 @@ -853,11 +774,11 @@ def _handle_function_result(self, result, function_call_data, text_index): content=text, ) ) - self.chat_with_function_calling(text, tool_call=True) + self.chat(text, tool_call=True) elif result.action == Action.NOTFOUND or result.action == Action.ERROR: text = result.result self.recode_first_last_text(text, text_index) - future = self.executor.submit(self.speak_and_play, text, text_index) + future = self.executor.submit(self.speak_and_play, None, text, text_index) self.tts_queue.put((future, text_index)) self.dialogue.put(Message(role="assistant", content=text)) else: @@ -884,11 +805,7 @@ def _tts_priority_thread(self): self.logger.bind(tag=TAG).debug("正在处理TTS任务...") tts_timeout = int(self.config.get("tts_timeout", 10)) tts_file, text, _ = future.result(timeout=tts_timeout) - if text is None or len(text) <= 0: - self.logger.bind(tag=TAG).error( - f"TTS出错:{text_index}: tts text is empty" - ) - elif tts_file is None: + if tts_file is None: self.logger.bind(tag=TAG).error( f"TTS出错: file is empty: {text_index}: {text}" ) @@ -897,12 +814,16 @@ def _tts_priority_thread(self): f"TTS生成:文件路径: {tts_file}" ) if os.path.exists(tts_file): - if self.audio_format == "pcm": + if tts_file.endswith(".p3"): + audio_datas, _ = p3.decode_opus_from_file(tts_file) + elif self.audio_format == "pcm": audio_datas, _ = self.tts.audio_to_pcm_data(tts_file) else: audio_datas, _ = self.tts.audio_to_opus_data(tts_file) # 在这里上报TTS数据 - enqueue_tts_report(self, text, audio_datas) + enqueue_tts_report( + self, tts_file if text is None else text, audio_datas + ) else: self.logger.bind(tag=TAG).error( f"TTS出错:文件不存在{tts_file}" @@ -918,6 +839,7 @@ def _tts_priority_thread(self): self.tts.delete_audio_file and tts_file is not None and os.path.exists(tts_file) + and tts_file.startswith(self.tts.output_file) ): os.remove(tts_file) except Exception as e: @@ -984,18 +906,21 @@ def _report_worker(self): self.logger.bind(tag=TAG).info("聊天记录上报线程已退出") - def speak_and_play(self, text, text_index=0): - if text is None or len(text) <= 0: - self.logger.bind(tag=TAG).info(f"无需tts转换,query为空,{text}") - return None, text, text_index - tts_file = self.tts.to_tts(text) + def speak_and_play(self, file_path, content, text_index=0): + if file_path is not None: + self.logger.bind(tag=TAG).info(f"无需tts转换: 从文件播放,{file_path}") + return file_path, content, text_index + if content is None or len(content) <= 0: + self.logger.bind(tag=TAG).info(f"无需tts转换,query为空,{content}") + return None, content, text_index + tts_file = self.tts.to_tts(content) if tts_file is None: - self.logger.bind(tag=TAG).error(f"tts转换失败,{text}") - return None, text, text_index + self.logger.bind(tag=TAG).error(f"tts转换失败,{content}") + return None, content, text_index self.logger.bind(tag=TAG).debug(f"TTS 文件生成完毕: {tts_file}") if self.max_output_size > 0: - add_device_output(self.headers.get("device-id"), len(text)) - return tts_file, text, text_index + add_device_output(self.headers.get("device-id"), len(content)) + return tts_file, content, text_index def clearSpeakStatus(self): self.logger.bind(tag=TAG).debug(f"清除服务端讲话状态") diff --git a/main/xiaozhi-server/core/handle/intentHandler.py b/main/xiaozhi-server/core/handle/intentHandler.py index 495da96f5..e61b1265c 100644 --- a/main/xiaozhi-server/core/handle/intentHandler.py +++ b/main/xiaozhi-server/core/handle/intentHandler.py @@ -109,21 +109,21 @@ def process_function_call(): if result.action == Action.RESPONSE: # 直接回复前端 text = result.response if text is not None: - speak_and_play(conn, text) + speak_txt(conn, text) elif result.action == Action.REQLLM: # 调用函数后再请求llm生成回复 text = result.result conn.dialogue.put(Message(role="tool", content=text)) llm_result = conn.intent.replyResult(text, original_text) if llm_result is None: llm_result = text - speak_and_play(conn, llm_result) + speak_txt(conn, llm_result) elif ( result.action == Action.NOTFOUND or result.action == Action.ERROR ): text = result.result if text is not None: - speak_and_play(conn, text) + speak_txt(conn, text) elif function_name != "play_music": # For backward compatibility with original code # 获取当前最新的文本索引 @@ -131,7 +131,7 @@ def process_function_call(): if text is None: text = result.result if text is not None: - speak_and_play(conn, text) + speak_txt(conn, text) # 将函数执行放在线程池中 conn.executor.submit(process_function_call) @@ -142,12 +142,12 @@ def process_function_call(): return False -def speak_and_play(conn, text): +def speak_txt(conn, text): text_index = ( conn.tts_last_text_index + 1 if hasattr(conn, "tts_last_text_index") else 0 ) conn.recode_first_last_text(text, text_index) - future = conn.executor.submit(conn.speak_and_play, text, text_index) + future = conn.executor.submit(conn.speak_and_play, None, text, text_index) conn.llm_finish_task = True conn.tts_queue.put((future, text_index)) conn.dialogue.put(Message(role="assistant", content=text)) diff --git a/main/xiaozhi-server/core/handle/receiveAudioHandle.py b/main/xiaozhi-server/core/handle/receiveAudioHandle.py index 9c693751d..2584ab83f 100644 --- a/main/xiaozhi-server/core/handle/receiveAudioHandle.py +++ b/main/xiaozhi-server/core/handle/receiveAudioHandle.py @@ -39,7 +39,9 @@ async def handleAudioMessage(conn, audio): if len(conn.asr_audio) < 15: conn.asr_server_receive = True else: - raw_text, _ = await conn.asr.speech_to_text(conn.asr_audio, conn.session_id) # 确保ASR模块返回原始文本 + raw_text, _ = await conn.asr.speech_to_text( + conn.asr_audio, conn.session_id + ) # 确保ASR模块返回原始文本 conn.logger.bind(tag=TAG).info(f"识别文本: {raw_text}") text_len, _ = remove_punctuation_and_length(raw_text) if text_len > 0: @@ -76,11 +78,7 @@ async def startToChat(conn, text): # 意图未被处理,继续常规聊天流程 await send_stt_message(conn, text) - if conn.intent_type == "function_call": - # 使用支持function calling的聊天方法 - conn.executor.submit(conn.chat_with_function_calling, text) - else: - conn.executor.submit(conn.chat, text) + conn.executor.submit(conn.chat, text) async def no_voice_close_connect(conn): diff --git a/main/xiaozhi-server/plugins_func/functions/play_music.py b/main/xiaozhi-server/plugins_func/functions/play_music.py index 3de19d764..76465641e 100644 --- a/main/xiaozhi-server/plugins_func/functions/play_music.py +++ b/main/xiaozhi-server/plugins_func/functions/play_music.py @@ -216,24 +216,16 @@ async def play_local_music(conn, specific_file=None): text = _get_random_play_prompt(selected_music) await send_stt_message(conn, text) conn.dialogue.put(Message(role="assistant", content=text)) - conn.tts_first_text_index = 0 - conn.tts_last_text_index = 0 - tts_file = await asyncio.to_thread(conn.tts.to_tts, text) - if tts_file is not None and os.path.exists(tts_file): - conn.tts_last_text_index = 1 - opus_packets, _ = conn.tts.audio_to_opus_data(tts_file) - conn.audio_play_queue.put((opus_packets, None, 0)) - os.remove(tts_file) + conn.recode_first_last_text(text, 0) + future = conn.executor.submit(conn.speak_and_play, None, text, 0) + conn.tts_queue.put((future, 0)) + conn.recode_first_last_text(text, 1) + future = conn.executor.submit(conn.speak_and_play, music_path, None, 1) + conn.tts_queue.put((future, 1)) conn.llm_finish_task = True - if music_path.endswith(".p3"): - opus_packets, _ = p3.decode_opus_from_file(music_path) - else: - opus_packets, _ = conn.tts.audio_to_opus_data(music_path) - conn.audio_play_queue.put((opus_packets, None, conn.tts_last_text_index)) - except Exception as e: conn.logger.bind(tag=TAG).error(f"播放音乐失败: {str(e)}") conn.logger.bind(tag=TAG).error(f"详细错误: {traceback.format_exc()}")