Skip to content

Commit 76ee2c5

Browse files
authored
Merge pull request #1366 from xinnan-tech/update-fix
合并chat和chat_with_function_calling
2 parents ede8676 + 4721063 commit 76ee2c5

File tree

6 files changed

+82
-167
lines changed

6 files changed

+82
-167
lines changed

main/manager-web/src/components/HeaderBar.vue

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@
5151
字典管理
5252
</el-dropdown-item>
5353
<el-dropdown-item @click.native="goProviderManagement">
54-
供应器管理
54+
字段管理
5555
</el-dropdown-item>
5656
<el-dropdown-item @click.native="goServerSideManagement">
5757
服务端管理

main/manager-web/src/views/ProviderManagement.vue

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
<HeaderBar />
44

55
<div class="operation-bar">
6-
<h2 class="page-title">供应器管理</h2>
6+
<h2 class="page-title">字段管理</h2>
77
<div class="right-operations">
88
<el-dropdown trigger="click" @command="handleSelectModelType" @visible-change="handleDropdownVisibleChange">
99
<el-button class="category-btn">

main/xiaozhi-server/core/connection.py

Lines changed: 64 additions & 139 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414
from typing import Dict, Any
1515
from plugins_func.loadplugins import auto_import_modules
1616
from config.logger import setup_logging
17+
from config.config_loader import get_project_dir
18+
from core.utils import p3
1719
from core.utils.dialogue import Message, Dialogue
1820
from core.handle.textHandle import handleTextMessage
1921
from core.utils.util import (
@@ -506,106 +508,20 @@ def change_system_prompt(self, prompt):
506508
# 更新系统prompt至上下文
507509
self.dialogue.update_system_message(self.prompt)
508510

509-
def chat(self, query):
510-
511-
self.dialogue.put(Message(role="user", content=query))
512-
513-
response_message = []
514-
processed_chars = 0 # 跟踪已处理的字符位置
515-
try:
516-
# 使用带记忆的对话
517-
memory_str = None
518-
if self.memory is not None:
519-
future = asyncio.run_coroutine_threadsafe(
520-
self.memory.query_memory(query), self.loop
521-
)
522-
memory_str = future.result()
523-
524-
self.logger.bind(tag=TAG).debug(f"记忆内容: {memory_str}")
525-
llm_responses = self.llm.response(
526-
self.session_id, self.dialogue.get_llm_dialogue_with_memory(memory_str)
527-
)
528-
except Exception as e:
529-
self.logger.bind(tag=TAG).error(f"LLM 处理出错 {query}: {e}")
530-
return None
531-
532-
self.llm_finish_task = False
533-
text_index = 0
534-
for content in llm_responses:
535-
response_message.append(content)
536-
if self.client_abort:
537-
break
538-
539-
# 合并当前全部文本并处理未分割部分
540-
full_text = "".join(response_message)
541-
current_text = full_text[processed_chars:] # 从未处理的位置开始
542-
543-
# 查找最后一个有效标点
544-
punctuations = ("。", ".", "?", "?", "!", "!", ";", ";", ":")
545-
last_punct_pos = -1
546-
number_flag = True
547-
for punct in punctuations:
548-
pos = current_text.rfind(punct)
549-
prev_char = current_text[pos - 1] if pos - 1 >= 0 else ""
550-
# 如果.前面是数字统一判断为小数
551-
if prev_char.isdigit() and punct == ".":
552-
number_flag = False
553-
if pos > last_punct_pos and number_flag:
554-
last_punct_pos = pos
555-
556-
# 找到分割点则处理
557-
if last_punct_pos != -1:
558-
segment_text_raw = current_text[: last_punct_pos + 1]
559-
segment_text = get_string_no_punctuation_or_emoji(segment_text_raw)
560-
if segment_text:
561-
# 强制设置空字符,测试TTS出错返回语音的健壮性
562-
# if text_index % 2 == 0:
563-
# segment_text = " "
564-
text_index += 1
565-
self.recode_first_last_text(segment_text, text_index)
566-
future = self.executor.submit(
567-
self.speak_and_play, segment_text, text_index
568-
)
569-
self.tts_queue.put((future, text_index))
570-
processed_chars += len(segment_text_raw) # 更新已处理字符位置
571-
572-
# 处理最后剩余的文本
573-
full_text = "".join(response_message)
574-
remaining_text = full_text[processed_chars:]
575-
if remaining_text:
576-
segment_text = get_string_no_punctuation_or_emoji(remaining_text)
577-
if segment_text:
578-
text_index += 1
579-
self.recode_first_last_text(segment_text, text_index)
580-
future = self.executor.submit(
581-
self.speak_and_play, segment_text, text_index
582-
)
583-
self.tts_queue.put((future, text_index))
584-
585-
self.llm_finish_task = True
586-
self.dialogue.put(Message(role="assistant", content="".join(response_message)))
587-
self.logger.bind(tag=TAG).debug(
588-
json.dumps(self.dialogue.get_llm_dialogue(), indent=4, ensure_ascii=False)
589-
)
590-
return True
591-
592-
def chat_with_function_calling(self, query, tool_call=False):
593-
self.logger.bind(tag=TAG).debug(f"Chat with function calling start: {query}")
594-
"""Chat with function calling for intent detection using streaming"""
511+
def chat(self, query, tool_call=False):
512+
self.logger.bind(tag=TAG).debug(f"Chat: {query}")
595513

596514
if not tool_call:
597515
self.dialogue.put(Message(role="user", content=query))
598516

599517
# Define intent functions
600518
functions = None
601-
if hasattr(self, "func_handler"):
519+
if self.intent_type == "function_call" and hasattr(self, "func_handler"):
602520
functions = self.func_handler.get_functions()
603521
response_message = []
604522
processed_chars = 0 # 跟踪已处理的字符位置
605523

606524
try:
607-
start_time = time.time()
608-
609525
# 使用带记忆的对话
610526
memory_str = None
611527
if self.memory is not None:
@@ -614,14 +530,18 @@ def chat_with_function_calling(self, query, tool_call=False):
614530
)
615531
memory_str = future.result()
616532

617-
# self.logger.bind(tag=TAG).info(f"对话记录: {self.dialogue.get_llm_dialogue_with_memory(memory_str)}")
618-
619-
# 使用支持functions的streaming接口
620-
llm_responses = self.llm.response_with_functions(
621-
self.session_id,
622-
self.dialogue.get_llm_dialogue_with_memory(memory_str),
623-
functions=functions,
624-
)
533+
if functions is not None:
534+
# 使用支持functions的streaming接口
535+
llm_responses = self.llm.response_with_functions(
536+
self.session_id,
537+
self.dialogue.get_llm_dialogue_with_memory(memory_str),
538+
functions=functions,
539+
)
540+
else:
541+
llm_responses = self.llm.response(
542+
self.session_id,
543+
self.dialogue.get_llm_dialogue_with_memory(memory_str),
544+
)
625545
except Exception as e:
626546
self.logger.bind(tag=TAG).error(f"LLM 处理出错 {query}: {e}")
627547
return None
@@ -637,27 +557,28 @@ def chat_with_function_calling(self, query, tool_call=False):
637557
content_arguments = ""
638558

639559
for response in llm_responses:
640-
content, tools_call = response
641-
642-
if "content" in response:
643-
content = response["content"]
644-
tools_call = None
645-
if content is not None and len(content) > 0:
646-
content_arguments += content
647-
648-
if not tool_call_flag and content_arguments.startswith("<tool_call>"):
649-
# print("content_arguments", content_arguments)
650-
tool_call_flag = True
651-
652-
if tools_call is not None:
653-
tool_call_flag = True
654-
if tools_call[0].id is not None:
655-
function_id = tools_call[0].id
656-
if tools_call[0].function.name is not None:
657-
function_name = tools_call[0].function.name
658-
if tools_call[0].function.arguments is not None:
659-
function_arguments += tools_call[0].function.arguments
660-
560+
if self.intent_type == "function_call":
561+
content, tools_call = response
562+
if "content" in response:
563+
content = response["content"]
564+
tools_call = None
565+
if content is not None and len(content) > 0:
566+
content_arguments += content
567+
568+
if not tool_call_flag and content_arguments.startswith("<tool_call>"):
569+
# print("content_arguments", content_arguments)
570+
tool_call_flag = True
571+
572+
if tools_call is not None:
573+
tool_call_flag = True
574+
if tools_call[0].id is not None:
575+
function_id = tools_call[0].id
576+
if tools_call[0].function.name is not None:
577+
function_name = tools_call[0].function.name
578+
if tools_call[0].function.arguments is not None:
579+
function_arguments += tools_call[0].function.arguments
580+
else:
581+
content = response
661582
if content is not None and len(content) > 0:
662583
if not tool_call_flag:
663584
response_message.append(content)
@@ -696,7 +617,7 @@ def chat_with_function_calling(self, query, tool_call=False):
696617
text_index += 1
697618
self.recode_first_last_text(segment_text, text_index)
698619
future = self.executor.submit(
699-
self.speak_and_play, segment_text, text_index
620+
self.speak_and_play, None, segment_text, text_index
700621
)
701622
self.tts_queue.put((future, text_index))
702623
# 更新已处理字符位置
@@ -755,7 +676,7 @@ def chat_with_function_calling(self, query, tool_call=False):
755676
text_index += 1
756677
self.recode_first_last_text(segment_text, text_index)
757678
future = self.executor.submit(
758-
self.speak_and_play, segment_text, text_index
679+
self.speak_and_play, None, segment_text, text_index
759680
)
760681
self.tts_queue.put((future, text_index))
761682

@@ -818,7 +739,7 @@ def _handle_function_result(self, result, function_call_data, text_index):
818739
if result.action == Action.RESPONSE: # 直接回复前端
819740
text = result.response
820741
self.recode_first_last_text(text, text_index)
821-
future = self.executor.submit(self.speak_and_play, text, text_index)
742+
future = self.executor.submit(self.speak_and_play, None, text, text_index)
822743
self.tts_queue.put((future, text_index))
823744
self.dialogue.put(Message(role="assistant", content=text))
824745
elif result.action == Action.REQLLM: # 调用函数后再请求llm生成回复
@@ -853,11 +774,11 @@ def _handle_function_result(self, result, function_call_data, text_index):
853774
content=text,
854775
)
855776
)
856-
self.chat_with_function_calling(text, tool_call=True)
777+
self.chat(text, tool_call=True)
857778
elif result.action == Action.NOTFOUND or result.action == Action.ERROR:
858779
text = result.result
859780
self.recode_first_last_text(text, text_index)
860-
future = self.executor.submit(self.speak_and_play, text, text_index)
781+
future = self.executor.submit(self.speak_and_play, None, text, text_index)
861782
self.tts_queue.put((future, text_index))
862783
self.dialogue.put(Message(role="assistant", content=text))
863784
else:
@@ -884,11 +805,7 @@ def _tts_priority_thread(self):
884805
self.logger.bind(tag=TAG).debug("正在处理TTS任务...")
885806
tts_timeout = int(self.config.get("tts_timeout", 10))
886807
tts_file, text, _ = future.result(timeout=tts_timeout)
887-
if text is None or len(text) <= 0:
888-
self.logger.bind(tag=TAG).error(
889-
f"TTS出错:{text_index}: tts text is empty"
890-
)
891-
elif tts_file is None:
808+
if tts_file is None:
892809
self.logger.bind(tag=TAG).error(
893810
f"TTS出错: file is empty: {text_index}: {text}"
894811
)
@@ -897,12 +814,16 @@ def _tts_priority_thread(self):
897814
f"TTS生成:文件路径: {tts_file}"
898815
)
899816
if os.path.exists(tts_file):
900-
if self.audio_format == "pcm":
817+
if tts_file.endswith(".p3"):
818+
audio_datas, _ = p3.decode_opus_from_file(tts_file)
819+
elif self.audio_format == "pcm":
901820
audio_datas, _ = self.tts.audio_to_pcm_data(tts_file)
902821
else:
903822
audio_datas, _ = self.tts.audio_to_opus_data(tts_file)
904823
# 在这里上报TTS数据
905-
enqueue_tts_report(self, text, audio_datas)
824+
enqueue_tts_report(
825+
self, tts_file if text is None else text, audio_datas
826+
)
906827
else:
907828
self.logger.bind(tag=TAG).error(
908829
f"TTS出错:文件不存在{tts_file}"
@@ -918,6 +839,7 @@ def _tts_priority_thread(self):
918839
self.tts.delete_audio_file
919840
and tts_file is not None
920841
and os.path.exists(tts_file)
842+
and tts_file.startswith(self.tts.output_file)
921843
):
922844
os.remove(tts_file)
923845
except Exception as e:
@@ -984,18 +906,21 @@ def _report_worker(self):
984906

985907
self.logger.bind(tag=TAG).info("聊天记录上报线程已退出")
986908

987-
def speak_and_play(self, text, text_index=0):
988-
if text is None or len(text) <= 0:
989-
self.logger.bind(tag=TAG).info(f"无需tts转换,query为空,{text}")
990-
return None, text, text_index
991-
tts_file = self.tts.to_tts(text)
909+
def speak_and_play(self, file_path, content, text_index=0):
910+
if file_path is not None:
911+
self.logger.bind(tag=TAG).info(f"无需tts转换: 从文件播放,{file_path}")
912+
return file_path, content, text_index
913+
if content is None or len(content) <= 0:
914+
self.logger.bind(tag=TAG).info(f"无需tts转换,query为空,{content}")
915+
return None, content, text_index
916+
tts_file = self.tts.to_tts(content)
992917
if tts_file is None:
993-
self.logger.bind(tag=TAG).error(f"tts转换失败,{text}")
994-
return None, text, text_index
918+
self.logger.bind(tag=TAG).error(f"tts转换失败,{content}")
919+
return None, content, text_index
995920
self.logger.bind(tag=TAG).debug(f"TTS 文件生成完毕: {tts_file}")
996921
if self.max_output_size > 0:
997-
add_device_output(self.headers.get("device-id"), len(text))
998-
return tts_file, text, text_index
922+
add_device_output(self.headers.get("device-id"), len(content))
923+
return tts_file, content, text_index
999924

1000925
def clearSpeakStatus(self):
1001926
self.logger.bind(tag=TAG).debug(f"清除服务端讲话状态")

main/xiaozhi-server/core/handle/intentHandler.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -109,29 +109,29 @@ def process_function_call():
109109
if result.action == Action.RESPONSE: # 直接回复前端
110110
text = result.response
111111
if text is not None:
112-
speak_and_play(conn, text)
112+
speak_txt(conn, text)
113113
elif result.action == Action.REQLLM: # 调用函数后再请求llm生成回复
114114
text = result.result
115115
conn.dialogue.put(Message(role="tool", content=text))
116116
llm_result = conn.intent.replyResult(text, original_text)
117117
if llm_result is None:
118118
llm_result = text
119-
speak_and_play(conn, llm_result)
119+
speak_txt(conn, llm_result)
120120
elif (
121121
result.action == Action.NOTFOUND
122122
or result.action == Action.ERROR
123123
):
124124
text = result.result
125125
if text is not None:
126-
speak_and_play(conn, text)
126+
speak_txt(conn, text)
127127
elif function_name != "play_music":
128128
# For backward compatibility with original code
129129
# 获取当前最新的文本索引
130130
text = result.response
131131
if text is None:
132132
text = result.result
133133
if text is not None:
134-
speak_and_play(conn, text)
134+
speak_txt(conn, text)
135135

136136
# 将函数执行放在线程池中
137137
conn.executor.submit(process_function_call)
@@ -142,12 +142,12 @@ def process_function_call():
142142
return False
143143

144144

145-
def speak_and_play(conn, text):
145+
def speak_txt(conn, text):
146146
text_index = (
147147
conn.tts_last_text_index + 1 if hasattr(conn, "tts_last_text_index") else 0
148148
)
149149
conn.recode_first_last_text(text, text_index)
150-
future = conn.executor.submit(conn.speak_and_play, text, text_index)
150+
future = conn.executor.submit(conn.speak_and_play, None, text, text_index)
151151
conn.llm_finish_task = True
152152
conn.tts_queue.put((future, text_index))
153153
conn.dialogue.put(Message(role="assistant", content=text))

main/xiaozhi-server/core/handle/receiveAudioHandle.py

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,9 @@ async def handleAudioMessage(conn, audio):
3939
if len(conn.asr_audio) < 15:
4040
conn.asr_server_receive = True
4141
else:
42-
raw_text, _ = await conn.asr.speech_to_text(conn.asr_audio, conn.session_id) # 确保ASR模块返回原始文本
42+
raw_text, _ = await conn.asr.speech_to_text(
43+
conn.asr_audio, conn.session_id
44+
) # 确保ASR模块返回原始文本
4345
conn.logger.bind(tag=TAG).info(f"识别文本: {raw_text}")
4446
text_len, _ = remove_punctuation_and_length(raw_text)
4547
if text_len > 0:
@@ -76,11 +78,7 @@ async def startToChat(conn, text):
7678

7779
# 意图未被处理,继续常规聊天流程
7880
await send_stt_message(conn, text)
79-
if conn.intent_type == "function_call":
80-
# 使用支持function calling的聊天方法
81-
conn.executor.submit(conn.chat_with_function_calling, text)
82-
else:
83-
conn.executor.submit(conn.chat, text)
81+
conn.executor.submit(conn.chat, text)
8482

8583

8684
async def no_voice_close_connect(conn):

0 commit comments

Comments
 (0)