Skip to content

Commit 4721063

Browse files
committed
update:优化音频播放方法
1 parent d97f8b2 commit 4721063

File tree

3 files changed

+38 additions, -40 deletions

main/xiaozhi-server/core/connection.py

Lines changed: 26 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,8 @@
1414
from typing import Dict, Any
1515
from plugins_func.loadplugins import auto_import_modules
1616
from config.logger import setup_logging
17+
from config.config_loader import get_project_dir
18+
from core.utils import p3
1719
from core.utils.dialogue import Message, Dialogue
1820
from core.handle.textHandle import handleTextMessage
1921
from core.utils.util import (
@@ -615,7 +617,7 @@ def chat(self, query, tool_call=False):
615617
text_index += 1
616618
self.recode_first_last_text(segment_text, text_index)
617619
future = self.executor.submit(
618-
self.speak_and_play, segment_text, text_index
620+
self.speak_and_play, None, segment_text, text_index
619621
)
620622
self.tts_queue.put((future, text_index))
621623
# 更新已处理字符位置
@@ -674,7 +676,7 @@ def chat(self, query, tool_call=False):
674676
text_index += 1
675677
self.recode_first_last_text(segment_text, text_index)
676678
future = self.executor.submit(
677-
self.speak_and_play, segment_text, text_index
679+
self.speak_and_play, None, segment_text, text_index
678680
)
679681
self.tts_queue.put((future, text_index))
680682

@@ -737,7 +739,7 @@ def _handle_function_result(self, result, function_call_data, text_index):
737739
if result.action == Action.RESPONSE: # 直接回复前端
738740
text = result.response
739741
self.recode_first_last_text(text, text_index)
740-
future = self.executor.submit(self.speak_and_play, text, text_index)
742+
future = self.executor.submit(self.speak_and_play, None, text, text_index)
741743
self.tts_queue.put((future, text_index))
742744
self.dialogue.put(Message(role="assistant", content=text))
743745
elif result.action == Action.REQLLM: # 调用函数后再请求llm生成回复
@@ -776,7 +778,7 @@ def _handle_function_result(self, result, function_call_data, text_index):
776778
elif result.action == Action.NOTFOUND or result.action == Action.ERROR:
777779
text = result.result
778780
self.recode_first_last_text(text, text_index)
779-
future = self.executor.submit(self.speak_and_play, text, text_index)
781+
future = self.executor.submit(self.speak_and_play, None, text, text_index)
780782
self.tts_queue.put((future, text_index))
781783
self.dialogue.put(Message(role="assistant", content=text))
782784
else:
@@ -803,11 +805,7 @@ def _tts_priority_thread(self):
803805
self.logger.bind(tag=TAG).debug("正在处理TTS任务...")
804806
tts_timeout = int(self.config.get("tts_timeout", 10))
805807
tts_file, text, _ = future.result(timeout=tts_timeout)
806-
if text is None or len(text) <= 0:
807-
self.logger.bind(tag=TAG).error(
808-
f"TTS出错:{text_index}: tts text is empty"
809-
)
810-
elif tts_file is None:
808+
if tts_file is None:
811809
self.logger.bind(tag=TAG).error(
812810
f"TTS出错: file is empty: {text_index}: {text}"
813811
)
@@ -816,12 +814,16 @@ def _tts_priority_thread(self):
816814
f"TTS生成:文件路径: {tts_file}"
817815
)
818816
if os.path.exists(tts_file):
819-
if self.audio_format == "pcm":
817+
if tts_file.endswith(".p3"):
818+
audio_datas, _ = p3.decode_opus_from_file(tts_file)
819+
elif self.audio_format == "pcm":
820820
audio_datas, _ = self.tts.audio_to_pcm_data(tts_file)
821821
else:
822822
audio_datas, _ = self.tts.audio_to_opus_data(tts_file)
823823
# 在这里上报TTS数据
824-
enqueue_tts_report(self, text, audio_datas)
824+
enqueue_tts_report(
825+
self, tts_file if text is None else text, audio_datas
826+
)
825827
else:
826828
self.logger.bind(tag=TAG).error(
827829
f"TTS出错:文件不存在{tts_file}"
@@ -837,6 +839,7 @@ def _tts_priority_thread(self):
837839
self.tts.delete_audio_file
838840
and tts_file is not None
839841
and os.path.exists(tts_file)
842+
and tts_file.startswith(self.tts.output_file)
840843
):
841844
os.remove(tts_file)
842845
except Exception as e:
@@ -903,18 +906,21 @@ def _report_worker(self):
903906

904907
self.logger.bind(tag=TAG).info("聊天记录上报线程已退出")
905908

906-
def speak_and_play(self, text, text_index=0):
907-
if text is None or len(text) <= 0:
908-
self.logger.bind(tag=TAG).info(f"无需tts转换,query为空,{text}")
909-
return None, text, text_index
910-
tts_file = self.tts.to_tts(text)
909+
def speak_and_play(self, file_path, content, text_index=0):
910+
if file_path is not None:
911+
self.logger.bind(tag=TAG).info(f"无需tts转换: 从文件播放,{file_path}")
912+
return file_path, content, text_index
913+
if content is None or len(content) <= 0:
914+
self.logger.bind(tag=TAG).info(f"无需tts转换,query为空,{content}")
915+
return None, content, text_index
916+
tts_file = self.tts.to_tts(content)
911917
if tts_file is None:
912-
self.logger.bind(tag=TAG).error(f"tts转换失败,{text}")
913-
return None, text, text_index
918+
self.logger.bind(tag=TAG).error(f"tts转换失败,{content}")
919+
return None, content, text_index
914920
self.logger.bind(tag=TAG).debug(f"TTS 文件生成完毕: {tts_file}")
915921
if self.max_output_size > 0:
916-
add_device_output(self.headers.get("device-id"), len(text))
917-
return tts_file, text, text_index
922+
add_device_output(self.headers.get("device-id"), len(content))
923+
return tts_file, content, text_index
918924

919925
def clearSpeakStatus(self):
920926
self.logger.bind(tag=TAG).debug(f"清除服务端讲话状态")

main/xiaozhi-server/core/handle/intentHandler.py

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -109,29 +109,29 @@ def process_function_call():
109109
if result.action == Action.RESPONSE: # 直接回复前端
110110
text = result.response
111111
if text is not None:
112-
speak_and_play(conn, text)
112+
speak_txt(conn, text)
113113
elif result.action == Action.REQLLM: # 调用函数后再请求llm生成回复
114114
text = result.result
115115
conn.dialogue.put(Message(role="tool", content=text))
116116
llm_result = conn.intent.replyResult(text, original_text)
117117
if llm_result is None:
118118
llm_result = text
119-
speak_and_play(conn, llm_result)
119+
speak_txt(conn, llm_result)
120120
elif (
121121
result.action == Action.NOTFOUND
122122
or result.action == Action.ERROR
123123
):
124124
text = result.result
125125
if text is not None:
126-
speak_and_play(conn, text)
126+
speak_txt(conn, text)
127127
elif function_name != "play_music":
128128
# For backward compatibility with original code
129129
# 获取当前最新的文本索引
130130
text = result.response
131131
if text is None:
132132
text = result.result
133133
if text is not None:
134-
speak_and_play(conn, text)
134+
speak_txt(conn, text)
135135

136136
# 将函数执行放在线程池中
137137
conn.executor.submit(process_function_call)
@@ -142,12 +142,12 @@ def process_function_call():
142142
return False
143143

144144

145-
def speak_and_play(conn, text):
145+
def speak_txt(conn, text):
146146
text_index = (
147147
conn.tts_last_text_index + 1 if hasattr(conn, "tts_last_text_index") else 0
148148
)
149149
conn.recode_first_last_text(text, text_index)
150-
future = conn.executor.submit(conn.speak_and_play, text, text_index)
150+
future = conn.executor.submit(conn.speak_and_play, None, text, text_index)
151151
conn.llm_finish_task = True
152152
conn.tts_queue.put((future, text_index))
153153
conn.dialogue.put(Message(role="assistant", content=text))

main/xiaozhi-server/plugins_func/functions/play_music.py

Lines changed: 6 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -216,24 +216,16 @@ async def play_local_music(conn, specific_file=None):
216216
text = _get_random_play_prompt(selected_music)
217217
await send_stt_message(conn, text)
218218
conn.dialogue.put(Message(role="assistant", content=text))
219-
conn.tts_first_text_index = 0
220-
conn.tts_last_text_index = 0
221219

222-
tts_file = await asyncio.to_thread(conn.tts.to_tts, text)
223-
if tts_file is not None and os.path.exists(tts_file):
224-
conn.tts_last_text_index = 1
225-
opus_packets, _ = conn.tts.audio_to_opus_data(tts_file)
226-
conn.audio_play_queue.put((opus_packets, None, 0))
227-
os.remove(tts_file)
220+
conn.recode_first_last_text(text, 0)
221+
future = conn.executor.submit(conn.speak_and_play, None, text, 0)
222+
conn.tts_queue.put((future, 0))
228223

224+
conn.recode_first_last_text(text, 1)
225+
future = conn.executor.submit(conn.speak_and_play, music_path, None, 1)
226+
conn.tts_queue.put((future, 1))
229227
conn.llm_finish_task = True
230228

231-
if music_path.endswith(".p3"):
232-
opus_packets, _ = p3.decode_opus_from_file(music_path)
233-
else:
234-
opus_packets, _ = conn.tts.audio_to_opus_data(music_path)
235-
conn.audio_play_queue.put((opus_packets, None, conn.tts_last_text_index))
236-
237229
except Exception as e:
238230
conn.logger.bind(tag=TAG).error(f"播放音乐失败: {str(e)}")
239231
conn.logger.bind(tag=TAG).error(f"详细错误: {traceback.format_exc()}")

0 commit comments

Comments
 (0)