1414from typing import Dict , Any
1515from plugins_func .loadplugins import auto_import_modules
1616from config .logger import setup_logging
17+ from config .config_loader import get_project_dir
18+ from core .utils import p3
1719from core .utils .dialogue import Message , Dialogue
1820from core .handle .textHandle import handleTextMessage
1921from core .utils .util import (
@@ -615,7 +617,7 @@ def chat(self, query, tool_call=False):
615617 text_index += 1
616618 self .recode_first_last_text (segment_text , text_index )
617619 future = self .executor .submit (
618- self .speak_and_play , segment_text , text_index
620+ self .speak_and_play , None , segment_text , text_index
619621 )
620622 self .tts_queue .put ((future , text_index ))
621623 # 更新已处理字符位置
@@ -674,7 +676,7 @@ def chat(self, query, tool_call=False):
674676 text_index += 1
675677 self .recode_first_last_text (segment_text , text_index )
676678 future = self .executor .submit (
677- self .speak_and_play , segment_text , text_index
679+ self .speak_and_play , None , segment_text , text_index
678680 )
679681 self .tts_queue .put ((future , text_index ))
680682
@@ -737,7 +739,7 @@ def _handle_function_result(self, result, function_call_data, text_index):
737739 if result .action == Action .RESPONSE : # 直接回复前端
738740 text = result .response
739741 self .recode_first_last_text (text , text_index )
740- future = self .executor .submit (self .speak_and_play , text , text_index )
742+ future = self .executor .submit (self .speak_and_play , None , text , text_index )
741743 self .tts_queue .put ((future , text_index ))
742744 self .dialogue .put (Message (role = "assistant" , content = text ))
743745 elif result .action == Action .REQLLM : # 调用函数后再请求llm生成回复
@@ -776,7 +778,7 @@ def _handle_function_result(self, result, function_call_data, text_index):
776778 elif result .action == Action .NOTFOUND or result .action == Action .ERROR :
777779 text = result .result
778780 self .recode_first_last_text (text , text_index )
779- future = self .executor .submit (self .speak_and_play , text , text_index )
781+ future = self .executor .submit (self .speak_and_play , None , text , text_index )
780782 self .tts_queue .put ((future , text_index ))
781783 self .dialogue .put (Message (role = "assistant" , content = text ))
782784 else :
@@ -803,11 +805,7 @@ def _tts_priority_thread(self):
803805 self .logger .bind (tag = TAG ).debug ("正在处理TTS任务..." )
804806 tts_timeout = int (self .config .get ("tts_timeout" , 10 ))
805807 tts_file , text , _ = future .result (timeout = tts_timeout )
806- if text is None or len (text ) <= 0 :
807- self .logger .bind (tag = TAG ).error (
808- f"TTS出错:{ text_index } : tts text is empty"
809- )
810- elif tts_file is None :
808+ if tts_file is None :
811809 self .logger .bind (tag = TAG ).error (
812810 f"TTS出错: file is empty: { text_index } : { text } "
813811 )
@@ -816,12 +814,16 @@ def _tts_priority_thread(self):
816814 f"TTS生成:文件路径: { tts_file } "
817815 )
818816 if os .path .exists (tts_file ):
819- if self .audio_format == "pcm" :
817+ if tts_file .endswith (".p3" ):
818+ audio_datas , _ = p3 .decode_opus_from_file (tts_file )
819+ elif self .audio_format == "pcm" :
820820 audio_datas , _ = self .tts .audio_to_pcm_data (tts_file )
821821 else :
822822 audio_datas , _ = self .tts .audio_to_opus_data (tts_file )
823823 # 在这里上报TTS数据
824- enqueue_tts_report (self , text , audio_datas )
824+ enqueue_tts_report (
825+ self , tts_file if text is None else text , audio_datas
826+ )
825827 else :
826828 self .logger .bind (tag = TAG ).error (
827829 f"TTS出错:文件不存在{ tts_file } "
@@ -837,6 +839,7 @@ def _tts_priority_thread(self):
837839 self .tts .delete_audio_file
838840 and tts_file is not None
839841 and os .path .exists (tts_file )
842+ and tts_file .startswith (self .tts .output_file )
840843 ):
841844 os .remove (tts_file )
842845 except Exception as e :
@@ -903,18 +906,21 @@ def _report_worker(self):
903906
904907 self .logger .bind (tag = TAG ).info ("聊天记录上报线程已退出" )
905908
906- def speak_and_play (self , text , text_index = 0 ):
907- if text is None or len (text ) <= 0 :
908- self .logger .bind (tag = TAG ).info (f"无需tts转换,query为空,{ text } " )
909- return None , text , text_index
910- tts_file = self .tts .to_tts (text )
909+ def speak_and_play (self , file_path , content , text_index = 0 ):
910+ if file_path is not None :
911+ self .logger .bind (tag = TAG ).info (f"无需tts转换: 从文件播放,{ file_path } " )
912+ return file_path , content , text_index
913+ if content is None or len (content ) <= 0 :
914+ self .logger .bind (tag = TAG ).info (f"无需tts转换,query为空,{ content } " )
915+ return None , content , text_index
916+ tts_file = self .tts .to_tts (content )
911917 if tts_file is None :
912- self .logger .bind (tag = TAG ).error (f"tts转换失败,{ text } " )
913- return None , text , text_index
918+ self .logger .bind (tag = TAG ).error (f"tts转换失败,{ content } " )
919+ return None , content , text_index
914920 self .logger .bind (tag = TAG ).debug (f"TTS 文件生成完毕: { tts_file } " )
915921 if self .max_output_size > 0 :
916- add_device_output (self .headers .get ("device-id" ), len (text ))
917- return tts_file , text , text_index
922+ add_device_output (self .headers .get ("device-id" ), len (content ))
923+ return tts_file , content , text_index
918924
919925 def clearSpeakStatus (self ):
920926 self .logger .bind (tag = TAG ).debug (f"清除服务端讲话状态" )
0 commit comments