14
14
from typing import Dict , Any
15
15
from plugins_func .loadplugins import auto_import_modules
16
16
from config .logger import setup_logging
17
+ from config .config_loader import get_project_dir
18
+ from core .utils import p3
17
19
from core .utils .dialogue import Message , Dialogue
18
20
from core .handle .textHandle import handleTextMessage
19
21
from core .utils .util import (
@@ -506,106 +508,20 @@ def change_system_prompt(self, prompt):
506
508
# 更新系统prompt至上下文
507
509
self .dialogue .update_system_message (self .prompt )
508
510
509
- def chat (self , query ):
510
-
511
- self .dialogue .put (Message (role = "user" , content = query ))
512
-
513
- response_message = []
514
- processed_chars = 0 # 跟踪已处理的字符位置
515
- try :
516
- # 使用带记忆的对话
517
- memory_str = None
518
- if self .memory is not None :
519
- future = asyncio .run_coroutine_threadsafe (
520
- self .memory .query_memory (query ), self .loop
521
- )
522
- memory_str = future .result ()
523
-
524
- self .logger .bind (tag = TAG ).debug (f"记忆内容: { memory_str } " )
525
- llm_responses = self .llm .response (
526
- self .session_id , self .dialogue .get_llm_dialogue_with_memory (memory_str )
527
- )
528
- except Exception as e :
529
- self .logger .bind (tag = TAG ).error (f"LLM 处理出错 { query } : { e } " )
530
- return None
531
-
532
- self .llm_finish_task = False
533
- text_index = 0
534
- for content in llm_responses :
535
- response_message .append (content )
536
- if self .client_abort :
537
- break
538
-
539
- # 合并当前全部文本并处理未分割部分
540
- full_text = "" .join (response_message )
541
- current_text = full_text [processed_chars :] # 从未处理的位置开始
542
-
543
- # 查找最后一个有效标点
544
- punctuations = ("。" , "." , "?" , "?" , "!" , "!" , ";" , ";" , ":" )
545
- last_punct_pos = - 1
546
- number_flag = True
547
- for punct in punctuations :
548
- pos = current_text .rfind (punct )
549
- prev_char = current_text [pos - 1 ] if pos - 1 >= 0 else ""
550
- # 如果.前面是数字统一判断为小数
551
- if prev_char .isdigit () and punct == "." :
552
- number_flag = False
553
- if pos > last_punct_pos and number_flag :
554
- last_punct_pos = pos
555
-
556
- # 找到分割点则处理
557
- if last_punct_pos != - 1 :
558
- segment_text_raw = current_text [: last_punct_pos + 1 ]
559
- segment_text = get_string_no_punctuation_or_emoji (segment_text_raw )
560
- if segment_text :
561
- # 强制设置空字符,测试TTS出错返回语音的健壮性
562
- # if text_index % 2 == 0:
563
- # segment_text = " "
564
- text_index += 1
565
- self .recode_first_last_text (segment_text , text_index )
566
- future = self .executor .submit (
567
- self .speak_and_play , segment_text , text_index
568
- )
569
- self .tts_queue .put ((future , text_index ))
570
- processed_chars += len (segment_text_raw ) # 更新已处理字符位置
571
-
572
- # 处理最后剩余的文本
573
- full_text = "" .join (response_message )
574
- remaining_text = full_text [processed_chars :]
575
- if remaining_text :
576
- segment_text = get_string_no_punctuation_or_emoji (remaining_text )
577
- if segment_text :
578
- text_index += 1
579
- self .recode_first_last_text (segment_text , text_index )
580
- future = self .executor .submit (
581
- self .speak_and_play , segment_text , text_index
582
- )
583
- self .tts_queue .put ((future , text_index ))
584
-
585
- self .llm_finish_task = True
586
- self .dialogue .put (Message (role = "assistant" , content = "" .join (response_message )))
587
- self .logger .bind (tag = TAG ).debug (
588
- json .dumps (self .dialogue .get_llm_dialogue (), indent = 4 , ensure_ascii = False )
589
- )
590
- return True
591
-
592
- def chat_with_function_calling (self , query , tool_call = False ):
593
- self .logger .bind (tag = TAG ).debug (f"Chat with function calling start: { query } " )
594
- """Chat with function calling for intent detection using streaming"""
511
+ def chat (self , query , tool_call = False ):
512
+ self .logger .bind (tag = TAG ).debug (f"Chat: { query } " )
595
513
596
514
if not tool_call :
597
515
self .dialogue .put (Message (role = "user" , content = query ))
598
516
599
517
# Define intent functions
600
518
functions = None
601
- if hasattr (self , "func_handler" ):
519
+ if self . intent_type == "function_call" and hasattr (self , "func_handler" ):
602
520
functions = self .func_handler .get_functions ()
603
521
response_message = []
604
522
processed_chars = 0 # 跟踪已处理的字符位置
605
523
606
524
try :
607
- start_time = time .time ()
608
-
609
525
# 使用带记忆的对话
610
526
memory_str = None
611
527
if self .memory is not None :
@@ -614,14 +530,18 @@ def chat_with_function_calling(self, query, tool_call=False):
614
530
)
615
531
memory_str = future .result ()
616
532
617
- # self.logger.bind(tag=TAG).info(f"对话记录: {self.dialogue.get_llm_dialogue_with_memory(memory_str)}")
618
-
619
- # 使用支持functions的streaming接口
620
- llm_responses = self .llm .response_with_functions (
621
- self .session_id ,
622
- self .dialogue .get_llm_dialogue_with_memory (memory_str ),
623
- functions = functions ,
624
- )
533
+ if functions is not None :
534
+ # 使用支持functions的streaming接口
535
+ llm_responses = self .llm .response_with_functions (
536
+ self .session_id ,
537
+ self .dialogue .get_llm_dialogue_with_memory (memory_str ),
538
+ functions = functions ,
539
+ )
540
+ else :
541
+ llm_responses = self .llm .response (
542
+ self .session_id ,
543
+ self .dialogue .get_llm_dialogue_with_memory (memory_str ),
544
+ )
625
545
except Exception as e :
626
546
self .logger .bind (tag = TAG ).error (f"LLM 处理出错 { query } : { e } " )
627
547
return None
@@ -637,27 +557,28 @@ def chat_with_function_calling(self, query, tool_call=False):
637
557
content_arguments = ""
638
558
639
559
for response in llm_responses :
640
- content , tools_call = response
641
-
642
- if "content" in response :
643
- content = response ["content" ]
644
- tools_call = None
645
- if content is not None and len (content ) > 0 :
646
- content_arguments += content
647
-
648
- if not tool_call_flag and content_arguments .startswith ("<tool_call>" ):
649
- # print("content_arguments", content_arguments)
650
- tool_call_flag = True
651
-
652
- if tools_call is not None :
653
- tool_call_flag = True
654
- if tools_call [0 ].id is not None :
655
- function_id = tools_call [0 ].id
656
- if tools_call [0 ].function .name is not None :
657
- function_name = tools_call [0 ].function .name
658
- if tools_call [0 ].function .arguments is not None :
659
- function_arguments += tools_call [0 ].function .arguments
660
-
560
+ if self .intent_type == "function_call" :
561
+ content , tools_call = response
562
+ if "content" in response :
563
+ content = response ["content" ]
564
+ tools_call = None
565
+ if content is not None and len (content ) > 0 :
566
+ content_arguments += content
567
+
568
+ if not tool_call_flag and content_arguments .startswith ("<tool_call>" ):
569
+ # print("content_arguments", content_arguments)
570
+ tool_call_flag = True
571
+
572
+ if tools_call is not None :
573
+ tool_call_flag = True
574
+ if tools_call [0 ].id is not None :
575
+ function_id = tools_call [0 ].id
576
+ if tools_call [0 ].function .name is not None :
577
+ function_name = tools_call [0 ].function .name
578
+ if tools_call [0 ].function .arguments is not None :
579
+ function_arguments += tools_call [0 ].function .arguments
580
+ else :
581
+ content = response
661
582
if content is not None and len (content ) > 0 :
662
583
if not tool_call_flag :
663
584
response_message .append (content )
@@ -696,7 +617,7 @@ def chat_with_function_calling(self, query, tool_call=False):
696
617
text_index += 1
697
618
self .recode_first_last_text (segment_text , text_index )
698
619
future = self .executor .submit (
699
- self .speak_and_play , segment_text , text_index
620
+ self .speak_and_play , None , segment_text , text_index
700
621
)
701
622
self .tts_queue .put ((future , text_index ))
702
623
# 更新已处理字符位置
@@ -755,7 +676,7 @@ def chat_with_function_calling(self, query, tool_call=False):
755
676
text_index += 1
756
677
self .recode_first_last_text (segment_text , text_index )
757
678
future = self .executor .submit (
758
- self .speak_and_play , segment_text , text_index
679
+ self .speak_and_play , None , segment_text , text_index
759
680
)
760
681
self .tts_queue .put ((future , text_index ))
761
682
@@ -818,7 +739,7 @@ def _handle_function_result(self, result, function_call_data, text_index):
818
739
if result .action == Action .RESPONSE : # 直接回复前端
819
740
text = result .response
820
741
self .recode_first_last_text (text , text_index )
821
- future = self .executor .submit (self .speak_and_play , text , text_index )
742
+ future = self .executor .submit (self .speak_and_play , None , text , text_index )
822
743
self .tts_queue .put ((future , text_index ))
823
744
self .dialogue .put (Message (role = "assistant" , content = text ))
824
745
elif result .action == Action .REQLLM : # 调用函数后再请求llm生成回复
@@ -853,11 +774,11 @@ def _handle_function_result(self, result, function_call_data, text_index):
853
774
content = text ,
854
775
)
855
776
)
856
- self .chat_with_function_calling (text , tool_call = True )
777
+ self .chat (text , tool_call = True )
857
778
elif result .action == Action .NOTFOUND or result .action == Action .ERROR :
858
779
text = result .result
859
780
self .recode_first_last_text (text , text_index )
860
- future = self .executor .submit (self .speak_and_play , text , text_index )
781
+ future = self .executor .submit (self .speak_and_play , None , text , text_index )
861
782
self .tts_queue .put ((future , text_index ))
862
783
self .dialogue .put (Message (role = "assistant" , content = text ))
863
784
else :
@@ -884,11 +805,7 @@ def _tts_priority_thread(self):
884
805
self .logger .bind (tag = TAG ).debug ("正在处理TTS任务..." )
885
806
tts_timeout = int (self .config .get ("tts_timeout" , 10 ))
886
807
tts_file , text , _ = future .result (timeout = tts_timeout )
887
- if text is None or len (text ) <= 0 :
888
- self .logger .bind (tag = TAG ).error (
889
- f"TTS出错:{ text_index } : tts text is empty"
890
- )
891
- elif tts_file is None :
808
+ if tts_file is None :
892
809
self .logger .bind (tag = TAG ).error (
893
810
f"TTS出错: file is empty: { text_index } : { text } "
894
811
)
@@ -897,12 +814,16 @@ def _tts_priority_thread(self):
897
814
f"TTS生成:文件路径: { tts_file } "
898
815
)
899
816
if os .path .exists (tts_file ):
900
- if self .audio_format == "pcm" :
817
+ if tts_file .endswith (".p3" ):
818
+ audio_datas , _ = p3 .decode_opus_from_file (tts_file )
819
+ elif self .audio_format == "pcm" :
901
820
audio_datas , _ = self .tts .audio_to_pcm_data (tts_file )
902
821
else :
903
822
audio_datas , _ = self .tts .audio_to_opus_data (tts_file )
904
823
# 在这里上报TTS数据
905
- enqueue_tts_report (self , text , audio_datas )
824
+ enqueue_tts_report (
825
+ self , tts_file if text is None else text , audio_datas
826
+ )
906
827
else :
907
828
self .logger .bind (tag = TAG ).error (
908
829
f"TTS出错:文件不存在{ tts_file } "
@@ -918,6 +839,7 @@ def _tts_priority_thread(self):
918
839
self .tts .delete_audio_file
919
840
and tts_file is not None
920
841
and os .path .exists (tts_file )
842
+ and tts_file .startswith (self .tts .output_file )
921
843
):
922
844
os .remove (tts_file )
923
845
except Exception as e :
@@ -984,18 +906,21 @@ def _report_worker(self):
984
906
985
907
self .logger .bind (tag = TAG ).info ("聊天记录上报线程已退出" )
986
908
987
- def speak_and_play (self , text , text_index = 0 ):
988
- if text is None or len (text ) <= 0 :
989
- self .logger .bind (tag = TAG ).info (f"无需tts转换,query为空,{ text } " )
990
- return None , text , text_index
991
- tts_file = self .tts .to_tts (text )
909
+ def speak_and_play (self , file_path , content , text_index = 0 ):
910
+ if file_path is not None :
911
+ self .logger .bind (tag = TAG ).info (f"无需tts转换: 从文件播放,{ file_path } " )
912
+ return file_path , content , text_index
913
+ if content is None or len (content ) <= 0 :
914
+ self .logger .bind (tag = TAG ).info (f"无需tts转换,query为空,{ content } " )
915
+ return None , content , text_index
916
+ tts_file = self .tts .to_tts (content )
992
917
if tts_file is None :
993
- self .logger .bind (tag = TAG ).error (f"tts转换失败,{ text } " )
994
- return None , text , text_index
918
+ self .logger .bind (tag = TAG ).error (f"tts转换失败,{ content } " )
919
+ return None , content , text_index
995
920
self .logger .bind (tag = TAG ).debug (f"TTS 文件生成完毕: { tts_file } " )
996
921
if self .max_output_size > 0 :
997
- add_device_output (self .headers .get ("device-id" ), len (text ))
998
- return tts_file , text , text_index
922
+ add_device_output (self .headers .get ("device-id" ), len (content ))
923
+ return tts_file , content , text_index
999
924
1000
925
def clearSpeakStatus (self ):
1001
926
self .logger .bind (tag = TAG ).debug (f"清除服务端讲话状态" )
0 commit comments