fix:智能体音色未随manager生效bug

openrz · web-flow · commit 4912f385c69f · 2025-04-13T19:01:19.000+08:00
* fix:连接manager后无法使用functioncallbug

* fix:意图识别使用llm无法播放音乐bug

* fix:manager意图识别使用独立llm无法初始化llm的bug

* fix:智能体音色未随manager生效bug

* update:添加edgeTTS音色
diff --git a/main/manager-api/src/main/resources/db/changelog/202504131543.sql b/main/manager-api/src/main/resources/db/changelog/202504131543.sql
@@ -0,0 +1,18 @@
+-- 对0.3.0版本之前的参数进行修改
+update `sys_params` set param_value = '.mp3;.wav;.p3' where  param_code = 'plugins.play_music.music_ext';
+update `ai_model_config` set config_json =  '{\"type\": \"intent_llm\", \"llm\": \"LLM_ChatGLMLLM\"}' where  id = 'Intent_intent_llm';
+
+-- 添加edge音色
+delete from `ai_tts_voice` where tts_model_id = 'TTS_EdgeTTS';
+INSERT INTO `ai_tts_voice` VALUES 
+('TTS_EdgeTTS0001', 'TTS_EdgeTTS', 'EdgeTTS女声-晓晓', 'zh-CN-XiaoxiaoNeural', '普通话', NULL, NULL, 1, NULL, NULL, NULL, NULL),
+('TTS_EdgeTTS0002', 'TTS_EdgeTTS', 'EdgeTTS男声-云扬', 'zh-CN-YunyangNeural', '普通话', NULL, NULL, 1, NULL, NULL, NULL, NULL),
+('TTS_EdgeTTS0003', 'TTS_EdgeTTS', 'EdgeTTS女声-晓伊', 'zh-CN-XiaoyiNeural', '普通话', NULL, NULL, 1, NULL, NULL, NULL, NULL),
+('TTS_EdgeTTS0004', 'TTS_EdgeTTS', 'EdgeTTS男声-云健', 'zh-CN-YunjianNeural', '普通话', NULL, NULL, 1, NULL, NULL, NULL, NULL),
+('TTS_EdgeTTS0005', 'TTS_EdgeTTS', 'EdgeTTS男声-云希', 'zh-CN-YunxiNeural', '普通话', NULL, NULL, 1, NULL, NULL, NULL, NULL),
+('TTS_EdgeTTS0006', 'TTS_EdgeTTS', 'EdgeTTS男声-云夏', 'zh-CN-YunxiaNeural', '普通话', NULL, NULL, 1, NULL, NULL, NULL, NULL),
+('TTS_EdgeTTS0007', 'TTS_EdgeTTS', 'EdgeTTS女声-辽宁小贝', 'zh-CN-liaoning-XiaobeiNeural', '辽宁', NULL, NULL, 1, NULL, NULL, NULL, NULL),
+('TTS_EdgeTTS0008', 'TTS_EdgeTTS', 'EdgeTTS女声-陕西小妮', 'zh-CN-shaanxi-XiaoniNeural', '陕西', NULL, NULL, 1, NULL, NULL, NULL, NULL),
+('TTS_EdgeTTS0009', 'TTS_EdgeTTS', 'EdgeTTS女声-香港海佳', 'zh-HK-HiuGaaiNeural', '粤语', 'General', 'Friendly, Positive', 1, NULL, NULL, NULL, NULL),
+('TTS_EdgeTTS0010', 'TTS_EdgeTTS', 'EdgeTTS女声-香港海曼', 'zh-HK-HiuMaanNeural', '粤语', 'General', 'Friendly, Positive', 1, NULL, NULL, NULL, NULL),
+('TTS_EdgeTTS0011', 'TTS_EdgeTTS', 'EdgeTTS男声-香港万龙', 'zh-HK-WanLungNeural', '粤语', 'General', 'Friendly, Positive', 1, NULL, NULL, NULL, NULL);
diff --git a/main/manager-api/src/main/resources/db/changelog/db.changelog-master.yaml b/main/manager-api/src/main/resources/db/changelog/db.changelog-master.yaml
@@ -52,9 +52,9 @@ databaseChangeLog:
             encoding: utf8
             path: classpath:db/changelog/202504112058.sql
   - changeSet:
-      id: 202504131542
+      id: 202504131543
       author: John
       changes:
         - sqlFile:
             encoding: utf8
-            path: classpath:db/changelog/202504131542.sql
+            path: classpath:db/changelog/202504131543.sql
diff --git a/main/xiaozhi-server/core/providers/tts/cozecn.py b/main/xiaozhi-server/core/providers/tts/cozecn.py
@@ -12,14 +12,20 @@ def __init__(self, config, delete_audio_file):
         super().__init__(config, delete_audio_file)
         self.model = config.get("model")
         self.access_token = config.get("access_token")
-        self.voice = config.get("voice")
+        if config.get("private_voice"):
+            self.voice = config.get("private_voice")
+        else:
+            self.voice = config.get("voice")
         self.response_format = config.get("response_format")
 
         self.host = "api.coze.cn"
         self.api_url = f"https://{self.host}/v1/audio/speech"
 
     def generate_filename(self, extension=".wav"):
-        return os.path.join(self.output_file, f"tts-{datetime.now().date()}@{uuid.uuid4().hex}{extension}")
+        return os.path.join(
+            self.output_file,
+            f"tts-{datetime.now().date()}@{uuid.uuid4().hex}{extension}",
+        )
 
     async def text_to_speak(self, text, output_file):
         request_json = {
@@ -30,9 +36,11 @@ async def text_to_speak(self, text, output_file):
         }
         headers = {
             "Authorization": f"Bearer {self.access_token}",
-            "Content-Type": "application/json"
+            "Content-Type": "application/json",
         }
-        response = requests.request("POST", self.api_url, json=request_json, headers=headers)
+        response = requests.request(
+            "POST", self.api_url, json=request_json, headers=headers
+        )
         data = response.content
         file_to_save = open(output_file, "wb")
         file_to_save.write(data)
diff --git a/main/xiaozhi-server/core/providers/tts/doubao.py b/main/xiaozhi-server/core/providers/tts/doubao.py
@@ -18,7 +18,12 @@ def __init__(self, config, delete_audio_file):
         self.appid = config.get("appid")
         self.access_token = config.get("access_token")
         self.cluster = config.get("cluster")
-        self.voice = config.get("voice")
+
+        if config.get("private_voice"):
+            self.voice = config.get("private_voice")
+        else:
+            self.voice = config.get("voice")
+
         self.api_url = config.get("api_url")
         self.authorization = config.get("authorization")
         self.header = {"Authorization": f"{self.authorization}{self.access_token}"}
diff --git a/main/xiaozhi-server/core/providers/tts/edge.py b/main/xiaozhi-server/core/providers/tts/edge.py
@@ -8,20 +8,26 @@
 class TTSProvider(TTSProviderBase):
     def __init__(self, config, delete_audio_file):
         super().__init__(config, delete_audio_file)
-        self.voice = config.get("voice")
+        if config.get("private_voice"):
+            self.voice = config.get("private_voice")
+        else:
+            self.voice = config.get("voice")
 
     def generate_filename(self, extension=".mp3"):
-        return os.path.join(self.output_file, f"tts-{datetime.now().date()}@{uuid.uuid4().hex}{extension}")
+        return os.path.join(
+            self.output_file,
+            f"tts-{datetime.now().date()}@{uuid.uuid4().hex}{extension}",
+        )
 
     async def text_to_speak(self, text, output_file):
         communicate = edge_tts.Communicate(text, voice=self.voice)
         # 确保目录存在并创建空文件
         os.makedirs(os.path.dirname(output_file), exist_ok=True)
-        with open(output_file, 'wb') as f:
+        with open(output_file, "wb") as f:
             pass
-        
+
         # 流式写入音频数据
-        with open(output_file, 'ab') as f:  # 改为追加模式避免覆盖
+        with open(output_file, "ab") as f:  # 改为追加模式避免覆盖
             async for chunk in communicate.stream():
                 if chunk["type"] == "audio":  # 只处理音频数据块
                     f.write(chunk["data"])
diff --git a/main/xiaozhi-server/core/providers/tts/minimax.py b/main/xiaozhi-server/core/providers/tts/minimax.py
@@ -12,28 +12,33 @@ def __init__(self, config, delete_audio_file):
         self.group_id = config.get("group_id")
         self.api_key = config.get("api_key")
         self.model = config.get("model")
-        self.voice_id = config.get("voice_id")
+        if config.get("private_voice"):
+            self.voice_id = config.get("private_voice")
+        else:
+            self.voice_id = config.get("voice_id")
 
         default_voice_setting = {
             "voice_id": "female-shaonv",
             "speed": 1,
             "vol": 1,
             "pitch": 0,
-            "emotion": "happy"
-        }
-        default_pronunciation_dict = {
-            "tone": [
-                "处理/(chu3)(li3)", "危险/dangerous"
-            ]
+            "emotion": "happy",
         }
+        default_pronunciation_dict = {"tone": ["处理/(chu3)(li3)", "危险/dangerous"]}
         defult_audio_setting = {
             "sample_rate": 32000,
             "bitrate": 128000,
             "format": "mp3",
-            "channel": 1
+            "channel": 1,
+        }
+        self.voice_setting = {
+            **default_voice_setting,
+            **config.get("voice_setting", {}),
+        }
+        self.pronunciation_dict = {
+            **default_pronunciation_dict,
+            **config.get("pronunciation_dict", {}),
         }
-        self.voice_setting = {**default_voice_setting, **config.get("voice_setting", {})}
-        self.pronunciation_dict = {**default_pronunciation_dict, **config.get("pronunciation_dict", {})}
         self.audio_setting = {**defult_audio_setting, **config.get("audio_setting", {})}
         self.timber_weights = config.get("timber_weights", [])
 
@@ -44,11 +49,14 @@ def __init__(self, config, delete_audio_file):
         self.api_url = f"https://{self.host}/v1/t2a_v2?GroupId={self.group_id}"
         self.header = {
             "Content-Type": "application/json",
-            "Authorization": f"Bearer {self.api_key}"
+            "Authorization": f"Bearer {self.api_key}",
         }
 
     def generate_filename(self, extension=".mp3"):
-        return os.path.join(self.output_file, f"tts-{__name__}{datetime.now().date()}@{uuid.uuid4().hex}{extension}")
+        return os.path.join(
+            self.output_file,
+            f"tts-{__name__}{datetime.now().date()}@{uuid.uuid4().hex}{extension}",
+        )
 
     async def text_to_speak(self, text, output_file):
         request_json = {
@@ -65,13 +73,17 @@ async def text_to_speak(self, text, output_file):
             request_json["voice_setting"]["voice_id"] = ""
 
         try:
-            resp = requests.post(self.api_url, json.dumps(request_json), headers=self.header)
+            resp = requests.post(
+                self.api_url, json.dumps(request_json), headers=self.header
+            )
             # 检查返回请求数据的status_code是否为0
             if resp.json()["base_resp"]["status_code"] == 0:
-                data = resp.json()['data']['audio']
+                data = resp.json()["data"]["audio"]
                 file_to_save = open(output_file, "wb")
                 file_to_save.write(bytes.fromhex(data))
             else:
-                raise Exception(f"{__name__} status_code: {resp.status_code} response: {resp.content}")
+                raise Exception(
+                    f"{__name__} status_code: {resp.status_code} response: {resp.content}"
+                )
         except Exception as e:
-            raise Exception(f"{__name__} error: {e}")
+            raise Exception(f"{__name__} error: {e}")
diff --git a/main/xiaozhi-server/core/providers/tts/openai.py b/main/xiaozhi-server/core/providers/tts/openai.py
@@ -16,7 +16,10 @@ def __init__(self, config, delete_audio_file):
         self.api_key = config.get("api_key")
         self.api_url = config.get("api_url", "https://api.openai.com/v1/audio/speech")
         self.model = config.get("model", "tts-1")
-        self.voice = config.get("voice", "alloy")
+        if config.get("private_voice"):
+            self.voice = config.get("private_voice")
+        else:
+            self.voice = config.get("voice", "alloy")
         self.response_format = "wav"
         self.speed = config.get("speed", 1.0)
         self.output_file = config.get("output_dir", "tmp/")
diff --git a/main/xiaozhi-server/core/providers/tts/siliconflow.py b/main/xiaozhi-server/core/providers/tts/siliconflow.py
@@ -10,7 +10,10 @@ def __init__(self, config, delete_audio_file):
         super().__init__(config, delete_audio_file)
         self.model = config.get("model")
         self.access_token = config.get("access_token")
-        self.voice = config.get("voice")
+        if config.get("private_voice"):
+            self.voice = config.get("private_voice")
+        else:
+            self.voice = config.get("voice")
         self.response_format = config.get("response_format")
         self.sample_rate = config.get("sample_rate")
         self.speed = config.get("speed")
@@ -20,7 +23,10 @@ def __init__(self, config, delete_audio_file):
         self.api_url = f"https://{self.host}/v1/audio/speech"
 
     def generate_filename(self, extension=".wav"):
-        return os.path.join(self.output_file, f"tts-{datetime.now().date()}@{uuid.uuid4().hex}{extension}")
+        return os.path.join(
+            self.output_file,
+            f"tts-{datetime.now().date()}@{uuid.uuid4().hex}{extension}",
+        )
 
     async def text_to_speak(self, text, output_file):
         request_json = {
@@ -31,9 +37,11 @@ async def text_to_speak(self, text, output_file):
         }
         headers = {
             "Authorization": f"Bearer {self.access_token}",
-            "Content-Type": "application/json"
+            "Content-Type": "application/json",
         }
-        response = requests.request("POST", self.api_url, json=request_json, headers=headers)
+        response = requests.request(
+            "POST", self.api_url, json=request_json, headers=headers
+        )
         data = response.content
         file_to_save = open(output_file, "wb")
         file_to_save.write(data)
diff --git a/main/xiaozhi-server/core/providers/tts/tencent.py b/main/xiaozhi-server/core/providers/tts/tencent.py
diff --git a/main/xiaozhi-server/core/providers/tts/ttson.py b/main/xiaozhi-server/core/providers/tts/ttson.py