Skip to content

Commit 4912f38

Browse files
authored
fix:智能体音色未随manager生效bug
* fix:连接manager后无法使用functioncallbug * fix:意图识别使用llm无法播放音乐bug * fix:manager的意图识别使用独立llm无法初始化llm的bug * fix:智能体音色未随manager生效bug * update:添加edgeTTS音色
1 parent de8c762 commit 4912f38

File tree

10 files changed

+181
-92
lines changed

10 files changed

+181
-92
lines changed
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
-- 对0.3.0版本之前的参数进行修改
2+
update `sys_params` set param_value = '.mp3;.wav;.p3' where param_code = 'plugins.play_music.music_ext';
3+
update `ai_model_config` set config_json = '{\"type\": \"intent_llm\", \"llm\": \"LLM_ChatGLMLLM\"}' where id = 'Intent_intent_llm';
4+
5+
-- 添加edge音色
6+
delete from `ai_tts_voice` where tts_model_id = 'TTS_EdgeTTS';
7+
INSERT INTO `ai_tts_voice` VALUES
8+
('TTS_EdgeTTS0001', 'TTS_EdgeTTS', 'EdgeTTS女声-晓晓', 'zh-CN-XiaoxiaoNeural', '普通话', NULL, NULL, 1, NULL, NULL, NULL, NULL),
9+
('TTS_EdgeTTS0002', 'TTS_EdgeTTS', 'EdgeTTS男声-云扬', 'zh-CN-YunyangNeural', '普通话', NULL, NULL, 1, NULL, NULL, NULL, NULL),
10+
('TTS_EdgeTTS0003', 'TTS_EdgeTTS', 'EdgeTTS女声-晓伊', 'zh-CN-XiaoyiNeural', '普通话', NULL, NULL, 1, NULL, NULL, NULL, NULL),
11+
('TTS_EdgeTTS0004', 'TTS_EdgeTTS', 'EdgeTTS男声-云健', 'zh-CN-YunjianNeural', '普通话', NULL, NULL, 1, NULL, NULL, NULL, NULL),
12+
('TTS_EdgeTTS0005', 'TTS_EdgeTTS', 'EdgeTTS男声-云希', 'zh-CN-YunxiNeural', '普通话', NULL, NULL, 1, NULL, NULL, NULL, NULL),
13+
('TTS_EdgeTTS0006', 'TTS_EdgeTTS', 'EdgeTTS男声-云夏', 'zh-CN-YunxiaNeural', '普通话', NULL, NULL, 1, NULL, NULL, NULL, NULL),
14+
('TTS_EdgeTTS0007', 'TTS_EdgeTTS', 'EdgeTTS女声-辽宁小贝', 'zh-CN-liaoning-XiaobeiNeural', '辽宁', NULL, NULL, 1, NULL, NULL, NULL, NULL),
15+
('TTS_EdgeTTS0008', 'TTS_EdgeTTS', 'EdgeTTS女声-陕西小妮', 'zh-CN-shaanxi-XiaoniNeural', '陕西', NULL, NULL, 1, NULL, NULL, NULL, NULL),
16+
('TTS_EdgeTTS0009', 'TTS_EdgeTTS', 'EdgeTTS女声-香港海佳', 'zh-HK-HiuGaaiNeural', '粤语', 'General', 'Friendly, Positive', 1, NULL, NULL, NULL, NULL),
17+
('TTS_EdgeTTS0010', 'TTS_EdgeTTS', 'EdgeTTS女声-香港海曼', 'zh-HK-HiuMaanNeural', '粤语', 'General', 'Friendly, Positive', 1, NULL, NULL, NULL, NULL),
18+
('TTS_EdgeTTS0011', 'TTS_EdgeTTS', 'EdgeTTS男声-香港万龙', 'zh-HK-WanLungNeural', '粤语', 'General', 'Friendly, Positive', 1, NULL, NULL, NULL, NULL);

main/manager-api/src/main/resources/db/changelog/db.changelog-master.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,9 +52,9 @@ databaseChangeLog:
5252
encoding: utf8
5353
path: classpath:db/changelog/202504112058.sql
5454
- changeSet:
55-
id: 202504131542
55+
id: 202504131543
5656
author: John
5757
changes:
5858
- sqlFile:
5959
encoding: utf8
60-
path: classpath:db/changelog/202504131542.sql
60+
path: classpath:db/changelog/202504131543.sql

main/xiaozhi-server/core/providers/tts/cozecn.py

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,14 +12,20 @@ def __init__(self, config, delete_audio_file):
1212
super().__init__(config, delete_audio_file)
1313
self.model = config.get("model")
1414
self.access_token = config.get("access_token")
15-
self.voice = config.get("voice")
15+
if config.get("private_voice"):
16+
self.voice = config.get("private_voice")
17+
else:
18+
self.voice = config.get("voice")
1619
self.response_format = config.get("response_format")
1720

1821
self.host = "api.coze.cn"
1922
self.api_url = f"https://{self.host}/v1/audio/speech"
2023

2124
def generate_filename(self, extension=".wav"):
22-
return os.path.join(self.output_file, f"tts-{datetime.now().date()}@{uuid.uuid4().hex}{extension}")
25+
return os.path.join(
26+
self.output_file,
27+
f"tts-{datetime.now().date()}@{uuid.uuid4().hex}{extension}",
28+
)
2329

2430
async def text_to_speak(self, text, output_file):
2531
request_json = {
@@ -30,9 +36,11 @@ async def text_to_speak(self, text, output_file):
3036
}
3137
headers = {
3238
"Authorization": f"Bearer {self.access_token}",
33-
"Content-Type": "application/json"
39+
"Content-Type": "application/json",
3440
}
35-
response = requests.request("POST", self.api_url, json=request_json, headers=headers)
41+
response = requests.request(
42+
"POST", self.api_url, json=request_json, headers=headers
43+
)
3644
data = response.content
3745
file_to_save = open(output_file, "wb")
3846
file_to_save.write(data)

main/xiaozhi-server/core/providers/tts/doubao.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,12 @@ def __init__(self, config, delete_audio_file):
1818
self.appid = config.get("appid")
1919
self.access_token = config.get("access_token")
2020
self.cluster = config.get("cluster")
21-
self.voice = config.get("voice")
21+
22+
if config.get("private_voice"):
23+
self.voice = config.get("private_voice")
24+
else:
25+
self.voice = config.get("voice")
26+
2227
self.api_url = config.get("api_url")
2328
self.authorization = config.get("authorization")
2429
self.header = {"Authorization": f"{self.authorization}{self.access_token}"}

main/xiaozhi-server/core/providers/tts/edge.py

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,20 +8,26 @@
88
class TTSProvider(TTSProviderBase):
99
def __init__(self, config, delete_audio_file):
1010
super().__init__(config, delete_audio_file)
11-
self.voice = config.get("voice")
11+
if config.get("private_voice"):
12+
self.voice = config.get("private_voice")
13+
else:
14+
self.voice = config.get("voice")
1215

1316
def generate_filename(self, extension=".mp3"):
14-
return os.path.join(self.output_file, f"tts-{datetime.now().date()}@{uuid.uuid4().hex}{extension}")
17+
return os.path.join(
18+
self.output_file,
19+
f"tts-{datetime.now().date()}@{uuid.uuid4().hex}{extension}",
20+
)
1521

1622
async def text_to_speak(self, text, output_file):
1723
communicate = edge_tts.Communicate(text, voice=self.voice)
1824
# 确保目录存在并创建空文件
1925
os.makedirs(os.path.dirname(output_file), exist_ok=True)
20-
with open(output_file, 'wb') as f:
26+
with open(output_file, "wb") as f:
2127
pass
22-
28+
2329
# 流式写入音频数据
24-
with open(output_file, 'ab') as f: # 改为追加模式避免覆盖
30+
with open(output_file, "ab") as f: # 改为追加模式避免覆盖
2531
async for chunk in communicate.stream():
2632
if chunk["type"] == "audio": # 只处理音频数据块
2733
f.write(chunk["data"])

main/xiaozhi-server/core/providers/tts/minimax.py

Lines changed: 28 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -12,28 +12,33 @@ def __init__(self, config, delete_audio_file):
1212
self.group_id = config.get("group_id")
1313
self.api_key = config.get("api_key")
1414
self.model = config.get("model")
15-
self.voice_id = config.get("voice_id")
15+
if config.get("private_voice"):
16+
self.voice_id = config.get("private_voice")
17+
else:
18+
self.voice_id = config.get("voice_id")
1619

1720
default_voice_setting = {
1821
"voice_id": "female-shaonv",
1922
"speed": 1,
2023
"vol": 1,
2124
"pitch": 0,
22-
"emotion": "happy"
23-
}
24-
default_pronunciation_dict = {
25-
"tone": [
26-
"处理/(chu3)(li3)", "危险/dangerous"
27-
]
25+
"emotion": "happy",
2826
}
27+
default_pronunciation_dict = {"tone": ["处理/(chu3)(li3)", "危险/dangerous"]}
2928
defult_audio_setting = {
3029
"sample_rate": 32000,
3130
"bitrate": 128000,
3231
"format": "mp3",
33-
"channel": 1
32+
"channel": 1,
33+
}
34+
self.voice_setting = {
35+
**default_voice_setting,
36+
**config.get("voice_setting", {}),
37+
}
38+
self.pronunciation_dict = {
39+
**default_pronunciation_dict,
40+
**config.get("pronunciation_dict", {}),
3441
}
35-
self.voice_setting = {**default_voice_setting, **config.get("voice_setting", {})}
36-
self.pronunciation_dict = {**default_pronunciation_dict, **config.get("pronunciation_dict", {})}
3742
self.audio_setting = {**defult_audio_setting, **config.get("audio_setting", {})}
3843
self.timber_weights = config.get("timber_weights", [])
3944

@@ -44,11 +49,14 @@ def __init__(self, config, delete_audio_file):
4449
self.api_url = f"https://{self.host}/v1/t2a_v2?GroupId={self.group_id}"
4550
self.header = {
4651
"Content-Type": "application/json",
47-
"Authorization": f"Bearer {self.api_key}"
52+
"Authorization": f"Bearer {self.api_key}",
4853
}
4954

5055
def generate_filename(self, extension=".mp3"):
51-
return os.path.join(self.output_file, f"tts-{__name__}{datetime.now().date()}@{uuid.uuid4().hex}{extension}")
56+
return os.path.join(
57+
self.output_file,
58+
f"tts-{__name__}{datetime.now().date()}@{uuid.uuid4().hex}{extension}",
59+
)
5260

5361
async def text_to_speak(self, text, output_file):
5462
request_json = {
@@ -65,13 +73,17 @@ async def text_to_speak(self, text, output_file):
6573
request_json["voice_setting"]["voice_id"] = ""
6674

6775
try:
68-
resp = requests.post(self.api_url, json.dumps(request_json), headers=self.header)
76+
resp = requests.post(
77+
self.api_url, json.dumps(request_json), headers=self.header
78+
)
6979
# 检查返回请求数据的status_code是否为0
7080
if resp.json()["base_resp"]["status_code"] == 0:
71-
data = resp.json()['data']['audio']
81+
data = resp.json()["data"]["audio"]
7282
file_to_save = open(output_file, "wb")
7383
file_to_save.write(bytes.fromhex(data))
7484
else:
75-
raise Exception(f"{__name__} status_code: {resp.status_code} response: {resp.content}")
85+
raise Exception(
86+
f"{__name__} status_code: {resp.status_code} response: {resp.content}"
87+
)
7688
except Exception as e:
77-
raise Exception(f"{__name__} error: {e}")
89+
raise Exception(f"{__name__} error: {e}")

main/xiaozhi-server/core/providers/tts/openai.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,10 @@ def __init__(self, config, delete_audio_file):
1616
self.api_key = config.get("api_key")
1717
self.api_url = config.get("api_url", "https://api.openai.com/v1/audio/speech")
1818
self.model = config.get("model", "tts-1")
19-
self.voice = config.get("voice", "alloy")
19+
if config.get("private_voice"):
20+
self.voice = config.get("private_voice")
21+
else:
22+
self.voice = config.get("voice", "alloy")
2023
self.response_format = "wav"
2124
self.speed = config.get("speed", 1.0)
2225
self.output_file = config.get("output_dir", "tmp/")

main/xiaozhi-server/core/providers/tts/siliconflow.py

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,10 @@ def __init__(self, config, delete_audio_file):
1010
super().__init__(config, delete_audio_file)
1111
self.model = config.get("model")
1212
self.access_token = config.get("access_token")
13-
self.voice = config.get("voice")
13+
if config.get("private_voice"):
14+
self.voice = config.get("private_voice")
15+
else:
16+
self.voice = config.get("voice")
1417
self.response_format = config.get("response_format")
1518
self.sample_rate = config.get("sample_rate")
1619
self.speed = config.get("speed")
@@ -20,7 +23,10 @@ def __init__(self, config, delete_audio_file):
2023
self.api_url = f"https://{self.host}/v1/audio/speech"
2124

2225
def generate_filename(self, extension=".wav"):
23-
return os.path.join(self.output_file, f"tts-{datetime.now().date()}@{uuid.uuid4().hex}{extension}")
26+
return os.path.join(
27+
self.output_file,
28+
f"tts-{datetime.now().date()}@{uuid.uuid4().hex}{extension}",
29+
)
2430

2531
async def text_to_speak(self, text, output_file):
2632
request_json = {
@@ -31,9 +37,11 @@ async def text_to_speak(self, text, output_file):
3137
}
3238
headers = {
3339
"Authorization": f"Bearer {self.access_token}",
34-
"Content-Type": "application/json"
40+
"Content-Type": "application/json",
3541
}
36-
response = requests.request("POST", self.api_url, json=request_json, headers=headers)
42+
response = requests.request(
43+
"POST", self.api_url, json=request_json, headers=headers
44+
)
3745
data = response.content
3846
file_to_save = open(output_file, "wb")
3947
file_to_save.write(data)

0 commit comments

Comments
 (0)