Skip to content

Commit 17fb60b

Browse files
committed
update:修复iotbug
1 parent 0a9e5d2 commit 17fb60b

File tree

4 files changed

+157
-78
lines changed

4 files changed

+157
-78
lines changed

main/xiaozhi-server/core/providers/asr/base.py

Lines changed: 22 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -30,15 +30,25 @@ def set_audio_format(self, format: str) -> None:
3030
@staticmethod
3131
def decode_opus(opus_data: List[bytes]) -> bytes:
3232
"""将Opus音频数据解码为PCM数据"""
33-
34-
decoder = opuslib_next.Decoder(16000, 1) # 16kHz, 单声道
35-
pcm_data = []
36-
37-
for opus_packet in opus_data:
38-
try:
39-
pcm_frame = decoder.decode(opus_packet, 960) # 960 samples = 60ms
40-
pcm_data.append(pcm_frame)
41-
except opuslib_next.OpusError as e:
42-
logger.bind(tag=TAG).error(f"Opus解码错误: {e}", exc_info=True)
43-
44-
return pcm_data
33+
try:
34+
decoder = opuslib_next.Decoder(16000, 1) # 16kHz, 单声道
35+
pcm_data = []
36+
buffer_size = 960 # 每次处理960个采样点
37+
38+
for opus_packet in opus_data:
39+
try:
40+
# 使用较小的缓冲区大小进行处理
41+
pcm_frame = decoder.decode(opus_packet, buffer_size)
42+
if pcm_frame:
43+
pcm_data.append(pcm_frame)
44+
except opuslib_next.OpusError as e:
45+
logger.bind(tag=TAG).warning(f"Opus解码错误,跳过当前数据包: {e}")
46+
continue
47+
except Exception as e:
48+
logger.bind(tag=TAG).error(f"音频处理错误: {e}", exc_info=True)
49+
continue
50+
51+
return pcm_data
52+
except Exception as e:
53+
logger.bind(tag=TAG).error(f"音频解码过程发生错误: {e}", exc_info=True)
54+
return []

main/xiaozhi-server/core/providers/asr/fun_local.py

Lines changed: 70 additions & 43 deletions
Original file line number | Diff line number | Diff line change
@@ -9,10 +9,14 @@
99
from core.providers.asr.base import ASRProviderBase
1010
from funasr import AutoModel
1111
from funasr.utils.postprocess_utils import rich_transcription_postprocess
12+
import shutil
1213

1314
TAG = __name__
1415
logger = setup_logging()
1516

17+
MAX_RETRIES = 2
18+
RETRY_DELAY = 1 # 重试延迟(秒)
19+
1620

1721
# 捕获标准输出
1822
class CaptureOutput:
@@ -68,46 +72,69 @@ async def speech_to_text(
6872
) -> Tuple[Optional[str], Optional[str]]:
6973
"""语音转文本主处理逻辑"""
7074
file_path = None
71-
try:
72-
# 合并所有opus数据包
73-
if self.audio_format == "pcm":
74-
pcm_data = opus_data
75-
else:
76-
pcm_data = self.decode_opus(opus_data)
77-
78-
combined_pcm_data = b"".join(pcm_data)
79-
80-
# 判断是否保存为WAV文件
81-
if self.delete_audio_file:
82-
pass
83-
else:
84-
file_path = self.save_audio_to_file(pcm_data, session_id)
85-
86-
# 语音识别
87-
start_time = time.time()
88-
result = self.model.generate(
89-
input=combined_pcm_data,
90-
cache={},
91-
language="auto",
92-
use_itn=True,
93-
batch_size_s=60,
94-
)
95-
text = rich_transcription_postprocess(result[0]["text"])
96-
logger.bind(tag=TAG).debug(
97-
f"语音识别耗时: {time.time() - start_time:.3f}s | 结果: {text}"
98-
)
99-
100-
return text, file_path
101-
102-
except Exception as e:
103-
logger.bind(tag=TAG).error(f"语音识别失败: {e}", exc_info=True)
104-
return "", file_path
105-
106-
# finally:
107-
# # 文件清理逻辑
108-
# if self.delete_audio_file and file_path and os.path.exists(file_path):
109-
# try:
110-
# os.remove(file_path)
111-
# logger.bind(tag=TAG).debug(f"已删除临时音频文件: {file_path}")
112-
# except Exception as e:
113-
# logger.bind(tag=TAG).error(f"文件删除失败: {file_path} | 错误: {e}")
75+
retry_count = 0
76+
77+
while retry_count < MAX_RETRIES:
78+
try:
79+
# 合并所有opus数据包
80+
if self.audio_format == "pcm":
81+
pcm_data = opus_data
82+
else:
83+
pcm_data = self.decode_opus(opus_data)
84+
85+
combined_pcm_data = b"".join(pcm_data)
86+
87+
# 检查磁盘空间
88+
if not self.delete_audio_file:
89+
free_space = shutil.disk_usage(self.output_dir).free
90+
if free_space < len(combined_pcm_data) * 2: # 预留2倍空间
91+
raise OSError("磁盘空间不足")
92+
93+
# 判断是否保存为WAV文件
94+
if self.delete_audio_file:
95+
pass
96+
else:
97+
file_path = self.save_audio_to_file(pcm_data, session_id)
98+
99+
# 语音识别
100+
start_time = time.time()
101+
result = self.model.generate(
102+
input=combined_pcm_data,
103+
cache={},
104+
language="auto",
105+
use_itn=True,
106+
batch_size_s=60,
107+
)
108+
text = rich_transcription_postprocess(result[0]["text"])
109+
logger.bind(tag=TAG).debug(
110+
f"语音识别耗时: {time.time() - start_time:.3f}s | 结果: {text}"
111+
)
112+
113+
return text, file_path
114+
115+
except OSError as e:
116+
retry_count += 1
117+
if retry_count >= MAX_RETRIES:
118+
logger.bind(tag=TAG).error(
119+
f"语音识别失败(已重试{retry_count}次): {e}", exc_info=True
120+
)
121+
return "", file_path
122+
logger.bind(tag=TAG).warning(
123+
f"语音识别失败,正在重试({retry_count}/{MAX_RETRIES}): {e}"
124+
)
125+
time.sleep(RETRY_DELAY)
126+
127+
except Exception as e:
128+
logger.bind(tag=TAG).error(f"语音识别失败: {e}", exc_info=True)
129+
return "", file_path
130+
131+
finally:
132+
# 文件清理逻辑
133+
if self.delete_audio_file and file_path and os.path.exists(file_path):
134+
try:
135+
os.remove(file_path)
136+
logger.bind(tag=TAG).debug(f"已删除临时音频文件: {file_path}")
137+
except Exception as e:
138+
logger.bind(tag=TAG).error(
139+
f"文件删除失败: {file_path} | 错误: {e}"
140+
)

main/xiaozhi-server/core/providers/intent/intent_llm/intent_llm.py

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -72,6 +72,10 @@ def get_intent_system_prompt(self, functions_list: str) -> str:
7272
'返回: {"function_call": {"name": "get_time"}}\n'
7373
"```\n"
7474
"```\n"
75+
"用户: 当前电池电量是多少?\n"
76+
'返回: {"function_call": {"name": "get_battery_level", "arguments": {"response_success": "当前电池电量为{value}%", "response_failure": "无法获取Battery的当前电量百分比"}}}\n'
77+
"```\n"
78+
"```\n"
7579
"用户: 我想结束对话\n"
7680
'返回: {"function_call": {"name": "handle_exit_intent", "arguments": {"say_goodbye": "goodbye"}}}\n'
7781
"```\n"
@@ -224,7 +228,8 @@ async def detect_intent(self, conn, dialogue_history: List[Dict], text: str) ->
224228
if function_name == "continue_chat":
225229
# 保留非工具相关的消息
226230
clean_history = [
227-
msg for msg in conn.dialogue.dialogue
231+
msg
232+
for msg in conn.dialogue.dialogue
228233
if msg.role not in ["tool", "function"]
229234
]
230235
conn.dialogue.dialogue = clean_history

main/xiaozhi-server/plugins_func/functions/handle_speaker_or_screen.py

Lines changed: 59 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -15,15 +15,26 @@ async def _get_device_status(conn, device_name, device_type, property_name):
1515
return status
1616

1717

18-
async def _set_device_property(conn, device_name, device_type, method_name, property_name, new_value=None, action=None, step=10):
18+
async def _set_device_property(
19+
conn,
20+
device_name,
21+
device_type,
22+
method_name,
23+
property_name,
24+
new_value=None,
25+
action=None,
26+
step=10,
27+
):
1928
"""设置设备属性"""
20-
current_value = await _get_device_status(conn, device_name, device_type, property_name)
29+
current_value = await _get_device_status(
30+
conn, device_name, device_type, property_name
31+
)
2132

22-
if action == 'raise':
33+
if action == "raise":
2334
current_value += step
24-
elif action == 'lower':
35+
elif action == "lower":
2536
current_value -= step
26-
elif action == 'set':
37+
elif action == "set":
2738
if new_value is None:
2839
raise Exception(f"缺少{property_name}参数")
2940
current_value = new_value
@@ -37,8 +48,7 @@ async def _set_device_property(conn, device_name, device_type, method_name, prop
3748

3849
def _handle_device_action(conn, func, success_message, error_message, *args, **kwargs):
3950
"""处理设备操作的通用函数"""
40-
future = asyncio.run_coroutine_threadsafe(
41-
func(conn, *args, **kwargs), conn.loop)
51+
future = asyncio.run_coroutine_threadsafe(func(conn, *args, **kwargs), conn.loop)
4252
try:
4353
result = future.result()
4454
logger.bind(tag=TAG).info(f"{success_message}: {result}")
@@ -75,26 +85,41 @@ def _handle_device_action(conn, func, success_message, error_message, *args, **k
7585
"device_type": {
7686
"type": "string",
7787
"description": "设备类型,**严格限定为Speaker(音量)或Screen(亮度)**,其他设备类型禁止调用此函数",
78-
"enum": ["Speaker", "Screen"]
79-
88+
"enum": ["Speaker", "Screen"],
8089
},
8190
"action": {
8291
"type": "string",
83-
"description": "动作名称,可选值:get(获取),set(设置),raise(提高),lower(降低)"
92+
"description": "动作名称,可选值:get(获取),set(设置),raise(提高),lower(降低)",
8493
},
8594
"value": {
8695
"type": "integer",
87-
"description": "值大小,可选值:0-100之间的整数"
88-
}
96+
"description": "值大小,可选值:0-100之间的整数",
97+
},
8998
},
90-
"required": ["device_type", "action"]
91-
}
92-
}
99+
"required": ["device_type", "action"],
100+
},
101+
},
93102
}
94103

95104

96-
@register_function('handle_speaker_volume_or_screen_brightness', handle_device_function_desc, ToolType.IOT_CTL)
97-
def handle_speaker_volume_or_screen_brightness(conn, device_type: str, action: str, value: int = None):
105+
@register_function(
106+
"handle_speaker_volume_or_screen_brightness",
107+
handle_device_function_desc,
108+
ToolType.IOT_CTL,
109+
)
110+
def handle_speaker_volume_or_screen_brightness(
111+
conn, device_type: str, action: str, value: int = None
112+
):
113+
# 检查value是否为中文值
114+
if (
115+
value is not None
116+
and isinstance(value, str)
117+
and any("\u4e00" <= char <= "\u9fff" for char in str(value))
118+
):
119+
raise Exception(
120+
f"请直接告诉我要将{'音量' if device_type=='Speaker' else '亮度'}调整成多少"
121+
)
122+
98123
if device_type == "Speaker":
99124
method_name, property_name, device_name = "SetVolume", "volume", "音量"
100125
elif device_type == "Screen":
@@ -108,13 +133,25 @@ def handle_speaker_volume_or_screen_brightness(conn, device_type: str, action: s
108133
if action == "get":
109134
# get
110135
return _handle_device_action(
111-
conn, _get_device_status, f"当前{device_name}", f"获取{device_name}失败",
112-
device_name=device_name, device_type=device_type, property_name=property_name,
136+
conn,
137+
_get_device_status,
138+
f"当前{device_name}",
139+
f"获取{device_name}失败",
140+
device_name=device_name,
141+
device_type=device_type,
142+
property_name=property_name,
113143
)
114144
else:
115145
# set, raise, lower
116146
return _handle_device_action(
117-
conn, _set_device_property, f"{device_name}已调整到", f"{device_name}调整失败",
118-
device_name=device_name, device_type=device_type, method_name=method_name,
119-
property_name=property_name, new_value=value, action=action
147+
conn,
148+
_set_device_property,
149+
f"{device_name}已调整到",
150+
f"{device_name}调整失败",
151+
device_name=device_name,
152+
device_type=device_type,
153+
method_name=method_name,
154+
property_name=property_name,
155+
new_value=value,
156+
action=action,
120157
)

0 commit comments

Comments
 (0)