update: 修复 IoT bug

openrz · openrz · commit 17fb60b7aec4 · 2025-05-23T16:03:15.000+08:00
diff --git a/main/xiaozhi-server/core/providers/asr/base.py b/main/xiaozhi-server/core/providers/asr/base.py
@@ -30,15 +30,25 @@ def set_audio_format(self, format: str) -> None:
     @staticmethod
     def decode_opus(opus_data: List[bytes]) -> bytes:
         """将Opus音频数据解码为PCM数据"""
-
-        decoder = opuslib_next.Decoder(16000, 1)  # 16kHz, 单声道
-        pcm_data = []
-
-        for opus_packet in opus_data:
-            try:
-                pcm_frame = decoder.decode(opus_packet, 960)  # 960 samples = 60ms
-                pcm_data.append(pcm_frame)
-            except opuslib_next.OpusError as e:
-                logger.bind(tag=TAG).error(f"Opus解码错误: {e}", exc_info=True)
-
-        return pcm_data
+        try:
+            decoder = opuslib_next.Decoder(16000, 1)  # 16 kHz, mono
+            pcm_data = []
+            buffer_size = 960  # 960 samples are requested per decode call
+
+            for opus_packet in opus_data:
+                try:
+                    # Decode one packet, passing buffer_size as the second argument
+                    pcm_frame = decoder.decode(opus_packet, buffer_size)
+                    if pcm_frame:
+                        pcm_data.append(pcm_frame)
+                except opuslib_next.OpusError as e:
+                    logger.bind(tag=TAG).warning(f"Opus解码错误，跳过当前数据包: {e}")
+                    continue
+                except Exception as e:
+                    logger.bind(tag=TAG).error(f"音频处理错误: {e}", exc_info=True)
+                    continue
+
+            return pcm_data
+        except Exception as e:
+            logger.bind(tag=TAG).error(f"音频解码过程发生错误: {e}", exc_info=True)
+            return []
diff --git a/main/xiaozhi-server/core/providers/asr/fun_local.py b/main/xiaozhi-server/core/providers/asr/fun_local.py
@@ -9,10 +9,14 @@
 from core.providers.asr.base import ASRProviderBase
 from funasr import AutoModel
 from funasr.utils.postprocess_utils import rich_transcription_postprocess
+import shutil
 
 TAG = __name__
 logger = setup_logging()
 
+MAX_RETRIES = 2
+RETRY_DELAY = 1  # 重试延迟（秒）
+
 
 # 捕获标准输出
 class CaptureOutput:
@@ -68,46 +72,69 @@ async def speech_to_text(
     ) -> Tuple[Optional[str], Optional[str]]:
         """语音转文本主处理逻辑"""
         file_path = None
-        try:
-            # 合并所有opus数据包
-            if self.audio_format == "pcm":
-                pcm_data = opus_data
-            else:
-                pcm_data = self.decode_opus(opus_data)
-
-            combined_pcm_data = b"".join(pcm_data)
-
-            # 判断是否保存为WAV文件
-            if self.delete_audio_file:
-                pass
-            else:
-                file_path = self.save_audio_to_file(pcm_data, session_id)
-
-            # 语音识别
-            start_time = time.time()
-            result = self.model.generate(
-                input=combined_pcm_data,
-                cache={},
-                language="auto",
-                use_itn=True,
-                batch_size_s=60,
-            )
-            text = rich_transcription_postprocess(result[0]["text"])
-            logger.bind(tag=TAG).debug(
-                f"语音识别耗时: {time.time() - start_time:.3f}s | 结果: {text}"
-            )
-
-            return text, file_path
-
-        except Exception as e:
-            logger.bind(tag=TAG).error(f"语音识别失败: {e}", exc_info=True)
-            return "", file_path
-
-        # finally:
-        #     # 文件清理逻辑
-        #     if self.delete_audio_file and file_path and os.path.exists(file_path):
-        #         try:
-        #             os.remove(file_path)
-        #             logger.bind(tag=TAG).debug(f"已删除临时音频文件: {file_path}")
-        #         except Exception as e:
-        #             logger.bind(tag=TAG).error(f"文件删除失败: {file_path} | 错误: {e}")
+        retry_count = 0
+
+        while retry_count < MAX_RETRIES:
+            try:
+                # 合并所有opus数据包
+                if self.audio_format == "pcm":
+                    pcm_data = opus_data
+                else:
+                    pcm_data = self.decode_opus(opus_data)
+
+                combined_pcm_data = b"".join(pcm_data)
+
+                # 检查磁盘空间
+                if not self.delete_audio_file:
+                    free_space = shutil.disk_usage(self.output_dir).free
+                    if free_space < len(combined_pcm_data) * 2:  # 预留2倍空间
+                        raise OSError("磁盘空间不足")
+
+                # 判断是否保存为WAV文件
+                if self.delete_audio_file:
+                    pass
+                else:
+                    file_path = self.save_audio_to_file(pcm_data, session_id)
+
+                # 语音识别
+                start_time = time.time()
+                result = self.model.generate(
+                    input=combined_pcm_data,
+                    cache={},
+                    language="auto",
+                    use_itn=True,
+                    batch_size_s=60,
+                )
+                text = rich_transcription_postprocess(result[0]["text"])
+                logger.bind(tag=TAG).debug(
+                    f"语音识别耗时: {time.time() - start_time:.3f}s | 结果: {text}"
+                )
+
+                return text, file_path
+
+            except OSError as e:
+                retry_count += 1
+                if retry_count >= MAX_RETRIES:
+                    logger.bind(tag=TAG).error(
+                        f"语音识别失败（已重试{retry_count}次）: {e}", exc_info=True
+                    )
+                    return "", file_path
+                logger.bind(tag=TAG).warning(
+                    f"语音识别失败，正在重试（{retry_count}/{MAX_RETRIES}）: {e}"
+                )
+                time.sleep(RETRY_DELAY)
+
+            except Exception as e:
+                logger.bind(tag=TAG).error(f"语音识别失败: {e}", exc_info=True)
+                return "", file_path
+
+            finally:
+                # 文件清理逻辑
+                if self.delete_audio_file and file_path and os.path.exists(file_path):
+                    try:
+                        os.remove(file_path)
+                        logger.bind(tag=TAG).debug(f"已删除临时音频文件: {file_path}")
+                    except Exception as e:
+                        logger.bind(tag=TAG).error(
+                            f"文件删除失败: {file_path} | 错误: {e}"
+                        )
diff --git a/main/xiaozhi-server/core/providers/intent/intent_llm/intent_llm.py b/main/xiaozhi-server/core/providers/intent/intent_llm/intent_llm.py
@@ -72,6 +72,10 @@ def get_intent_system_prompt(self, functions_list: str) -> str:
             '返回: {"function_call": {"name": "get_time"}}\n'
             "```\n"
             "```\n"
+            "用户: 当前电池电量是多少？\n"
+            '返回: {"function_call": {"name": "get_battery_level", "arguments": {"response_success": "当前电池电量为{value}%", "response_failure": "无法获取Battery的当前电量百分比"}}}\n'
+            "```\n"
+            "```\n"
             "用户: 我想结束对话\n"
             '返回: {"function_call": {"name": "handle_exit_intent", "arguments": {"say_goodbye": "goodbye"}}}\n'
             "```\n"
@@ -224,7 +228,8 @@ async def detect_intent(self, conn, dialogue_history: List[Dict], text: str) ->
                 if function_name == "continue_chat":
                     # 保留非工具相关的消息
                     clean_history = [
-                        msg for msg in conn.dialogue.dialogue
+                        msg
+                        for msg in conn.dialogue.dialogue
                         if msg.role not in ["tool", "function"]
                     ]
                     conn.dialogue.dialogue = clean_history
diff --git a/main/xiaozhi-server/plugins_func/functions/handle_speaker_or_screen.py b/main/xiaozhi-server/plugins_func/functions/handle_speaker_or_screen.py
@@ -15,15 +15,26 @@ async def _get_device_status(conn, device_name, device_type, property_name):
     return status
 
 
-async def _set_device_property(conn, device_name, device_type, method_name, property_name, new_value=None, action=None, step=10):
+async def _set_device_property(
+    conn,
+    device_name,
+    device_type,
+    method_name,
+    property_name,
+    new_value=None,
+    action=None,
+    step=10,
+):
     """设置设备属性"""
-    current_value = await _get_device_status(conn, device_name, device_type, property_name)
+    current_value = await _get_device_status(
+        conn, device_name, device_type, property_name
+    )
 
-    if action == 'raise':
+    if action == "raise":
         current_value += step
-    elif action == 'lower':
+    elif action == "lower":
         current_value -= step
-    elif action == 'set':
+    elif action == "set":
         if new_value is None:
             raise Exception(f"缺少{property_name}参数")
         current_value = new_value
@@ -37,8 +48,7 @@ async def _set_device_property(conn, device_name, device_type, method_name, prop
 
 def _handle_device_action(conn, func, success_message, error_message, *args, **kwargs):
     """处理设备操作的通用函数"""
-    future = asyncio.run_coroutine_threadsafe(
-        func(conn, *args, **kwargs), conn.loop)
+    future = asyncio.run_coroutine_threadsafe(func(conn, *args, **kwargs), conn.loop)
     try:
         result = future.result()
         logger.bind(tag=TAG).info(f"{success_message}: {result}")
@@ -75,26 +85,41 @@ def _handle_device_action(conn, func, success_message, error_message, *args, **k
                 "device_type": {
                     "type": "string",
                     "description": "设备类型，**严格限定为Speaker（音量）或Screen（亮度）**，其他设备类型禁止调用此函数",
-                    "enum": ["Speaker", "Screen"]
-
+                    "enum": ["Speaker", "Screen"],
                 },
                 "action": {
                     "type": "string",
-                    "description": "动作名称，可选值：get(获取),set(设置),raise(提高),lower(降低)"
+                    "description": "动作名称，可选值：get(获取),set(设置),raise(提高),lower(降低)",
                 },
                 "value": {
                     "type": "integer",
-                    "description": "值大小，可选值：0-100之间的整数"
-                }
+                    "description": "值大小，可选值：0-100之间的整数",
+                },
             },
-            "required": ["device_type", "action"]
-        }
-    }
+            "required": ["device_type", "action"],
+        },
+    },
 }
 
 
-@register_function('handle_speaker_volume_or_screen_brightness', handle_device_function_desc, ToolType.IOT_CTL)
-def handle_speaker_volume_or_screen_brightness(conn, device_type: str, action: str, value: int = None):
+@register_function(
+    "handle_speaker_volume_or_screen_brightness",
+    handle_device_function_desc,
+    ToolType.IOT_CTL,
+)
+def handle_speaker_volume_or_screen_brightness(
+    conn, device_type: str, action: str, value: int = None
+):
+    # 检查value是否为中文值
+    if (
+        value is not None
+        and isinstance(value, str)
+        and any("\u4e00" <= char <= "\u9fff" for char in str(value))
+    ):
+        raise Exception(
+            f"请直接告诉我要将{'音量' if device_type=='Speaker' else '亮度'}调整成多少"
+        )
+
     if device_type == "Speaker":
         method_name, property_name, device_name = "SetVolume", "volume", "音量"
     elif device_type == "Screen":
@@ -108,13 +133,25 @@ def handle_speaker_volume_or_screen_brightness(conn, device_type: str, action: s
     if action == "get":
         # get
         return _handle_device_action(
-            conn, _get_device_status, f"当前{device_name}", f"获取{device_name}失败",
-            device_name=device_name, device_type=device_type, property_name=property_name,
+            conn,
+            _get_device_status,
+            f"当前{device_name}",
+            f"获取{device_name}失败",
+            device_name=device_name,
+            device_type=device_type,
+            property_name=property_name,
         )
     else:
         # set, raise, lower
         return _handle_device_action(
-            conn, _set_device_property, f"{device_name}已调整到", f"{device_name}调整失败",
-            device_name=device_name, device_type=device_type, method_name=method_name,
-            property_name=property_name, new_value=value, action=action
+            conn,
+            _set_device_property,
+            f"{device_name}已调整到",
+            f"{device_name}调整失败",
+            device_name=device_name,
+            device_type=device_type,
+            method_name=method_name,
+            property_name=property_name,
+            new_value=value,
+            action=action,
         )