
Commit 6209108

strict_bytes
1 parent 275e86a commit 6209108

File tree

5 files changed: +63 -35 lines changed

api/configs/feature/__init__.py

Lines changed: 10 additions & 0 deletions
@@ -602,6 +602,16 @@ class ToolConfig(BaseSettings):
         default=3600,
     )
 
+    TOOL_FILE_CHUNK_SIZE_LIMIT: PositiveInt = Field(
+        description="Maximum bytes for a single file chunk of tool generated files",
+        default=8 * 1024,
+    )
+
+    TOOL_FILE_SIZE_LIMIT: PositiveInt = Field(
+        description="Maximum bytes for a single file of tool generated files",
+        default=30 * 1024 * 1024,
+    )
+
 
 class MailConfig(BaseSettings):
     """

api/core/plugin/impl/tool.py

Lines changed: 47 additions & 32 deletions
@@ -3,12 +3,44 @@
 
 from pydantic import BaseModel
 
+from configs import dify_config
 from core.plugin.entities.plugin import GenericProviderID, ToolProviderID
 from core.plugin.entities.plugin_daemon import PluginBasicBooleanResponse, PluginToolProviderEntity
 from core.plugin.impl.base import BasePluginClient
 from core.tools.entities.tool_entities import ToolInvokeMessage, ToolParameter
 
 
+class FileChunk:
+    """
+    Only used for internal processing.
+    """
+
+    __slots__ = ("bytes_written", "total_length", "data")
+
+    bytes_written: int
+    total_length: int
+    data: bytearray
+
+    def __init__(self, total_length: int):
+        self.bytes_written = 0
+        self.total_length = total_length
+        self.data = bytearray(total_length)
+
+    def write_blob(self, blob_data):
+        blob_data_length = len(blob_data)
+        if blob_data_length == 0:
+            return
+
+        # Validate write boundaries
+        expected_final_size = self.bytes_written + blob_data_length
+        if expected_final_size > self.total_length:
+            raise ValueError(f"Chunk would exceed file size ({expected_final_size} > {self.total_length})")
+
+        start_pos = self.bytes_written
+        self.data[start_pos : start_pos + blob_data_length] = blob_data
+        self.bytes_written += blob_data_length
+
+
 class PluginToolManager(BasePluginClient):
     def fetch_tool_providers(self, tenant_id: str) -> list[PluginToolProviderEntity]:
         """
@@ -111,20 +143,6 @@ def invoke(
             },
         )
 
-        class FileChunk:
-            """
-            Only used for internal processing.
-            """
-
-            bytes_written: int
-            total_length: int
-            data: bytearray
-
-            def __init__(self, total_length: int):
-                self.bytes_written = 0
-                self.total_length = total_length
-                self.data = bytearray(total_length)
-
         files: dict[str, FileChunk] = {}
         for resp in response:
             if resp.type == ToolInvokeMessage.MessageType.BLOB_CHUNK:
@@ -134,36 +152,33 @@ def __init__(self, total_length: int):
                 total_length = resp.message.total_length
                 blob_data = resp.message.blob
                 is_end = resp.message.end
+                blob_data_length = len(blob_data)
+
+                # Pre-check conditions to avoid unnecessary processing
+                file_size_limit = dify_config.TOOL_FILE_SIZE_LIMIT
+                chunk_size_limit = dify_config.TOOL_FILE_CHUNK_SIZE_LIMIT
+                if total_length > file_size_limit:
+                    raise ValueError(f"File size {total_length} exceeds limit of {file_size_limit} bytes")
+
+                if blob_data_length > chunk_size_limit:
+                    raise ValueError(f"Chunk size {blob_data_length} exceeds limit of {chunk_size_limit} bytes")
 
                 # Initialize buffer for this file if it doesn't exist
                 if chunk_id not in files:
                     files[chunk_id] = FileChunk(total_length)
+                file_chunk = files[chunk_id]
 
                 # If this is the final chunk, yield a complete blob message
                 if is_end:
                     yield ToolInvokeMessage(
                         type=ToolInvokeMessage.MessageType.BLOB,
-                        message=ToolInvokeMessage.BlobMessage(blob=files[chunk_id].data),
+                        message=ToolInvokeMessage.BlobMessage(blob=bytes(file_chunk.data)),
                         meta=resp.meta,
                     )
+                    del files[chunk_id]
                 else:
-                    # Check if file is too large (30MB limit)
-                    if files[chunk_id].bytes_written + len(blob_data) > 30 * 1024 * 1024:
-                        # Delete the file if it's too large
-                        del files[chunk_id]
-                        # Skip yielding this message
-                        raise ValueError("File is too large which reached the limit of 30MB")
-
-                    # Check if single chunk is too large (8KB limit)
-                    if len(blob_data) > 8192:
-                        # Skip yielding this message
-                        raise ValueError("File chunk is too large which reached the limit of 8KB")
-
-                    # Append the blob data to the buffer
-                    files[chunk_id].data[
-                        files[chunk_id].bytes_written : files[chunk_id].bytes_written + len(blob_data)
-                    ] = blob_data
-                    files[chunk_id].bytes_written += len(blob_data)
+                    # Write the blob data to the file chunk
+                    file_chunk.write_blob(blob_data)
             else:
                 yield resp
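On its own, FileChunk preallocates a bytearray of the declared total length and write_blob copies each chunk into it, rejecting up front any write that would overflow that length. A small sketch of that behaviour, reusing the FileChunk class exactly as added in the hunk above (the payloads are made up for illustration):

chunk = FileChunk(total_length=10)
chunk.write_blob(b"hello")
chunk.write_blob(b"world")
assert chunk.bytes_written == 10
assert bytes(chunk.data) == b"helloworld"

# A write past the declared total_length fails before touching the buffer.
try:
    chunk.write_blob(b"!")
except ValueError as exc:
    print(exc)  # Chunk would exceed file size (11 > 10)

The switch from blob=files[chunk_id].data to blob=bytes(file_chunk.data) also ties into the strict_bytes setting below: once mypy stops treating bytearray as bytes-compatible, the buffer presumably has to be copied into a real bytes object before being handed to BlobMessage.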

api/core/tools/entities/tool_entities.py

Lines changed: 2 additions & 2 deletions
@@ -1,7 +1,7 @@
 import base64
 import enum
 from collections.abc import Mapping
-from enum import Enum
+from enum import Enum, StrEnum
 from typing import Any, Optional, Union
 
 from pydantic import BaseModel, ConfigDict, Field, ValidationInfo, field_serializer, field_validator, model_validator
@@ -176,7 +176,7 @@ class LogStatus(Enum):
     data: Mapping[str, Any] = Field(..., description="Detailed log data")
     metadata: Optional[Mapping[str, Any]] = Field(default=None, description="The metadata of the log")
 
-    class MessageType(Enum):
+    class MessageType(StrEnum):
         TEXT = "text"
         IMAGE = "image"
         LINK = "link"
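Moving MessageType from Enum to StrEnum (Python 3.11+) makes each member a real str, so members compare equal to their raw values and format as those values instead of as member names. A minimal illustration with a cut-down enum, not the real MessageType:

from enum import Enum, StrEnum


class PlainType(Enum):
    TEXT = "text"


class StrType(StrEnum):
    TEXT = "text"


print(PlainType.TEXT == "text")  # False: plain Enum members never equal their values
print(StrType.TEXT == "text")    # True: StrEnum members are str instances
print(f"{PlainType.TEXT}")       # PlainType.TEXT
print(f"{StrType.TEXT}")         # text

That presumably lets message types be compared against and serialized as plain strings without sprinkling .value through the codebase.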

api/mypy.ini

Lines changed: 3 additions & 0 deletions
@@ -2,6 +2,9 @@
 warn_return_any = True
 warn_unused_configs = True
 check_untyped_defs = True
+strict_bytes = True
+sqlite_cache = True
+cache_fine_grained = True
 exclude = (?x)(
     core/model_runtime/model_providers/
     | tests/
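mypy's strict_bytes option disables the legacy promotion of bytearray and memoryview to bytes, which is presumably what motivates the explicit bytes(file_chunk.data) copy in tool.py; sqlite_cache and cache_fine_grained simply move the cache flags that dev/mypy-check used to pass on the command line into the config file. A short sketch of the kind of call strict_bytes starts flagging (send is a hypothetical function, not part of the codebase):

def send(payload: bytes) -> None:
    ...


buf = bytearray(b"chunk")
send(buf)         # rejected under strict_bytes: bytearray is no longer promoted to bytes
send(bytes(buf))  # fine: explicit copy, mirroring bytes(file_chunk.data) in tool.py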

dev/mypy-check

Lines changed: 1 addition & 1 deletion
@@ -7,4 +7,4 @@ cd "$SCRIPT_DIR/.."
 
 # run mypy checks
 uv run --directory api --dev --with pip \
-    python -m mypy --install-types --non-interactive --cache-fine-grained --sqlite-cache .
+    python -m mypy --install-types --non-interactive .
