Separate step configurations from deployment (#3739)

schustmi · web-flow · commit 3360a3cecc9d · 2025-06-20T14:02:22.000+02:00
* Don't convert deployment to model

* Skip recursive dehydration for some attributes

* Skip more dehydration

* Defer loading step configurations

* Add option to include only subset of step configurations

* Split up step configurations

* Fix migration

* Add step configuration unique constraint

* Don't join load in WITH UPDATE statement

* Don't join load in WITH UPDATE statement

* Merge migrations

* Linting
diff --git a/src/zenml/entrypoints/step_entrypoint_configuration.py b/src/zenml/entrypoints/step_entrypoint_configuration.py
@@ -16,9 +16,11 @@
 import os
 import sys
 from typing import TYPE_CHECKING, Any, List, Set
+from uuid import UUID
 
 from zenml.client import Client
 from zenml.entrypoints.base_entrypoint_configuration import (
+    DEPLOYMENT_ID_OPTION,
     BaseEntrypointConfiguration,
 )
 from zenml.integrations.registry import integration_registry
@@ -147,6 +149,18 @@ def get_entrypoint_arguments(
             kwargs[STEP_NAME_OPTION],
         ]
 
+    def load_deployment(self) -> "PipelineDeploymentResponse":
+        """Loads the deployment.
+
+        Returns:
+            The deployment.
+        """
+        deployment_id = UUID(self.entrypoint_args[DEPLOYMENT_ID_OPTION])
+        step_name = self.entrypoint_args[STEP_NAME_OPTION]
+        return Client().zen_store.get_deployment(
+            deployment_id=deployment_id, step_configuration_filter=[step_name]
+        )
+
     def run(self) -> None:
         """Prepares the environment and runs the configured step."""
         deployment = self.load_deployment()
diff --git a/src/zenml/models/v2/core/pipeline_build.py b/src/zenml/models/v2/core/pipeline_build.py
@@ -200,6 +200,10 @@ class PipelineBuildResponseBody(ProjectScopedResponseBody):
 class PipelineBuildResponseMetadata(ProjectScopedResponseMetadata):
     """Response metadata for pipeline builds."""
 
+    __zenml_skip_dehydration__: ClassVar[List[str]] = [
+        "images",
+    ]
+
     pipeline: Optional["PipelineResponse"] = Field(
         default=None, title="The pipeline that was used for this build."
     )
diff --git a/src/zenml/models/v2/core/pipeline_deployment.py b/src/zenml/models/v2/core/pipeline_deployment.py
@@ -13,7 +13,7 @@
 #  permissions and limitations under the License.
 """Models representing pipeline deployments."""
 
-from typing import Any, Dict, Optional, Union
+from typing import Any, ClassVar, Dict, List, Optional, Union
 from uuid import UUID
 
 from pydantic import Field
@@ -130,6 +130,13 @@ class PipelineDeploymentResponseBody(ProjectScopedResponseBody):
 class PipelineDeploymentResponseMetadata(ProjectScopedResponseMetadata):
     """Response metadata for pipeline deployments."""
 
+    __zenml_skip_dehydration__: ClassVar[List[str]] = [
+        "pipeline_configuration",
+        "step_configurations",
+        "client_environment",
+        "pipeline_spec",
+    ]
+
     run_name_template: str = Field(
         title="The run name template for runs created using this deployment.",
     )
diff --git a/src/zenml/models/v2/core/pipeline_run.py b/src/zenml/models/v2/core/pipeline_run.py
@@ -190,6 +190,13 @@ class PipelineRunResponseBody(ProjectScopedResponseBody):
 class PipelineRunResponseMetadata(ProjectScopedResponseMetadata):
     """Response metadata for pipeline runs."""
 
+    __zenml_skip_dehydration__: ClassVar[List[str]] = [
+        "run_metadata",
+        "config",
+        "client_environment",
+        "orchestrator_environment",
+    ]
+
     run_metadata: Dict[str, MetadataType] = Field(
         default={},
         title="Metadata associated with this pipeline run.",
diff --git a/src/zenml/models/v2/core/step_run.py b/src/zenml/models/v2/core/step_run.py
@@ -199,6 +199,12 @@ class StepRunResponseBody(ProjectScopedResponseBody):
 class StepRunResponseMetadata(ProjectScopedResponseMetadata):
     """Response metadata for step runs."""
 
+    __zenml_skip_dehydration__: ClassVar[List[str]] = [
+        "config",
+        "spec",
+        "metadata",
+    ]
+
     # Configuration
     config: "StepConfiguration" = Field(title="The configuration of the step.")
     spec: "StepSpec" = Field(title="The spec of the step.")
diff --git a/src/zenml/zen_server/rbac/utils.py b/src/zenml/zen_server/rbac/utils.py
@@ -120,12 +120,16 @@ def dehydrate_response_model(
         )
 
     dehydrated_values = {}
+    skip_dehydration = getattr(model, "__zenml_skip_dehydration__", [])
     # See `get_subresources_for_model(...)` for a detailed explanation why we
     # need to use `model.__iter__()` here
     for key, value in model.__iter__():
-        dehydrated_values[key] = _dehydrate_value(
-            value, permissions=permissions
-        )
+        if key in skip_dehydration:
+            dehydrated_values[key] = value
+        else:
+            dehydrated_values[key] = _dehydrate_value(
+                value, permissions=permissions
+            )
 
     return type(model).model_validate(dehydrated_values)
 
@@ -579,8 +583,10 @@ def get_subresources_for_model(
         for item in model:
             resources.update(_get_subresources_for_value(item))
     else:
-        for _, value in model.__iter__():
-            resources.update(_get_subresources_for_value(value))
+        skip_dehydration = getattr(model, "__zenml_skip_dehydration__", [])
+        for key, value in model.__iter__():
+            if key not in skip_dehydration:
+                resources.update(_get_subresources_for_value(value))
 
     return resources
 
diff --git a/src/zenml/zen_server/routers/pipeline_deployments_endpoints.py b/src/zenml/zen_server/routers/pipeline_deployments_endpoints.py
@@ -13,10 +13,10 @@
 #  permissions and limitations under the License.
 """Endpoint definitions for deployments."""
 
-from typing import Any, Optional, Union
+from typing import Any, List, Optional, Union
 from uuid import UUID
 
-from fastapi import APIRouter, Depends, Request, Security
+from fastapi import APIRouter, Depends, Query, Request, Security
 
 from zenml.constants import API, PIPELINE_DEPLOYMENTS, VERSION_1
 from zenml.logging.step_logging import fetch_logs
@@ -201,6 +201,7 @@ def get_deployment(
     request: Request,
     deployment_id: UUID,
     hydrate: bool = True,
+    step_configuration_filter: Optional[List[str]] = Query(None),
     _: AuthContext = Security(authorize),
 ) -> Any:
     """Gets a specific deployment using its unique id.
@@ -210,6 +211,9 @@ def get_deployment(
         deployment_id: ID of the deployment to get.
         hydrate: Flag deciding whether to hydrate the output model(s)
             by including metadata fields in the response.
+        step_configuration_filter: List of step names whose configurations
+            to include in the response. If not given, all step
+            configurations will be included.
 
     Returns:
         A specific deployment object.
@@ -218,6 +222,7 @@ def get_deployment(
         id=deployment_id,
         get_method=zen_store().get_deployment,
         hydrate=hydrate,
+        step_configuration_filter=step_configuration_filter,
     )
 
     exclude = None
diff --git a/src/zenml/zen_stores/migrations/versions/3d7e39f3ac92_split_up_step_configurations.py b/src/zenml/zen_stores/migrations/versions/3d7e39f3ac92_split_up_step_configurations.py
@@ -0,0 +1,138 @@
+"""Split up step configurations [3d7e39f3ac92].
+
+Revision ID: 3d7e39f3ac92
+Revises: 0.83.0
+Create Date: 2025-06-17 17:45:31.702617
+
+"""
+
+import json
+import uuid
+
+import sqlalchemy as sa
+import sqlmodel
+from alembic import op
+from sqlalchemy.dialects import mysql
+
+from zenml.utils.time_utils import utc_now
+
+# revision identifiers, used by Alembic.
+revision = "3d7e39f3ac92"
+down_revision = "0.83.0"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    """Upgrade database schema and/or data, creating a new revision."""
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.create_table(
+        "step_configuration",
+        sa.Column("id", sqlmodel.sql.sqltypes.GUID(), nullable=False),
+        sa.Column("created", sa.DateTime(), nullable=False),
+        sa.Column("updated", sa.DateTime(), nullable=False),
+        sa.Column("index", sa.Integer(), nullable=False),
+        sa.Column("name", sqlmodel.sql.sqltypes.AutoString(), nullable=False),
+        sa.Column(
+            "config",
+            sa.String(length=16777215).with_variant(mysql.MEDIUMTEXT, "mysql"),
+            nullable=False,
+        ),
+        sa.Column(
+            "deployment_id", sqlmodel.sql.sqltypes.GUID(), nullable=False
+        ),
+        sa.ForeignKeyConstraint(
+            ["deployment_id"],
+            ["pipeline_deployment.id"],
+            name="fk_step_configuration_deployment_id_pipeline_deployment",
+            ondelete="CASCADE",
+        ),
+        sa.PrimaryKeyConstraint("id"),
+        sa.UniqueConstraint(
+            "deployment_id", "name", name="unique_step_name_for_deployment"
+        ),
+    )
+    with op.batch_alter_table("pipeline_deployment", schema=None) as batch_op:
+        batch_op.add_column(
+            sa.Column("step_count", sa.Integer(), nullable=True)
+        )
+
+    # Migrate existing step configurations
+    connection = op.get_bind()
+    meta = sa.MetaData()
+    meta.reflect(
+        bind=connection, only=("pipeline_deployment", "step_configuration")
+    )
+    pipeline_deployment_table = sa.Table("pipeline_deployment", meta)
+    step_configuration_table = sa.Table("step_configuration", meta)
+
+    step_configurations_to_insert = []
+    deployment_updates = []
+
+    for deployment_id, step_configurations_json in connection.execute(
+        sa.select(
+            pipeline_deployment_table.c.id,
+            pipeline_deployment_table.c.step_configurations,
+        )
+    ):
+        step_configurations = json.loads(step_configurations_json)
+
+        step_count = len(step_configurations)
+        deployment_updates.append(
+            {
+                "id_": deployment_id,
+                "step_count": step_count,
+            }
+        )
+
+        for index, (step_name, step_config) in enumerate(
+            step_configurations.items()
+        ):
+            now = utc_now()
+            step_configurations_to_insert.append(
+                {
+                    "id": str(uuid.uuid4()).replace("-", ""),
+                    "created": now,
+                    "updated": now,
+                    "index": index,
+                    "name": step_name,
+                    "config": json.dumps(step_config),
+                    "deployment_id": deployment_id,
+                }
+            )
+
+    op.bulk_insert(
+        step_configuration_table, rows=step_configurations_to_insert
+    )
+    if deployment_updates:
+        connection.execute(
+            sa.update(pipeline_deployment_table).where(
+                pipeline_deployment_table.c.id == sa.bindparam("id_")
+            ),
+            deployment_updates,
+        )
+
+    with op.batch_alter_table("pipeline_deployment", schema=None) as batch_op:
+        batch_op.alter_column(
+            "step_count", existing_type=sa.Integer(), nullable=False
+        )
+        batch_op.drop_column("step_configurations")
+
+    # ### end Alembic commands ###
+
+
+def downgrade() -> None:
+    """Downgrade database schema and/or data back to the previous revision."""
+    # ### commands auto generated by Alembic - please adjust! ###
+    with op.batch_alter_table("pipeline_deployment", schema=None) as batch_op:
+        batch_op.add_column(
+            sa.Column(
+                "step_configurations",
+                sa.VARCHAR(length=16777215),
+                nullable=False,
+            )
+        )
+        batch_op.drop_column("step_count")
+
+    op.drop_table("step_configuration")
+    # ### end Alembic commands ###
diff --git a/src/zenml/zen_stores/rest_zen_store.py b/src/zenml/zen_stores/rest_zen_store.py
@@ -1631,14 +1631,20 @@ def create_deployment(
         )
 
     def get_deployment(
-        self, deployment_id: UUID, hydrate: bool = True
+        self,
+        deployment_id: UUID,
+        hydrate: bool = True,
+        step_configuration_filter: Optional[List[str]] = None,
     ) -> PipelineDeploymentResponse:
         """Get a deployment with a given ID.
 
         Args:
             deployment_id: ID of the deployment.
             hydrate: Flag deciding whether to hydrate the output model(s)
                 by including metadata fields in the response.
+            step_configuration_filter: List of step names whose configurations
+                to include in the response. If not given, all step
+                configurations will be included.
 
         Returns:
             The deployment.
@@ -1647,7 +1653,10 @@ def get_deployment(
             resource_id=deployment_id,
             route=PIPELINE_DEPLOYMENTS,
             response_model=PipelineDeploymentResponse,
-            params={"hydrate": hydrate},
+            params={
+                "hydrate": hydrate,
+                "step_configuration_filter": step_configuration_filter,
+            },
         )
 
     def list_deployments(
diff --git a/src/zenml/zen_stores/schemas/__init__.py b/src/zenml/zen_stores/schemas/__init__.py
@@ -35,6 +35,7 @@
 from zenml.zen_stores.schemas.server_settings_schemas import ServerSettingsSchema
 from zenml.zen_stores.schemas.pipeline_deployment_schemas import (
     PipelineDeploymentSchema,
+    StepConfigurationSchema,
 )
 from zenml.zen_stores.schemas.pipeline_run_schemas import PipelineRunSchema
 from zenml.zen_stores.schemas.pipeline_schemas import PipelineSchema
@@ -91,6 +92,7 @@
     "OAuthDeviceSchema",
     "PipelineBuildSchema",
     "PipelineDeploymentSchema",
+    "StepConfigurationSchema",
     "PipelineRunSchema",
     "PipelineSchema",
     "RunMetadataResourceSchema",
diff --git a/src/zenml/zen_stores/schemas/pipeline_deployment_schemas.py b/src/zenml/zen_stores/schemas/pipeline_deployment_schemas.py
diff --git a/src/zenml/zen_stores/schemas/pipeline_run_schemas.py b/src/zenml/zen_stores/schemas/pipeline_run_schemas.py
diff --git a/src/zenml/zen_stores/schemas/step_run_schemas.py b/src/zenml/zen_stores/schemas/step_run_schemas.py
diff --git a/src/zenml/zen_stores/sql_zen_store.py b/src/zenml/zen_stores/sql_zen_store.py
diff --git a/src/zenml/zen_stores/zen_store_interface.py b/src/zenml/zen_stores/zen_store_interface.py