Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
---
changeKind: fix
packages:
- "@typespec/http-client-python"
---

Fix dangling `_types.X` references when template-instantiated models (e.g. `ResourceUpdateModel<Foo, FooProperties>`) share a `crossLanguageDefinitionId`. The TypedDict deduplication now pairs each model with its own copy by name, so distinct models such as `CacheUpdate` and `VolumeUpdate` are all rendered in `types.py`.

Also stop emitting unused TypedDicts for response-only models in `types.py`. Output-only models already render as classes in `models/` and are referenced via `_models.X`, so their TypedDict copies (e.g. `GetResponse`) were dead code. The set of TypedDicts (and discriminated-base union aliases) rendered in `types.py` is now the transitive closure of the request-body input models over their base classes, discriminated subtypes and property types. Input body overloads (including spread bodies whose usage lacks the `Input` flag) are still emitted, and any output-only model reachable from an input model — such as a discriminated subtype or an ARM `SystemData` property — is kept so no forward reference is left undefined. This fixes a `NameError` at import time when an output-only union alias (e.g. `Dinosaur = Union[TRex]`) referenced an excluded subtype, and a pyright `reportUndefinedVariable` error when an input model referenced an excluded property type (e.g. `SystemData`).
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,10 @@ def __init__(
def is_usage_output(self) -> bool:
return bool(self.usage & UsageFlags.Output.value)

@property
def is_usage_input(self) -> bool:
return bool(self.usage & UsageFlags.Input.value)

@property
def is_used_in_operations_via_types(self) -> bool:
"""Whether this model would be imported from types.py (not models) in operations."""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
# --------------------------------------------------------------------------
import keyword
import re
from typing import Optional
from functools import cached_property
from typing import Any, Optional
from ..models import ModelType, CodeModel
from ..models.enum_type import EnumType
from ..models.imports import FileImport, ImportType
Expand Down Expand Up @@ -71,39 +72,131 @@ def declare_literal_enum(self, enum: EnumType) -> str:
values = [enum.get_declaration(v.value) for v in enum.values]
return f"{enum.name} = Literal[{', '.join(values)}]"

@staticmethod
def _renders_as_input_typeddict(m: "ModelType") -> bool:
"""Whether a non-json model is a *seed* for the types.py input surface.

TypedDicts (and discriminated-base union aliases) in ``types.py`` describe request-body
(*input*) shapes. Output-only models already render as classes in ``models/`` and are
referenced via ``_models.*``, so a response-only model that nothing input references would be
dead code. A model seeds the input surface when it is:

* a typeddict copy (``base == "typeddict"``) — these only exist as input body overloads
(their ``usage`` may carry ``Spread``/``Json`` rather than ``Input``), or
* ``is_typed_dict_only`` — includes every model in full ``typeddict`` mode (responses too),
and input-only anonymous bodies, or
* used as input (``is_usage_input``) — e.g. a model shared between request and response.

The full set rendered in types.py is the transitive closure of these seeds over base
classes, discriminated subtypes and property types (see :attr:`_types_file_model_names`), so
an output-only model that *is* referenced by an input model (e.g. ARM ``SystemData`` on
``Resource``) is still rendered.
"""
return m.base == "typeddict" or m.is_typed_dict_only or m.is_usage_input

@staticmethod
def _iter_referenced_models(base_type: Any):
"""Yield ModelType instances directly referenced by a type, recursing through containers.

Handles list/dict ``element_type``, constant/enum ``value_type`` and combined ``types``.
A referenced ModelType is yielded but not descended into — the closure walk descends into a
model's own properties/parents/subtypes when that model is itself visited.
"""
stack = [base_type]
seen: set[int] = set()
while stack:
t = stack.pop()
if t is None or id(t) in seen:
continue
seen.add(id(t))
if isinstance(t, ModelType):
yield t
continue
for attr in ("element_type", "value_type"):
child = getattr(t, attr, None)
if child is not None:
stack.append(child)
for child in getattr(t, "types", []) or []:
stack.append(child)

@cached_property
def _types_file_model_names(self) -> set[str]:
"""Names of every model that must be rendered in types.py.

Starts from the input seeds (:meth:`_renders_as_input_typeddict`) and takes the transitive
closure over base classes, discriminated subtypes and property types. Keyed on model
``name`` (dpg models and their typeddict copies share a name and render as one TypedDict), so
the result is stable regardless of which copy a reference points at.

Cached: the closure is a full walk over the model graph and is read several times per file
(via :attr:`typeddict_models` and :attr:`discriminated_base_models`). ``self._models`` is set
once at construction and never mutated, so memoizing on the instance is safe.
"""
needed: set[str] = set()
stack = [m for m in self._models if m.base != "json" and self._renders_as_input_typeddict(m)]
while stack:
m = stack.pop()
if m.base == "json" or m.name in needed:
continue
needed.add(m.name)
stack.extend(m.parents)
stack.extend(m.discriminated_subtypes.values())
for prop in m.properties:
stack.extend(self._iter_referenced_models(prop.type))
return needed

@property
def typeddict_models(self) -> list[ModelType]:
"""Models that should be rendered as TypedDicts (excluding discriminated bases which become unions).

When both a dpg model and its typeddict copy exist (same crossLanguageDefinitionId),
When both a dpg model and its typeddict copy exist for the same model,

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

bc crossLangDefId can apply to multiple models if they have the same base type but diff names? I feel like thats a weird case but it does seem like something that is existing in typespec today

prefer the dpg model (it already renders as a TypedDict in types.py) and skip the copy.

The pairing is keyed on the model ``name`` (the copy is a shallow copy of the source, so it
shares the source's name). ``crossLanguageDefinitionId`` cannot be used here: template
instantiated models such as ``ResourceUpdateModel<Foo, FooProperties>`` all share the
template's cross-language id, so keying on it would wrongly collapse distinct models
(e.g. ``CacheUpdate`` and ``VolumeUpdate``) into one and drop the rest from types.py.

Only models in the input-surface closure (:attr:`_types_file_model_names`) are rendered, so
response-only models (e.g. ``GetResponse``) are dropped while models reachable from an input
model — including output-only ones such as a discriminated subtype or an ARM ``SystemData``
property — are kept, ensuring no forward reference is left undefined.
"""
candidates = [m for m in self._models if m.base != "json" and not m.discriminated_subtypes]
seen_ids: dict[str, "ModelType"] = {}
needed = self._types_file_model_names
candidates = [
m for m in self._models if m.base != "json" and not m.discriminated_subtypes and m.name in needed
]
seen_names: dict[str, "ModelType"] = {}
result: list["ModelType"] = []
for m in candidates:
clid = m.yaml_data.get("crossLanguageDefinitionId")
if clid and clid in seen_ids:
name = m.name
if name in seen_names:
# Prefer the dpg model over the typeddict copy
if m.base == "dpg" and seen_ids[clid].base == "typeddict":
if m.base == "dpg" and seen_names[name].base == "typeddict":
# Replace the typeddict copy with the dpg model
result = [r if r is not seen_ids[clid] else m for r in result]
seen_ids[clid] = m
result = [r if r is not seen_names[name] else m for r in result]
seen_names[name] = m
# Otherwise skip this duplicate
continue
if clid:
seen_ids[clid] = m
seen_names[name] = m
result.append(m)
return result

@property
def discriminated_base_models(self) -> list[ModelType]:
"""Discriminated base models that become Union type aliases in types.py.

Only bases in the input-surface closure (:attr:`_types_file_model_names`) are emitted: an
output-only ``Dinosaur = Union[TRex]`` alias is dead code and would reference subtype
TypedDicts that are themselves (correctly) omitted from types.py, causing a ``NameError`` at
import time.

Topologically sorted so that nested discriminated bases (e.g. Shark)
are defined before their parents (e.g. Fish = Union[Salmon, Shark]).
"""
bases = [m for m in self._models if m.base != "json" and m.discriminated_subtypes]
needed = self._types_file_model_names
bases = [m for m in self._models if m.base != "json" and m.discriminated_subtypes and m.name in needed]
base_names = {m.name for m in bases}
sorted_bases: list[ModelType] = []
visited: set[str] = set()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -307,8 +307,18 @@ def is_tsp(self) -> bool:
return self.options.get("tsp_file", False)

@staticmethod
def _find_existing_typeddict(code_model: dict[str, Any], cross_lang_id: Optional[str]) -> Optional[dict[str, Any]]:
"""Find an existing typeddict copy with the given crossLanguageDefinitionId."""
def _find_existing_typeddict(
code_model: dict[str, Any],
cross_lang_id: Optional[str],
name: Optional[str] = None,
) -> Optional[dict[str, Any]]:
"""Find an existing typeddict copy for the given model.

Matches on both ``crossLanguageDefinitionId`` and ``name``. The name is required because
template-instantiated models (e.g. ``ResourceUpdateModel<Foo, FooProperties>``) all share
the template's cross-language id, so matching on the id alone would reuse one model's copy
(e.g. ``CacheUpdate``) for a different model (e.g. ``VolumeUpdate``).
"""
if not cross_lang_id:
return None
return next(
Expand All @@ -318,10 +328,50 @@ def _find_existing_typeddict(code_model: dict[str, Any], cross_lang_id: Optional
if t.get("type") == "model"
and t.get("base") == "typeddict"
and t.get("crossLanguageDefinitionId") == cross_lang_id
and (name is None or t.get("name") == name)
),
None,
)

@staticmethod
def _find_spread_original(code_model: dict[str, Any], json_model: dict[str, Any]) -> Optional[dict[str, Any]]:
"""Recover the dpg model that a spread (json) body was cloned from.

When a spread body type is also used elsewhere, the emitter clones it, renames the clone to
``<Method>Request`` and sets ``base = "json"`` while keeping the original
``crossLanguageDefinitionId``. To reference the real model's TypedDict we look the clone up
by that id.

Distinct template-instantiated models share a single ``crossLanguageDefinitionId``, so an id
match alone is ambiguous. We only reuse an original when the choice is unambiguous:

* a dpg candidate whose ``name`` equals the json model's name (the body was not renamed), or
* exactly one dpg candidate carries the id.

Otherwise we return ``None`` so the caller falls back to the json model itself, avoiding a
reference to the wrong model's TypedDict.
"""
cross_lang_id = json_model.get("crossLanguageDefinitionId")
if not cross_lang_id:
return None
candidates = [
t
for t in code_model["types"]
if t.get("type") == "model"
and t.get("base") == "dpg"
and t.get("crossLanguageDefinitionId") == cross_lang_id
and t is not json_model
]
if not candidates:
return None
name = json_model.get("name")
for candidate in candidates:
if candidate.get("name") == name:
return candidate
if len(candidates) == 1:
return candidates[0]
return None

@staticmethod
def _insert_typeddict_overload(
code_model: dict[str, Any],
Expand Down Expand Up @@ -376,25 +426,14 @@ def add_body_param_type(
# Add typeddict overload for non-spread dpg models
if self.options["models-mode"] == "dpg" and is_dpg_model:
cross_lang_id = model_type.get("crossLanguageDefinitionId")
existing_td = self._find_existing_typeddict(code_model, cross_lang_id)
existing_td = self._find_existing_typeddict(code_model, cross_lang_id, model_type.get("name"))
self._insert_typeddict_overload(code_model, body_parameter, model_type, origin_type, existing_td)

# For spread bodies (json base), add a typeddict overload that references
# the original model. This replaces the JSON single-body overload.
if is_json_model:
cross_lang_id = model_type.get("crossLanguageDefinitionId")
original = None
if cross_lang_id:
original = next(
(
t
for t in code_model["types"]
if t.get("type") == "model"
and t.get("crossLanguageDefinitionId") == cross_lang_id
and t is not model_type
),
None,
)
original = self._find_spread_original(code_model, model_type)

if is_typeddict_only and original:
# In typeddict-only mode, the original dpg model already renders
Expand All @@ -407,7 +446,7 @@ def add_body_param_type(
body_parameter["type"]["types"].insert(1, td_list_or_dict)
else:
source = original or model_type
existing_td = self._find_existing_typeddict(code_model, cross_lang_id)
existing_td = self._find_existing_typeddict(code_model, cross_lang_id, source.get("name"))
self._insert_typeddict_overload(code_model, body_parameter, source, origin_type, existing_td)

if len(body_parameter["type"]["types"]) == 1:
Expand All @@ -420,7 +459,6 @@ def add_body_param_type(

code_model["types"].append(body_parameter["type"])


def pad_reserved_words(self, name: str, pad_type: PadType, yaml_type: dict[str, Any]) -> str:
# we want to pad hidden variables as well
if not name:
Expand Down
Loading
Loading