
[Doc] Convert Sphinx directives ({class}, {meth}, {attr}, ...) to MkDocs format for better documentation linking #18663

Merged · 15 commits · May 27, 2025
5 changes: 3 additions & 2 deletions vllm/inputs/__init__.py
@@ -10,8 +10,9 @@

INPUT_REGISTRY = InputRegistry()
"""
The global {class}`~InputRegistry` which is used by {class}`~vllm.LLMEngine`
to dispatch data processing according to the target model.
The global [`InputRegistry`][vllm.inputs.registry.InputRegistry] which is used
by [`LLMEngine`][vllm.LLMEngine] to dispatch data processing according to the
target model.
"""

__all__ = [
68 changes: 39 additions & 29 deletions vllm/inputs/data.py
@@ -80,22 +80,24 @@ class EmbedsPrompt(TypedDict):
"""
Set of possible schemas for a single prompt:

- A text prompt ({class}`str` or {class}`TextPrompt`)
- A tokenized prompt ({class}`TokensPrompt`)
- An embeddings prompt ({class}`EmbedsPrompt`)
- A text prompt ([`str`][] or [`TextPrompt`][vllm.inputs.data.TextPrompt])
- A tokenized prompt ([`TokensPrompt`][vllm.inputs.data.TokensPrompt])
- An embeddings prompt ([`EmbedsPrompt`][vllm.inputs.data.EmbedsPrompt])

Note that "singleton" is as opposed to a data structure
which encapsulates multiple prompts, i.e. of the sort
which may be utilized for encoder/decoder models when
the user desires to express both the encoder & decoder
prompts explicitly, i.e. {class}`ExplicitEncoderDecoderPrompt`
prompts explicitly, i.e.
[`ExplicitEncoderDecoderPrompt`][vllm.inputs.data.ExplicitEncoderDecoderPrompt]

A prompt of type {class}`SingletonPrompt` may be employed
as (1) input to a decoder-only model, (2) input to
A prompt of type [`SingletonPrompt`][vllm.inputs.data.SingletonPrompt] may be
employed as (1) input to a decoder-only model, (2) input to
the encoder of an encoder/decoder model, in the scenario
where the decoder-prompt is not specified explicitly, or
(3) as a member of a larger data structure encapsulating
more than one prompt, i.e. {class}`ExplicitEncoderDecoderPrompt`
more than one prompt, i.e.
[`ExplicitEncoderDecoderPrompt`][vllm.inputs.data.ExplicitEncoderDecoderPrompt]
"""


@@ -126,18 +128,20 @@ class ExplicitEncoderDecoderPrompt(TypedDict, Generic[_T1_co, _T2_co]):
comprising an explicit encoder prompt and a decoder prompt.

The encoder and decoder prompts, respectively, may be formatted
according to any of the {class}`SingletonPrompt` schemas,
according to any of the
[`SingletonPrompt`][vllm.inputs.data.SingletonPrompt] schemas,
and are not required to have the same schema.

Only the encoder prompt may have multi-modal data. mm_processor_kwargs
should be at the top-level, and should not be set in the encoder/decoder
prompts, since they are agnostic to the encoder/decoder.

Note that an {class}`ExplicitEncoderDecoderPrompt` may not
be used as an input to a decoder-only model,
Note that an
[`ExplicitEncoderDecoderPrompt`][vllm.inputs.data.ExplicitEncoderDecoderPrompt]
may not be used as an input to a decoder-only model,
and that the `encoder_prompt` and `decoder_prompt`
fields of this data structure themselves must be
{class}`SingletonPrompt` instances.
[`SingletonPrompt`][vllm.inputs.data.SingletonPrompt] instances.
"""

encoder_prompt: _T1_co
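
A hedged sketch of building an explicit encoder/decoder prompt as described above. Field names follow the TypedDicts in this file; the empty `mm_processor_kwargs` is only a placeholder showing where such kwargs belong.

```python
from vllm.inputs import ExplicitEncoderDecoderPrompt, TextPrompt, TokensPrompt

enc_dec_prompt = ExplicitEncoderDecoderPrompt(
    # The encoder and decoder prompts may use different singleton schemas.
    encoder_prompt=TextPrompt(prompt="An input sentence to encode."),
    decoder_prompt=TokensPrompt(prompt_token_ids=[2]),
    # Per the docstring above, mm_processor_kwargs sits at the top level,
    # not inside the encoder/decoder prompts.
    mm_processor_kwargs={},
)
```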
@@ -152,11 +156,11 @@ class ExplicitEncoderDecoderPrompt(TypedDict, Generic[_T1_co, _T2_co]):
Set of possible schemas for an LLM input, including
both decoder-only and encoder/decoder input types:

- A text prompt ({class}`str` or {class}`TextPrompt`)
- A tokenized prompt ({class}`TokensPrompt`)
- An embeddings prompt ({class}`EmbedsPrompt`)
- A text prompt ([`str`][] or [`TextPrompt`][vllm.inputs.data.TextPrompt])
- A tokenized prompt ([`TokensPrompt`][vllm.inputs.data.TokensPrompt])
- An embeddings prompt ([`EmbedsPrompt`][vllm.inputs.data.EmbedsPrompt])
- A single data structure containing both an encoder and a decoder prompt
({class}`ExplicitEncoderDecoderPrompt`)
([`ExplicitEncoderDecoderPrompt`][vllm.inputs.data.ExplicitEncoderDecoderPrompt])
"""


@@ -189,7 +193,8 @@ def token_inputs(
prompt: Optional[str] = None,
cache_salt: Optional[str] = None,
) -> TokenInputs:
"""Construct {class}`TokenInputs` from optional values."""
"""Construct [`TokenInputs`][vllm.inputs.data.TokenInputs] from optional
values."""
inputs = TokenInputs(type="token", prompt_token_ids=prompt_token_ids)

if prompt is not None:
@@ -221,7 +226,8 @@ def embeds_inputs(
prompt_embeds: torch.Tensor,
cache_salt: Optional[str] = None,
) -> EmbedsInputs:
"""Construct :class:`EmbedsInputs` from optional values."""
"""Construct [`EmbedsInputs`][vllm.inputs.data.EmbedsInputs] from optional
values."""
inputs = EmbedsInputs(type="embeds", prompt_embeds=prompt_embeds)

if cache_salt is not None:
@@ -232,19 +238,20 @@ def embeds_inputs(

DecoderOnlyInputs = Union[TokenInputs, EmbedsInputs, "MultiModalInputs"]
"""
The inputs in {class}`~vllm.LLMEngine` before they are
The inputs in [`LLMEngine`][vllm.engine.llm_engine.LLMEngine] before they are
passed to the model executor.
This specifies the data required for decoder-only models.
"""


class EncoderDecoderInputs(TypedDict):
"""
The inputs in {class}`~vllm.LLMEngine` before they are
passed to the model executor.
The inputs in [`LLMEngine`][vllm.engine.llm_engine.LLMEngine] before they
are passed to the model executor.

This specifies the required data for encoder-decoder models.
"""

encoder: Union[TokenInputs, "MultiModalInputs"]
"""The inputs for the encoder portion."""

@@ -254,13 +261,13 @@ class EncoderDecoderInputs(TypedDict):

SingletonInputs = Union[TokenInputs, EmbedsInputs, "MultiModalInputs"]
"""
A processed {class}`SingletonPrompt` which can be passed to
{class}`vllm.sequence.Sequence`.
A processed [`SingletonPrompt`][vllm.inputs.data.SingletonPrompt] which can be
passed to [`vllm.sequence.Sequence`][].
"""

ProcessorInputs = Union[DecoderOnlyInputs, EncoderDecoderInputs]
"""
The inputs to {data}`vllm.inputs.InputProcessor`.

Contributor Author: I fixed the wrong description of ProcessorInputs.

The outputs from [`vllm.inputs.preprocess.InputPreprocessor`][].
"""

_T1 = TypeVar("_T1", bound=SingletonPrompt, default=SingletonPrompt)
@@ -277,7 +284,8 @@ def build_explicit_enc_dec_prompt(
return ExplicitEncoderDecoderPrompt(
encoder_prompt=encoder_prompt,
decoder_prompt=decoder_prompt,
mm_processor_kwargs=mm_processor_kwargs)
mm_processor_kwargs=mm_processor_kwargs,
)


def zip_enc_dec_prompts(
@@ -288,7 +296,8 @@
) -> list[ExplicitEncoderDecoderPrompt[_T1, _T2]]:
"""
Zip encoder and decoder prompts together into a list of
{class}`ExplicitEncoderDecoderPrompt` instances.
[`ExplicitEncoderDecoderPrompt`][vllm.inputs.data.ExplicitEncoderDecoderPrompt]
instances.

``mm_processor_kwargs`` may also be provided; if a dict is passed, the same
dictionary will be used for every encoder/decoder prompt. If an iterable is
@@ -299,10 +308,11 @@ def zip_enc_dec_prompts(
if isinstance(mm_processor_kwargs, dict):
return [
build_explicit_enc_dec_prompt(
encoder_prompt, decoder_prompt,
cast(dict[str, Any], mm_processor_kwargs))
for (encoder_prompt,
decoder_prompt) in zip(enc_prompts, dec_prompts)
encoder_prompt,
decoder_prompt,
cast(dict[str, Any], mm_processor_kwargs),
) for (encoder_prompt,
decoder_prompt) in zip(enc_prompts, dec_prompts)
]
return [
build_explicit_enc_dec_prompt(encoder_prompt, decoder_prompt,
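
For context, a small usage sketch for `zip_enc_dec_prompts`, assuming it is exported from `vllm.inputs`. Omitting `mm_processor_kwargs` leaves it unset; passing a single dict instead would attach the same kwargs to every pair, as the docstring above explains.

```python
from vllm.inputs import zip_enc_dec_prompts

enc_prompts = ["first encoder prompt", "second encoder prompt"]
dec_prompts = ["first decoder prompt", "second decoder prompt"]

prompts = zip_enc_dec_prompts(enc_prompts, dec_prompts)

# Each element is an ExplicitEncoderDecoderPrompt TypedDict.
assert prompts[0]["encoder_prompt"] == "first encoder prompt"
assert prompts[0]["decoder_prompt"] == "first decoder prompt"
```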
8 changes: 4 additions & 4 deletions vllm/inputs/parse.py
@@ -23,13 +23,13 @@ class ParsedTokens(TypedDict):

@overload
def parse_and_batch_prompt(
prompt: Union[str, list[str]]) -> Sequence[ParsedText]:
prompt: Union[str, list[str]], ) -> Sequence[ParsedText]:
...


@overload
def parse_and_batch_prompt(
prompt: Union[list[int], list[list[int]]]) -> Sequence[ParsedTokens]:
prompt: Union[list[int], list[list[int]]], ) -> Sequence[ParsedTokens]:
...


@@ -86,7 +86,7 @@ class ParsedTokensPrompt(TypedDict):


class ParsedEmbedsPrompt(TypedDict):
type: Literal['embeds']
type: Literal["embeds"]
content: EmbedsPrompt


@@ -133,7 +133,7 @@ def parse_singleton_prompt(prompt: SingletonPrompt) -> ParsedSingletonPrompt:


def is_explicit_encoder_decoder_prompt(
prompt: PromptType) -> TypeIs[ExplicitEncoderDecoderPrompt]:
prompt: PromptType, ) -> TypeIs[ExplicitEncoderDecoderPrompt]:
return isinstance(prompt, dict) and "encoder_prompt" in prompt
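
Since the guard above narrows `PromptType` via `TypeIs`, callers can branch on the prompt shape directly. A minimal sketch follows; the `describe` helper is hypothetical and only illustrates the narrowing.

```python
from vllm.inputs import PromptType
from vllm.inputs.parse import is_explicit_encoder_decoder_prompt


def describe(prompt: PromptType) -> str:
    if is_explicit_encoder_decoder_prompt(prompt):
        # `prompt` is narrowed to ExplicitEncoderDecoderPrompt here.
        return f"encoder/decoder prompt: {prompt['encoder_prompt']!r}"
    return f"singleton prompt: {prompt!r}"


print(describe("a plain text prompt"))
print(describe({"encoder_prompt": "enc", "decoder_prompt": None}))
```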


56 changes: 39 additions & 17 deletions vllm/inputs/preprocess.py
@@ -67,26 +67,26 @@ def get_eos_token_id(self,
return self.tokenizer.get_lora_tokenizer(lora_request).eos_token_id

def get_decoder_start_token_id(self) -> Optional[int]:
'''
"""
Obtain the decoder start token id employed by an encoder/decoder
model. Returns None for non-encoder/decoder models or if the
model config is unavailable.
'''
"""

if not self.model_config.is_encoder_decoder:
logger.warning_once(
"Using None for decoder start token id because "
"this is not an encoder/decoder model.")
return None

if (self.model_config is None or self.model_config.hf_config is None):
if self.model_config is None or self.model_config.hf_config is None:
logger.warning_once(
"Using None for decoder start token id because "
"model config is not available.")
return None

dec_start_token_id = getattr(self.model_config.hf_config,
'decoder_start_token_id', None)
"decoder_start_token_id", None)
if dec_start_token_id is None:
logger.warning_once(
"Falling back on <BOS> for decoder start token "
@@ -97,7 +97,7 @@ def get_decoder_start_token_id(self) -> Optional[int]:
return dec_start_token_id

def _get_default_enc_dec_decoder_prompt(self) -> list[int]:
'''
"""
Specifically for encoder/decoder models:
generate a default decoder prompt for when
the user specifies only the encoder prompt.
@@ -126,7 +126,7 @@ def _get_default_enc_dec_decoder_prompt(self) -> list[int]:
Returns:

* prompt_token_ids
'''
"""

bos_token_id = self.get_bos_token_id()
assert bos_token_id is not None
@@ -224,7 +224,10 @@ async def _tokenize_prompt_async(
lora_request: Optional[LoRARequest],
tokenization_kwargs: Optional[dict[str, Any]] = None,
) -> list[int]:
"""Async version of {meth}`_tokenize_prompt`."""
"""
Async version of
[`_tokenize_prompt`][vllm.inputs.preprocess.InputPreprocessor._tokenize_prompt].
"""
tokenizer = self.get_tokenizer_group()
tokenization_kwargs = self._get_tokenization_kw(tokenization_kwargs)

@@ -287,7 +290,10 @@ async def _process_multimodal_async(
lora_request: Optional[LoRARequest],
return_mm_hashes: bool = False,
) -> MultiModalInputs:
"""Async version of {meth}`_process_multimodal`."""
"""
Async version of
[`_process_multimodal`][vllm.inputs.preprocess.InputPreprocessor._process_multimodal].
"""
tokenizer = await self._get_mm_tokenizer_async(lora_request)

mm_processor = self.mm_registry.create_processor(self.model_config,
@@ -472,7 +478,7 @@ def _prompt_to_llm_inputs(

Returns:

* {class}`SingletonInputs` instance
* [`SingletonInputs`][vllm.inputs.data.SingletonInputs] instance
"""
parsed = parse_singleton_prompt(prompt)

@@ -508,7 +514,10 @@ async def _prompt_to_llm_inputs_async(
lora_request: Optional[LoRARequest] = None,
return_mm_hashes: bool = False,
) -> SingletonInputs:
"""Async version of {meth}`_prompt_to_llm_inputs`."""
"""
Async version of
[`_prompt_to_llm_inputs`][vllm.inputs.preprocess.InputPreprocessor._prompt_to_llm_inputs].
"""
parsed = parse_singleton_prompt(prompt)

if parsed["type"] == "embeds":
@@ -644,7 +653,9 @@ def _process_encoder_decoder_prompt(
) -> EncoderDecoderInputs:
"""
For encoder/decoder models only:
Process an input prompt into an {class}`EncoderDecoderInputs` instance.
Process an input prompt into an
[`EncoderDecoderInputs`][vllm.inputs.data.EncoderDecoderInputs]
instance.

There are two types of input prompts:
singleton prompts which carry only the
@@ -670,7 +681,8 @@

Returns:

* {class}`EncoderDecoderInputs` instance
* [`EncoderDecoderInputs`][vllm.inputs.data.EncoderDecoderInputs]
instance
"""
encoder_inputs: SingletonInputs
decoder_inputs: Optional[SingletonInputs]
@@ -710,7 +722,10 @@ async def _process_encoder_decoder_prompt_async(
prompt: PromptType,
tokenization_kwargs: Optional[dict[str, Any]] = None,
) -> EncoderDecoderInputs:
"""Async version of {meth}`_process_encoder_decoder_prompt`."""
"""
Async version of
[`_process_encoder_decoder_prompt`][vllm.inputs.preprocess.InputPreprocessor._process_encoder_decoder_prompt].
"""
encoder_inputs: SingletonInputs
decoder_inputs: Optional[SingletonInputs]

@@ -778,7 +793,8 @@ def _process_decoder_only_prompt(
) -> DecoderOnlyInputs:
"""
For decoder-only models:
Process an input prompt into an {class}`DecoderOnlyInputs` instance.
Process an input prompt into a
[`DecoderOnlyInputs`][vllm.inputs.data.DecoderOnlyInputs] instance.

Arguments:

@@ -789,7 +805,7 @@

Returns:

* {class}`DecoderOnlyInputs` instance
* [`DecoderOnlyInputs`][vllm.inputs.data.DecoderOnlyInputs] instance
"""

prompt_comps = self._prompt_to_llm_inputs(
@@ -812,7 +828,10 @@ async def _process_decoder_only_prompt_async(
prompt_adapter_request: Optional[PromptAdapterRequest] = None,
return_mm_hashes: bool = False,
) -> DecoderOnlyInputs:
"""Async version of {meth}`_process_decoder_only_prompt`."""
"""
Async version of
[`_process_decoder_only_prompt`][vllm.inputs.preprocess.InputPreprocessor._process_decoder_only_prompt].
"""
prompt_comps = await self._prompt_to_llm_inputs_async(
prompt,
tokenization_kwargs=tokenization_kwargs,
@@ -863,7 +882,10 @@ async def preprocess_async(
prompt_adapter_request: Optional[PromptAdapterRequest] = None,
return_mm_hashes: bool = False,
) -> ProcessorInputs:
"""Async version of {meth}`preprocess`."""
"""
Async version of
[`preprocess`][vllm.inputs.preprocess.InputPreprocessor.preprocess].
"""
if self.model_config.is_encoder_decoder:
assert not return_mm_hashes, (
"Multimodal hashes for encoder-decoder models should not be ",