abstract

Abstract base classes for FastVideo attention backends.

Classes

fastvideo.attention.backends.abstract.AttentionBackend

Bases: ABC

Abstract class for attention backends.

fastvideo.attention.backends.abstract.AttentionImpl

AttentionImpl(num_heads: int, head_size: int, softmax_scale: float, causal: bool = False, num_kv_heads: int | None = None, prefix: str = '', **extra_impl_args)

Bases: ABC, Generic[T]

Source code in fastvideo/attention/backends/abstract.py
@abstractmethod
def __init__(
    self,
    num_heads: int,
    head_size: int,
    softmax_scale: float,
    causal: bool = False,
    num_kv_heads: int | None = None,
    prefix: str = "",
    **extra_impl_args,
) -> None:
    raise NotImplementedError

Functions

fastvideo.attention.backends.abstract.AttentionImpl.postprocess_output
postprocess_output(output: Tensor, attn_metadata: T) -> Tensor

Postprocess the output tensor after the attention operation.

Default implementation returns the tensor unchanged. Subclasses can override this to implement custom postprocessing like untiling, scaling, or other transformations.

Called BEFORE all_to_all for distributed attention

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `output` | `Tensor` | The output tensor from the attention operation | *required* |
| `attn_metadata` | `T` | Metadata for the attention operation | *required* |

Returns:

| Type | Description |
| --- | --- |
| `Tensor` | Postprocessed output tensor |

Source code in fastvideo/attention/backends/abstract.py
def postprocess_output(
    self,
    output: torch.Tensor,
    attn_metadata: T,
) -> torch.Tensor:
    """Postprocess the output tensor after the attention operation.

    Default implementation returns the tensor unchanged.
    Subclasses can override this to implement custom postprocessing
    like untiling, scaling, or other transformations.

    Called BEFORE all_to_all for distributed attention

    Args:
        output: The output tensor from the attention operation
        attn_metadata: Metadata for the attention operation

    Returns:
        Postprocessed output tensor
    """

    return output

fastvideo.attention.backends.abstract.AttentionImpl.preprocess_qkv
preprocess_qkv(qkv: Tensor, attn_metadata: T) -> Tensor

Preprocess QKV tensor before performing attention operation.

Default implementation returns the tensor unchanged. Subclasses can override this to implement custom preprocessing like reshaping, tiling, scaling, or other transformations.

Called AFTER all_to_all for distributed attention

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `qkv` | `Tensor` | The query-key-value tensor | *required* |
| `attn_metadata` | `T` | Metadata for the attention operation | *required* |

Returns:

| Type | Description |
| --- | --- |
| `Tensor` | Processed QKV tensor |

Source code in fastvideo/attention/backends/abstract.py
def preprocess_qkv(self, qkv: torch.Tensor,
                   attn_metadata: T) -> torch.Tensor:
    """Preprocess QKV tensor before performing attention operation.

    Default implementation returns the tensor unchanged.
    Subclasses can override this to implement custom preprocessing
    like reshaping, tiling, scaling, or other transformations.

    Called AFTER all_to_all for distributed attention

    Args:
        qkv: The query-key-value tensor
        attn_metadata: Metadata for the attention operation

    Returns:
        Processed QKV tensor
    """
    return qkv
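Together, `preprocess_qkv` (after all_to_all) and `postprocess_output` (before all_to_all) bracket the attention operation, and the defaults are pass-throughs. The sketch below shows the override pattern with plain lists standing in for tensors; `ScaledAttnImpl` and its rescaling behavior are hypothetical, not an actual FastVideo backend.

```python
class AttentionImplHooks:
    """Stand-in for AttentionImpl's default hooks (lists replace tensors)."""

    def preprocess_qkv(self, qkv, attn_metadata):
        # Default: pass-through. Called AFTER all_to_all in distributed attention.
        return qkv

    def postprocess_output(self, output, attn_metadata):
        # Default: pass-through. Called BEFORE all_to_all in distributed attention.
        return output


class ScaledAttnImpl(AttentionImplHooks):
    """Hypothetical backend that rescales its output in postprocess_output."""

    def __init__(self, scale: float) -> None:
        self.scale = scale

    def postprocess_output(self, output, attn_metadata):
        return [x * self.scale for x in output]


impl = ScaledAttnImpl(scale=0.5)
print(impl.preprocess_qkv([1.0, 2.0], None))     # [1.0, 2.0] (unchanged)
print(impl.postprocess_output([1.0, 2.0], None))  # [0.5, 1.0]
```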

fastvideo.attention.backends.abstract.AttentionMetadata dataclass

AttentionMetadata(current_timestep: int)

Attention metadata for prefill and decode batched together.

Functions

fastvideo.attention.backends.abstract.AttentionMetadata.asdict_zerocopy
asdict_zerocopy(skip_fields: set[str] | None = None) -> dict[str, Any]

Similar to dataclasses.asdict, but avoids deepcopying.

Source code in fastvideo/attention/backends/abstract.py
def asdict_zerocopy(self,
                    skip_fields: set[str] | None = None) -> dict[str, Any]:
    """Similar to dataclasses.asdict, but avoids deepcopying."""
    if skip_fields is None:
        skip_fields = set()
    # Note that if we add dataclasses as fields, they will need
    # similar handling.
    return {
        field.name: getattr(self, field.name)
        for field in fields(self) if field.name not in skip_fields
    }

fastvideo.attention.backends.abstract.AttentionMetadataBuilder

AttentionMetadataBuilder()

Bases: ABC, Generic[T]

Abstract class for attention metadata builders.

Create the builder, remember some configuration and parameters.

Source code in fastvideo/attention/backends/abstract.py
@abstractmethod
def __init__(self) -> None:
    """Create the builder, remember some configuration and parameters."""
    raise NotImplementedError

Functions

fastvideo.attention.backends.abstract.AttentionMetadataBuilder.build abstractmethod
build(**kwargs: dict[str, Any]) -> AttentionMetadata

Build attention metadata with on-device tensors.

Source code in fastvideo/attention/backends/abstract.py
@abstractmethod
def build(
    self,
    **kwargs: dict[str, Any],
) -> AttentionMetadata:
    """Build attention metadata with on-device tensors."""
    raise NotImplementedError

fastvideo.attention.backends.abstract.AttentionMetadataBuilder.prepare abstractmethod
prepare() -> None

Prepare for one batch.

Source code in fastvideo/attention/backends/abstract.py
@abstractmethod
def prepare(self) -> None:
    """Prepare for one batch."""
    raise NotImplementedError
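A concrete builder implements all three abstract methods: the constructor caches configuration, `prepare` resets per-batch state, and `build` produces the metadata. The sketch below is a hypothetical minimal builder (`SimpleMetadataBuilder` and its `_timestep` attribute are invented for illustration), wired against a stand-in `AttentionMetadata`.

```python
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import Any


@dataclass
class AttentionMetadata:
    """Stand-in for fastvideo's AttentionMetadata dataclass."""
    current_timestep: int


class AttentionMetadataBuilder(ABC):
    """Stand-in mirroring the abstract builder interface."""

    @abstractmethod
    def __init__(self) -> None:
        raise NotImplementedError

    @abstractmethod
    def prepare(self) -> None:
        raise NotImplementedError

    @abstractmethod
    def build(self, **kwargs: dict[str, Any]) -> AttentionMetadata:
        raise NotImplementedError


class SimpleMetadataBuilder(AttentionMetadataBuilder):
    """Hypothetical builder: caches config, resets state per batch."""

    def __init__(self) -> None:
        self._timestep = 0

    def prepare(self) -> None:
        # Reset per-batch state before building the next batch's metadata
        self._timestep = 0

    def build(self, **kwargs) -> AttentionMetadata:
        step = kwargs.get("current_timestep", self._timestep)
        return AttentionMetadata(current_timestep=step)


builder = SimpleMetadataBuilder()
builder.prepare()
meta = builder.build(current_timestep=5)
print(meta.current_timestep)  # 5
```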