configs

Modules

fastvideo.configs.configs

Classes

fastvideo.configs.configs.DatasetType

Bases: str, Enum

Enumeration for different dataset types.

Functions
fastvideo.configs.configs.DatasetType.choices classmethod
choices() -> list[str]

Get all available choices as strings for argparse.

Source code in fastvideo/configs/configs.py
@classmethod
def choices(cls) -> list[str]:
    """Get all available choices as strings for argparse."""
    return [dataset_type.value for dataset_type in cls]
fastvideo.configs.configs.DatasetType.from_string classmethod
from_string(value: str) -> DatasetType

Convert string to DatasetType enum.

Source code in fastvideo/configs/configs.py
@classmethod
def from_string(cls, value: str) -> "DatasetType":
    """Convert string to DatasetType enum."""
    try:
        return cls(value.lower())
    except ValueError:
        raise ValueError(
            f"Invalid dataset type: {value}. Must be one of: {', '.join([m.value for m in cls])}"
        ) from None
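Example usage (a minimal sketch; the HF member and its "hf" value are inferred from the PreprocessConfig defaults shown below):

from fastvideo.configs.configs import DatasetType

# from_string is case-insensitive because it lowercases its input.
dataset_type = DatasetType.from_string("HF")
assert dataset_type is DatasetType.HF

# Unknown names raise a ValueError listing the valid values.
try:
    DatasetType.from_string("not-a-dataset")
except ValueError as e:
    print(e)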
fastvideo.configs.configs.PreprocessConfig dataclass
PreprocessConfig(model_path: str = '', dataset_path: str = '', dataset_type: DatasetType = HF, dataset_output_dir: str = './output', dataloader_num_workers: int = 1, preprocess_video_batch_size: int = 2, samples_per_file: int = 64, flush_frequency: int = 256, video_loader_type: VideoLoaderType = TORCHCODEC, max_height: int = 480, max_width: int = 848, num_frames: int = 163, video_length_tolerance_range: float = 2.0, train_fps: int = 30, speed_factor: float = 1.0, drop_short_ratio: float = 1.0, do_temporal_sample: bool = False, training_cfg_rate: float = 0.0, seed: int = 42)

Configuration for preprocessing operations.

Functions
fastvideo.configs.configs.PreprocessConfig.add_cli_args staticmethod
add_cli_args(parser: FlexibleArgumentParser, prefix: str = 'preprocess') -> FlexibleArgumentParser

Add preprocessing configuration arguments to the parser.

Source code in fastvideo/configs/configs.py
@staticmethod
def add_cli_args(parser: FlexibleArgumentParser,
                 prefix: str = "preprocess") -> FlexibleArgumentParser:
    """Add preprocessing configuration arguments to the parser."""
    prefix_with_dot = f"{prefix}." if (prefix.strip() != "") else ""

    preprocess_args = parser.add_argument_group("Preprocessing Arguments")
    # Model & Dataset
    preprocess_args.add_argument(f"--{prefix_with_dot}model-path",
                                 type=str,
                                 default=PreprocessConfig.model_path,
                                 help="Path to the model for preprocessing")
    preprocess_args.add_argument(
        f"--{prefix_with_dot}dataset-path",
        type=str,
        default=PreprocessConfig.dataset_path,
        help="Path to the dataset directory for preprocessing")
    preprocess_args.add_argument(
        f"--{prefix_with_dot}dataset-type",
        type=str,
        choices=DatasetType.choices(),
        default=PreprocessConfig.dataset_type.value,
        help="Type of the dataset")
    preprocess_args.add_argument(
        f"--{prefix_with_dot}dataset-output-dir",
        type=str,
        default=PreprocessConfig.dataset_output_dir,
        help="The output directory where the dataset will be written.")

    # Dataloader
    preprocess_args.add_argument(
        f"--{prefix_with_dot}dataloader-num-workers",
        type=int,
        default=PreprocessConfig.dataloader_num_workers,
        help=
        "Number of subprocesses to use for data loading. 0 means that the data will be loaded in the main process."
    )
    preprocess_args.add_argument(
        f"--{prefix_with_dot}preprocess-video-batch-size",
        type=int,
        default=PreprocessConfig.preprocess_video_batch_size,
        help="Batch size (per device) for the training dataloader.")

    # Saver
    preprocess_args.add_argument(f"--{prefix_with_dot}samples-per-file",
                                 type=int,
                                 default=PreprocessConfig.samples_per_file,
                                 help="Number of samples per output file")
    preprocess_args.add_argument(f"--{prefix_with_dot}flush-frequency",
                                 type=int,
                                 default=PreprocessConfig.flush_frequency,
                                 help="How often to save to parquet files")

    # Video processing parameters
    preprocess_args.add_argument(
        f"--{prefix_with_dot}video-loader-type",
        type=str,
        choices=VideoLoaderType.choices(),
        default=PreprocessConfig.video_loader_type.value,
        help="Type of the video loader")
    preprocess_args.add_argument(f"--{prefix_with_dot}max-height",
                                 type=int,
                                 default=PreprocessConfig.max_height,
                                 help="Maximum height for video processing")
    preprocess_args.add_argument(f"--{prefix_with_dot}max-width",
                                 type=int,
                                 default=PreprocessConfig.max_width,
                                 help="Maximum width for video processing")
    preprocess_args.add_argument(f"--{prefix_with_dot}num-frames",
                                 type=int,
                                 default=PreprocessConfig.num_frames,
                                 help="Number of frames to process")
    preprocess_args.add_argument(
        f"--{prefix_with_dot}video-length-tolerance-range",
        type=float,
        default=PreprocessConfig.video_length_tolerance_range,
        help="Video length tolerance range")
    preprocess_args.add_argument(f"--{prefix_with_dot}train-fps",
                                 type=int,
                                 default=PreprocessConfig.train_fps,
                                 help="Training FPS")
    preprocess_args.add_argument(f"--{prefix_with_dot}speed-factor",
                                 type=float,
                                 default=PreprocessConfig.speed_factor,
                                 help="Speed factor for video processing")
    preprocess_args.add_argument(f"--{prefix_with_dot}drop-short-ratio",
                                 type=float,
                                 default=PreprocessConfig.drop_short_ratio,
                                 help="Ratio for dropping short videos")
    preprocess_args.add_argument(
        f"--{prefix_with_dot}do-temporal-sample",
        action=StoreBoolean,
        default=PreprocessConfig.do_temporal_sample,
        help="Whether to do temporal sampling")

    # Model Training configuration
    preprocess_args.add_argument(f"--{prefix_with_dot}training-cfg-rate",
                                 type=float,
                                 default=PreprocessConfig.training_cfg_rate,
                                 help="Training CFG rate")
    preprocess_args.add_argument(f"--{prefix_with_dot}seed",
                                 type=int,
                                 default=PreprocessConfig.seed,
                                 help="Seed for random number generator")

    return parser
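Example usage (a sketch assuming FlexibleArgumentParser accepts the same calls as argparse.ArgumentParser; its import path here is an assumption):

from fastvideo.configs.configs import PreprocessConfig
from fastvideo.utils import FlexibleArgumentParser  # assumed import path

parser = FlexibleArgumentParser()
parser = PreprocessConfig.add_cli_args(parser, prefix="preprocess")

# With prefix="preprocess", each flag is namespaced with a dot:
args = parser.parse_args([
    "--preprocess.model-path", "/path/to/model",
    "--preprocess.num-frames", "81",
])
# Depending on how FlexibleArgumentParser stores dotted names, the value may
# live under a dotted attribute, e.g. getattr(args, "preprocess.model_path").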
fastvideo.configs.configs.PreprocessConfig.from_kwargs classmethod
from_kwargs(kwargs: dict[str, Any]) -> Optional[PreprocessConfig]

Create PreprocessConfig from keyword arguments.

Source code in fastvideo/configs/configs.py
@classmethod
def from_kwargs(cls, kwargs: dict[str,
                                  Any]) -> Optional["PreprocessConfig"]:
    """Create PreprocessConfig from keyword arguments."""
    if 'dataset_type' in kwargs and isinstance(kwargs['dataset_type'], str):
        kwargs['dataset_type'] = DatasetType.from_string(
            kwargs['dataset_type'])
    if 'video_loader_type' in kwargs and isinstance(
            kwargs['video_loader_type'], str):
        kwargs['video_loader_type'] = VideoLoaderType.from_string(
            kwargs['video_loader_type'])

    preprocess_config = cls()
    if not update_config_from_args(
            preprocess_config, kwargs, prefix="preprocess", pop_args=True):
        return None
    return preprocess_config
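Example usage (a sketch; whether a config or None is returned depends on update_config_from_args, which is not shown in this reference):

from fastvideo.configs.configs import PreprocessConfig

kwargs = {
    "dataset_type": "hf",               # converted via DatasetType.from_string
    "video_loader_type": "torchcodec",  # converted via VideoLoaderType.from_string
}
config = PreprocessConfig.from_kwargs(kwargs)
if config is None:
    print("no preprocess arguments matched")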
fastvideo.configs.configs.VideoLoaderType

Bases: str, Enum

Enumeration for different video loaders.

Functions
fastvideo.configs.configs.VideoLoaderType.choices classmethod
choices() -> list[str]

Get all available choices as strings for argparse.

Source code in fastvideo/configs/configs.py
@classmethod
def choices(cls) -> list[str]:
    """Get all available choices as strings for argparse."""
    return [video_loader.value for video_loader in cls]
fastvideo.configs.configs.VideoLoaderType.from_string classmethod
from_string(value: str) -> VideoLoaderType

Convert string to VideoLoaderType enum.

Source code in fastvideo/configs/configs.py
@classmethod
def from_string(cls, value: str) -> "VideoLoaderType":
    """Convert string to VideoLoader enum."""
    try:
        return cls(value.lower())
    except ValueError:
        raise ValueError(
            f"Invalid video loader: {value}. Must be one of: {', '.join([m.value for m in cls])}"
        ) from None
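Example of wiring choices() into a plain argparse parser (the TORCHCODEC member and its "torchcodec" value are inferred from the PreprocessConfig defaults above):

import argparse

from fastvideo.configs.configs import VideoLoaderType

parser = argparse.ArgumentParser()
parser.add_argument("--video-loader-type",
                    choices=VideoLoaderType.choices(),
                    default=VideoLoaderType.TORCHCODEC.value)
args = parser.parse_args(["--video-loader-type", "torchcodec"])
loader = VideoLoaderType.from_string(args.video_loader_type)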

Functions

fastvideo.configs.models

Classes

fastvideo.configs.models.DiTConfig dataclass
DiTConfig(arch_config: DiTArchConfig = DiTArchConfig(), prefix: str = '', quant_config: QuantizationConfig | None = None)

Bases: ModelConfig

Functions
fastvideo.configs.models.DiTConfig.add_cli_args staticmethod
add_cli_args(parser: Any, prefix: str = 'dit-config') -> Any

Add CLI arguments for DiTConfig fields

Source code in fastvideo/configs/models/dits/base.py
@staticmethod
def add_cli_args(parser: Any, prefix: str = "dit-config") -> Any:
    """Add CLI arguments for DiTConfig fields"""
    parser.add_argument(
        f"--{prefix}.prefix",
        type=str,
        dest=f"{prefix.replace('-', '_')}.prefix",
        default=DiTConfig.prefix,
        help="Prefix for the DiT model",
    )

    parser.add_argument(
        f"--{prefix}.quant-config",
        type=str,
        dest=f"{prefix.replace('-', '_')}.quant_config",
        default=None,
        help="Quantization configuration for the DiT model",
    )

    return parser
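Note that dest contains a literal dot, so argparse stores the parsed value under an attribute name such as "dit_config.prefix", which must be read with getattr. A standalone sketch of the same pattern using plain argparse:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--dit-config.prefix",
                    type=str,
                    dest="dit_config.prefix",
                    default="")
args = parser.parse_args(["--dit-config.prefix", "transformer"])

# The dot is part of the attribute name, so args.dit_config would fail;
# getattr with the full dotted name is required.
print(getattr(args, "dit_config.prefix"))  # -> "transformer"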
fastvideo.configs.models.VAEConfig dataclass
VAEConfig(arch_config: VAEArchConfig = VAEArchConfig(), load_encoder: bool = True, load_decoder: bool = True, tile_sample_min_height: int = 256, tile_sample_min_width: int = 256, tile_sample_min_num_frames: int = 16, tile_sample_stride_height: int = 192, tile_sample_stride_width: int = 192, tile_sample_stride_num_frames: int = 12, blend_num_frames: int = 0, use_tiling: bool = True, use_temporal_tiling: bool = True, use_parallel_tiling: bool = True, use_temporal_scaling_frames: bool = True)

Bases: ModelConfig

Functions
fastvideo.configs.models.VAEConfig.add_cli_args staticmethod
add_cli_args(parser: Any, prefix: str = 'vae-config') -> Any

Add CLI arguments for VAEConfig fields

Source code in fastvideo/configs/models/vaes/base.py
@staticmethod
def add_cli_args(parser: Any, prefix: str = "vae-config") -> Any:
    """Add CLI arguments for VAEConfig fields"""
    parser.add_argument(
        f"--{prefix}.load-encoder",
        action=StoreBoolean,
        dest=f"{prefix.replace('-', '_')}.load_encoder",
        default=VAEConfig.load_encoder,
        help="Whether to load the VAE encoder",
    )
    parser.add_argument(
        f"--{prefix}.load-decoder",
        action=StoreBoolean,
        dest=f"{prefix.replace('-', '_')}.load_decoder",
        default=VAEConfig.load_decoder,
        help="Whether to load the VAE decoder",
    )
    parser.add_argument(
        f"--{prefix}.tile-sample-min-height",
        type=int,
        dest=f"{prefix.replace('-', '_')}.tile_sample_min_height",
        default=VAEConfig.tile_sample_min_height,
        help="Minimum height for VAE tile sampling",
    )
    parser.add_argument(
        f"--{prefix}.tile-sample-min-width",
        type=int,
        dest=f"{prefix.replace('-', '_')}.tile_sample_min_width",
        default=VAEConfig.tile_sample_min_width,
        help="Minimum width for VAE tile sampling",
    )
    parser.add_argument(
        f"--{prefix}.tile-sample-min-num-frames",
        type=int,
        dest=f"{prefix.replace('-', '_')}.tile_sample_min_num_frames",
        default=VAEConfig.tile_sample_min_num_frames,
        help="Minimum number of frames for VAE tile sampling",
    )
    parser.add_argument(
        f"--{prefix}.tile-sample-stride-height",
        type=int,
        dest=f"{prefix.replace('-', '_')}.tile_sample_stride_height",
        default=VAEConfig.tile_sample_stride_height,
        help="Stride height for VAE tile sampling",
    )
    parser.add_argument(
        f"--{prefix}.tile-sample-stride-width",
        type=int,
        dest=f"{prefix.replace('-', '_')}.tile_sample_stride_width",
        default=VAEConfig.tile_sample_stride_width,
        help="Stride width for VAE tile sampling",
    )
    parser.add_argument(
        f"--{prefix}.tile-sample-stride-num-frames",
        type=int,
        dest=f"{prefix.replace('-', '_')}.tile_sample_stride_num_frames",
        default=VAEConfig.tile_sample_stride_num_frames,
        help="Stride number of frames for VAE tile sampling",
    )
    parser.add_argument(
        f"--{prefix}.blend-num-frames",
        type=int,
        dest=f"{prefix.replace('-', '_')}.blend_num_frames",
        default=VAEConfig.blend_num_frames,
        help="Number of frames to blend for VAE tile sampling",
    )
    parser.add_argument(
        f"--{prefix}.use-tiling",
        action=StoreBoolean,
        dest=f"{prefix.replace('-', '_')}.use_tiling",
        default=VAEConfig.use_tiling,
        help="Whether to use tiling for VAE",
    )
    parser.add_argument(
        f"--{prefix}.use-temporal-tiling",
        action=StoreBoolean,
        dest=f"{prefix.replace('-', '_')}.use_temporal_tiling",
        default=VAEConfig.use_temporal_tiling,
        help="Whether to use temporal tiling for VAE",
    )
    parser.add_argument(
        f"--{prefix}.use-parallel-tiling",
        action=StoreBoolean,
        dest=f"{prefix.replace('-', '_')}.use_parallel_tiling",
        default=VAEConfig.use_parallel_tiling,
        help="Whether to use parallel tiling for VAE",
    )

    return parser
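StoreBoolean is FastVideo's custom argparse action; its implementation is not shown in this reference. A hypothetical stand-in that accepts an explicit true/false value might look like:

import argparse


class StoreBooleanSketch(argparse.Action):
    """Parse 'true'/'false' (case-insensitive) into a bool. Illustration only."""

    def __call__(self, parser, namespace, values, option_string=None):
        setattr(namespace, self.dest, str(values).lower() in ("true", "1", "yes"))


parser = argparse.ArgumentParser()
parser.add_argument("--use-tiling", action=StoreBooleanSketch, default=True)
print(parser.parse_args(["--use-tiling", "false"]).use_tiling)  # False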

Modules

fastvideo.configs.models.dits
Modules
fastvideo.configs.models.dits.base
Classes
fastvideo.configs.models.dits.base.DiTConfig dataclass
DiTConfig(arch_config: DiTArchConfig = DiTArchConfig(), prefix: str = '', quant_config: QuantizationConfig | None = None)

Bases: ModelConfig

Functions
fastvideo.configs.models.dits.base.DiTConfig.add_cli_args staticmethod
add_cli_args(parser: Any, prefix: str = 'dit-config') -> Any

Add CLI arguments for DiTConfig fields

Source code in fastvideo/configs/models/dits/base.py
@staticmethod
def add_cli_args(parser: Any, prefix: str = "dit-config") -> Any:
    """Add CLI arguments for DiTConfig fields"""
    parser.add_argument(
        f"--{prefix}.prefix",
        type=str,
        dest=f"{prefix.replace('-', '_')}.prefix",
        default=DiTConfig.prefix,
        help="Prefix for the DiT model",
    )

    parser.add_argument(
        f"--{prefix}.quant-config",
        type=str,
        dest=f"{prefix.replace('-', '_')}.quant_config",
        default=None,
        help="Quantization configuration for the DiT model",
    )

    return parser
fastvideo.configs.models.encoders
Classes
fastvideo.configs.models.encoders.T5LargeConfig dataclass
T5LargeConfig(arch_config: TextEncoderArchConfig = T5LargeArchConfig(), prefix: str = 't5', quant_config: QuantizationConfig | None = None, lora_config: Any | None = None)

Bases: TextEncoderConfig

Configuration for the T5 Large text encoder.

Modules
fastvideo.configs.models.encoders.t5
Classes
fastvideo.configs.models.encoders.t5.T5LargeArchConfig dataclass
T5LargeArchConfig(stacked_params_mapping: list[tuple[str, str, str]] = (lambda: [('.qkv_proj', '.q', 'q'), ('.qkv_proj', '.k', 'k'), ('.qkv_proj', '.v', 'v')])(), architectures: list[str] = (lambda: [])(), _supported_attention_backends: tuple[AttentionBackendEnum, ...] = (FLASH_ATTN, TORCH_SDPA), output_hidden_states: bool = False, use_return_dict: bool = True, vocab_size: int = 32128, hidden_size: int = 0, num_hidden_layers: int = 0, num_attention_heads: int = 0, pad_token_id: int = 0, eos_token_id: int = 1, text_len: int = 512, hidden_state_skip_layer: int = 0, decoder_start_token_id: int = 0, output_past: bool = True, scalable_attention: bool = True, tie_word_embeddings: bool = False, tokenizer_kwargs: dict[str, Any] = dict(), _fsdp_shard_conditions: list = (lambda: [_is_transformer_layer, _is_embeddings, _is_final_layernorm])(), d_model: int = 1024, d_kv: int = 128, d_ff: int = 65536, num_layers: int = 24, num_decoder_layers: int | None = 24, num_heads: int = 128, relative_attention_num_buckets: int = 32, relative_attention_max_distance: int = 128, dropout_rate: float = 0.1, layer_norm_epsilon: float = 1e-06, initializer_factor: float = 1.0, feed_forward_proj: str = 'relu', dense_act_fn: str = '', is_gated_act: bool = False, is_encoder_decoder: bool = True, use_cache: bool = True, classifier_dropout: float = 0.0, n_positions: int = 512, task_specific_params: dict | None = None)

Bases: T5ArchConfig

T5 Large architecture config with model-specific parameter values.

fastvideo.configs.models.encoders.t5.T5LargeConfig dataclass
T5LargeConfig(arch_config: TextEncoderArchConfig = T5LargeArchConfig(), prefix: str = 't5', quant_config: QuantizationConfig | None = None, lora_config: Any | None = None)

Bases: TextEncoderConfig

Configuration for the T5 Large text encoder.

fastvideo.configs.models.vaes
Modules
fastvideo.configs.models.vaes.base
Classes
fastvideo.configs.models.vaes.base.VAEConfig dataclass
VAEConfig(arch_config: VAEArchConfig = VAEArchConfig(), load_encoder: bool = True, load_decoder: bool = True, tile_sample_min_height: int = 256, tile_sample_min_width: int = 256, tile_sample_min_num_frames: int = 16, tile_sample_stride_height: int = 192, tile_sample_stride_width: int = 192, tile_sample_stride_num_frames: int = 12, blend_num_frames: int = 0, use_tiling: bool = True, use_temporal_tiling: bool = True, use_parallel_tiling: bool = True, use_temporal_scaling_frames: bool = True)

Bases: ModelConfig

Functions
fastvideo.configs.models.vaes.base.VAEConfig.add_cli_args staticmethod
add_cli_args(parser: Any, prefix: str = 'vae-config') -> Any

Add CLI arguments for VAEConfig fields

Source code in fastvideo/configs/models/vaes/base.py
@staticmethod
def add_cli_args(parser: Any, prefix: str = "vae-config") -> Any:
    """Add CLI arguments for VAEConfig fields"""
    parser.add_argument(
        f"--{prefix}.load-encoder",
        action=StoreBoolean,
        dest=f"{prefix.replace('-', '_')}.load_encoder",
        default=VAEConfig.load_encoder,
        help="Whether to load the VAE encoder",
    )
    parser.add_argument(
        f"--{prefix}.load-decoder",
        action=StoreBoolean,
        dest=f"{prefix.replace('-', '_')}.load_decoder",
        default=VAEConfig.load_decoder,
        help="Whether to load the VAE decoder",
    )
    parser.add_argument(
        f"--{prefix}.tile-sample-min-height",
        type=int,
        dest=f"{prefix.replace('-', '_')}.tile_sample_min_height",
        default=VAEConfig.tile_sample_min_height,
        help="Minimum height for VAE tile sampling",
    )
    parser.add_argument(
        f"--{prefix}.tile-sample-min-width",
        type=int,
        dest=f"{prefix.replace('-', '_')}.tile_sample_min_width",
        default=VAEConfig.tile_sample_min_width,
        help="Minimum width for VAE tile sampling",
    )
    parser.add_argument(
        f"--{prefix}.tile-sample-min-num-frames",
        type=int,
        dest=f"{prefix.replace('-', '_')}.tile_sample_min_num_frames",
        default=VAEConfig.tile_sample_min_num_frames,
        help="Minimum number of frames for VAE tile sampling",
    )
    parser.add_argument(
        f"--{prefix}.tile-sample-stride-height",
        type=int,
        dest=f"{prefix.replace('-', '_')}.tile_sample_stride_height",
        default=VAEConfig.tile_sample_stride_height,
        help="Stride height for VAE tile sampling",
    )
    parser.add_argument(
        f"--{prefix}.tile-sample-stride-width",
        type=int,
        dest=f"{prefix.replace('-', '_')}.tile_sample_stride_width",
        default=VAEConfig.tile_sample_stride_width,
        help="Stride width for VAE tile sampling",
    )
    parser.add_argument(
        f"--{prefix}.tile-sample-stride-num-frames",
        type=int,
        dest=f"{prefix.replace('-', '_')}.tile_sample_stride_num_frames",
        default=VAEConfig.tile_sample_stride_num_frames,
        help="Stride number of frames for VAE tile sampling",
    )
    parser.add_argument(
        f"--{prefix}.blend-num-frames",
        type=int,
        dest=f"{prefix.replace('-', '_')}.blend_num_frames",
        default=VAEConfig.blend_num_frames,
        help="Number of frames to blend for VAE tile sampling",
    )
    parser.add_argument(
        f"--{prefix}.use-tiling",
        action=StoreBoolean,
        dest=f"{prefix.replace('-', '_')}.use_tiling",
        default=VAEConfig.use_tiling,
        help="Whether to use tiling for VAE",
    )
    parser.add_argument(
        f"--{prefix}.use-temporal-tiling",
        action=StoreBoolean,
        dest=f"{prefix.replace('-', '_')}.use_temporal_tiling",
        default=VAEConfig.use_temporal_tiling,
        help="Whether to use temporal tiling for VAE",
    )
    parser.add_argument(
        f"--{prefix}.use-parallel-tiling",
        action=StoreBoolean,
        dest=f"{prefix.replace('-', '_')}.use_parallel_tiling",
        default=VAEConfig.use_parallel_tiling,
        help="Whether to use parallel tiling for VAE",
    )

    return parser

fastvideo.configs.pipelines

Classes

fastvideo.configs.pipelines.CosmosConfig dataclass
CosmosConfig(model_path: str = '', pipeline_config_path: str | None = None, embedded_cfg_scale: int = 6, flow_shift: float = 1.0, disable_autocast: bool = False, dit_config: DiTConfig = CosmosVideoConfig(), dit_precision: str = 'bf16', vae_config: VAEConfig = CosmosVAEConfig(), vae_precision: str = 'fp16', vae_tiling: bool = True, vae_sp: bool = True, image_encoder_config: EncoderConfig = EncoderConfig(), image_encoder_precision: str = 'fp32', text_encoder_configs: tuple[EncoderConfig, ...] = (lambda: (T5LargeConfig(),))(), text_encoder_precisions: tuple[str, ...] = (lambda: ('bf16',))(), preprocess_text_funcs: tuple[Callable[[str], str], ...] = (lambda: (preprocess_text,))(), postprocess_text_funcs: tuple[Callable[[BaseEncoderOutput], Tensor], ...] = (lambda: (t5_large_postprocess_text,))(), pos_magic: str | None = None, neg_magic: str | None = None, timesteps_scale: bool | None = None, mask_strategy_file_path: str | None = None, STA_mode: STA_Mode = STA_INFERENCE, skip_time_steps: int = 15, dmd_denoising_steps: list[int] | None = None, ti2v_task: bool = False, boundary_ratio: float | None = None, conditioning_strategy: str = 'frame_replace', min_num_conditional_frames: int = 1, max_num_conditional_frames: int = 2, sigma_conditional: float = 0.0001, sigma_data: float = 1.0, state_ch: int = 16, state_t: int = 24, text_encoder_class: str = 'T5')

Bases: PipelineConfig

Configuration for the Cosmos2 Video2World pipeline, matching the diffusers implementation.

fastvideo.configs.pipelines.FastHunyuanConfig dataclass
FastHunyuanConfig(model_path: str = '', pipeline_config_path: str | None = None, embedded_cfg_scale: int = 6, flow_shift: int = 17, disable_autocast: bool = False, dit_config: DiTConfig = HunyuanVideoConfig(), dit_precision: str = 'bf16', vae_config: VAEConfig = HunyuanVAEConfig(), vae_precision: str = 'fp16', vae_tiling: bool = True, vae_sp: bool = True, image_encoder_config: EncoderConfig = EncoderConfig(), image_encoder_precision: str = 'fp32', text_encoder_configs: tuple[EncoderConfig, ...] = (lambda: (LlamaConfig(), CLIPTextConfig()))(), text_encoder_precisions: tuple[str, ...] = (lambda: ('fp16', 'fp16'))(), preprocess_text_funcs: tuple[Callable[[str], str], ...] = (lambda: (llama_preprocess_text, clip_preprocess_text))(), postprocess_text_funcs: tuple[Callable[[BaseEncoderOutput], tensor], ...] = (lambda: (llama_postprocess_text, clip_postprocess_text))(), pos_magic: str | None = None, neg_magic: str | None = None, timesteps_scale: bool | None = None, mask_strategy_file_path: str | None = None, STA_mode: STA_Mode = STA_INFERENCE, skip_time_steps: int = 15, dmd_denoising_steps: list[int] | None = None, ti2v_task: bool = False, boundary_ratio: float | None = None)

Bases: HunyuanConfig

Configuration specifically optimized for FastHunyuan weights.

fastvideo.configs.pipelines.HunyuanConfig dataclass
HunyuanConfig(model_path: str = '', pipeline_config_path: str | None = None, embedded_cfg_scale: int = 6, flow_shift: int = 7, disable_autocast: bool = False, dit_config: DiTConfig = HunyuanVideoConfig(), dit_precision: str = 'bf16', vae_config: VAEConfig = HunyuanVAEConfig(), vae_precision: str = 'fp16', vae_tiling: bool = True, vae_sp: bool = True, image_encoder_config: EncoderConfig = EncoderConfig(), image_encoder_precision: str = 'fp32', text_encoder_configs: tuple[EncoderConfig, ...] = (lambda: (LlamaConfig(), CLIPTextConfig()))(), text_encoder_precisions: tuple[str, ...] = (lambda: ('fp16', 'fp16'))(), preprocess_text_funcs: tuple[Callable[[str], str], ...] = (lambda: (llama_preprocess_text, clip_preprocess_text))(), postprocess_text_funcs: tuple[Callable[[BaseEncoderOutput], tensor], ...] = (lambda: (llama_postprocess_text, clip_postprocess_text))(), pos_magic: str | None = None, neg_magic: str | None = None, timesteps_scale: bool | None = None, mask_strategy_file_path: str | None = None, STA_mode: STA_Mode = STA_INFERENCE, skip_time_steps: int = 15, dmd_denoising_steps: list[int] | None = None, ti2v_task: bool = False, boundary_ratio: float | None = None)

Bases: PipelineConfig

Base configuration for HunYuan pipeline architecture.

fastvideo.configs.pipelines.PipelineConfig dataclass
PipelineConfig(model_path: str = '', pipeline_config_path: str | None = None, embedded_cfg_scale: float = 6.0, flow_shift: float | None = None, disable_autocast: bool = False, dit_config: DiTConfig = DiTConfig(), dit_precision: str = 'bf16', vae_config: VAEConfig = VAEConfig(), vae_precision: str = 'fp32', vae_tiling: bool = True, vae_sp: bool = True, image_encoder_config: EncoderConfig = EncoderConfig(), image_encoder_precision: str = 'fp32', text_encoder_configs: tuple[EncoderConfig, ...] = (lambda: (EncoderConfig(),))(), text_encoder_precisions: tuple[str, ...] = (lambda: ('fp32',))(), preprocess_text_funcs: tuple[Callable[[str], str], ...] = (lambda: (preprocess_text,))(), postprocess_text_funcs: tuple[Callable[[BaseEncoderOutput], tensor], ...] = (lambda: (postprocess_text,))(), pos_magic: str | None = None, neg_magic: str | None = None, timesteps_scale: bool | None = None, mask_strategy_file_path: str | None = None, STA_mode: STA_Mode = STA_INFERENCE, skip_time_steps: int = 15, dmd_denoising_steps: list[int] | None = None, ti2v_task: bool = False, boundary_ratio: float | None = None)

Base configuration for all pipeline architectures.

Functions
fastvideo.configs.pipelines.PipelineConfig.from_kwargs classmethod
from_kwargs(kwargs: dict[str, Any], config_cli_prefix: str = '') -> PipelineConfig

Load PipelineConfig from a kwargs dictionary.

kwargs: dictionary of keyword arguments
config_cli_prefix: prefix of the CLI arguments for this PipelineConfig instance

Source code in fastvideo/configs/pipelines/base.py
@classmethod
def from_kwargs(cls,
                kwargs: dict[str, Any],
                config_cli_prefix: str = "") -> "PipelineConfig":
    """
    Load PipelineConfig from kwargs Dictionary.
    kwargs: dictionary of kwargs
    config_cli_prefix: prefix of CLI arguments for this PipelineConfig instance
    """
    from fastvideo.configs.pipelines.registry import (
        get_pipeline_config_cls_from_name)

    prefix_with_dot = f"{config_cli_prefix}." if (config_cli_prefix.strip()
                                                  != "") else ""
    model_path: str | None = kwargs.get(prefix_with_dot + 'model_path',
                                        None) or kwargs.get('model_path')
    pipeline_config_or_path: str | PipelineConfig | dict[
        str, Any] | None = kwargs.get(prefix_with_dot + 'pipeline_config',
                                      None) or kwargs.get('pipeline_config')
    if model_path is None:
        raise ValueError("model_path is required in kwargs")

    # 1. Get the pipeline config class from the registry
    pipeline_config_cls = get_pipeline_config_cls_from_name(model_path)

    # 2. Instantiate PipelineConfig
    if pipeline_config_cls is None:
        logger.warning(
            "Couldn't find pipeline config for %s. Using the default pipeline config.",
            model_path)
        pipeline_config = cls()
    else:
        pipeline_config = pipeline_config_cls()

    # 3. Load PipelineConfig from a json file or a PipelineConfig object if provided
    if isinstance(pipeline_config_or_path, str):
        pipeline_config.load_from_json(pipeline_config_or_path)
        kwargs[prefix_with_dot +
               'pipeline_config_path'] = pipeline_config_or_path
    elif isinstance(pipeline_config_or_path, PipelineConfig):
        pipeline_config = pipeline_config_or_path
    elif isinstance(pipeline_config_or_path, dict):
        pipeline_config.update_pipeline_config(pipeline_config_or_path)

    # 4. Update PipelineConfig from CLI arguments if provided
    kwargs[prefix_with_dot + 'model_path'] = model_path
    pipeline_config.update_config_from_dict(kwargs, config_cli_prefix)
    return pipeline_config
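Example usage (a sketch; "FastVideo/FastHunyuan-diffusers" is the registered ID used as an example in the registry documentation below):

from fastvideo.configs.pipelines import PipelineConfig

config = PipelineConfig.from_kwargs({
    "model_path": "FastVideo/FastHunyuan-diffusers",
    "pipeline_config": {"flow_shift": 17},  # dicts go through update_pipeline_config
})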
fastvideo.configs.pipelines.PipelineConfig.from_pretrained classmethod
from_pretrained(model_path: str) -> PipelineConfig

Use the pipeline class setting from model_path to select the matching pipeline config.

Source code in fastvideo/configs/pipelines/base.py
@classmethod
def from_pretrained(cls, model_path: str) -> "PipelineConfig":
    """
    use the pipeline class setting from model_path to match the pipeline config
    """
    from fastvideo.configs.pipelines.registry import (
        get_pipeline_config_cls_from_name)
    pipeline_config_cls = get_pipeline_config_cls_from_name(model_path)

    return cast(PipelineConfig, pipeline_config_cls(model_path=model_path))
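Example usage (same example ID as above; the concrete class returned depends on the registry lookup):

from fastvideo.configs.pipelines import PipelineConfig

config = PipelineConfig.from_pretrained("FastVideo/FastHunyuan-diffusers")
print(type(config).__name__)  # a weight-specific subclass such as FastHunyuanConfig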
fastvideo.configs.pipelines.SlidingTileAttnConfig dataclass
SlidingTileAttnConfig(model_path: str = '', pipeline_config_path: str | None = None, embedded_cfg_scale: float = 6.0, flow_shift: float | None = None, disable_autocast: bool = False, dit_config: DiTConfig = DiTConfig(), dit_precision: str = 'bf16', vae_config: VAEConfig = VAEConfig(), vae_precision: str = 'fp32', vae_tiling: bool = True, vae_sp: bool = True, image_encoder_config: EncoderConfig = EncoderConfig(), image_encoder_precision: str = 'fp32', text_encoder_configs: tuple[EncoderConfig, ...] = (lambda: (EncoderConfig(),))(), text_encoder_precisions: tuple[str, ...] = (lambda: ('fp32',))(), preprocess_text_funcs: tuple[Callable[[str], str], ...] = (lambda: (preprocess_text,))(), postprocess_text_funcs: tuple[Callable[[BaseEncoderOutput], tensor], ...] = (lambda: (postprocess_text,))(), pos_magic: str | None = None, neg_magic: str | None = None, timesteps_scale: bool | None = None, mask_strategy_file_path: str | None = None, STA_mode: STA_Mode = STA_INFERENCE, skip_time_steps: int = 15, dmd_denoising_steps: list[int] | None = None, ti2v_task: bool = False, boundary_ratio: float | None = None, window_size: int = 16, stride: int = 8, height: int = 576, width: int = 1024, pad_to_square: bool = False, use_overlap_optimization: bool = True)

Bases: PipelineConfig

Configuration for sliding tile attention.

fastvideo.configs.pipelines.StepVideoT2VConfig dataclass
StepVideoT2VConfig(model_path: str = '', pipeline_config_path: str | None = None, embedded_cfg_scale: float = 6.0, flow_shift: int = 13, disable_autocast: bool = False, dit_config: DiTConfig = StepVideoConfig(), dit_precision: str = 'bf16', vae_config: VAEConfig = StepVideoVAEConfig(), vae_precision: str = 'bf16', vae_tiling: bool = False, vae_sp: bool = False, image_encoder_config: EncoderConfig = EncoderConfig(), image_encoder_precision: str = 'fp32', text_encoder_configs: tuple[EncoderConfig, ...] = (lambda: (EncoderConfig(),))(), text_encoder_precisions: tuple[str, ...] = (lambda: ('fp32',))(), preprocess_text_funcs: tuple[Callable[[str], str], ...] = (lambda: (preprocess_text,))(), postprocess_text_funcs: tuple[Callable[[BaseEncoderOutput], tensor], ...] = (lambda: (postprocess_text,))(), pos_magic: str = '超高清、HDR 视频、环境光、杜比全景声、画面稳定、流畅动作、逼真的细节、专业级构图、超现实主义、自然、生动、超细节、清晰。', neg_magic: str = '画面暗、低分辨率、不良手、文本、缺少手指、多余的手指、裁剪、低质量、颗粒状、签名、水印、用户名、模糊。', timesteps_scale: bool = False, mask_strategy_file_path: str | None = None, STA_mode: STA_Mode = STA_INFERENCE, skip_time_steps: int = 15, dmd_denoising_steps: list[int] | None = None, ti2v_task: bool = False, boundary_ratio: float | None = None, precision: str = 'bf16')

Bases: PipelineConfig

Base configuration for StepVideo pipeline architecture.

fastvideo.configs.pipelines.WanI2V480PConfig dataclass
WanI2V480PConfig(model_path: str = '', pipeline_config_path: str | None = None, embedded_cfg_scale: float = 6.0, flow_shift: float | None = 3.0, disable_autocast: bool = False, dit_config: DiTConfig = WanVideoConfig(), dit_precision: str = 'bf16', vae_config: VAEConfig = WanVAEConfig(), vae_precision: str = 'fp32', vae_tiling: bool = False, vae_sp: bool = False, image_encoder_config: EncoderConfig = CLIPVisionConfig(), image_encoder_precision: str = 'fp32', text_encoder_configs: tuple[EncoderConfig, ...] = (lambda: (T5Config(),))(), text_encoder_precisions: tuple[str, ...] = (lambda: ('fp32',))(), preprocess_text_funcs: tuple[Callable[[str], str], ...] = (lambda: (preprocess_text,))(), postprocess_text_funcs: tuple[Callable[[BaseEncoderOutput], Tensor], ...] = (lambda: (t5_postprocess_text,))(), pos_magic: str | None = None, neg_magic: str | None = None, timesteps_scale: bool | None = None, mask_strategy_file_path: str | None = None, STA_mode: STA_Mode = STA_INFERENCE, skip_time_steps: int = 15, dmd_denoising_steps: list[int] | None = None, ti2v_task: bool = False, boundary_ratio: float | None = None, precision: str = 'bf16', warp_denoising_step: bool = True)

Bases: WanT2V480PConfig

Base configuration for Wan I2V 14B 480P pipeline architecture.

fastvideo.configs.pipelines.WanI2V720PConfig dataclass
WanI2V720PConfig(model_path: str = '', pipeline_config_path: str | None = None, embedded_cfg_scale: float = 6.0, flow_shift: float | None = 5.0, disable_autocast: bool = False, dit_config: DiTConfig = WanVideoConfig(), dit_precision: str = 'bf16', vae_config: VAEConfig = WanVAEConfig(), vae_precision: str = 'fp32', vae_tiling: bool = False, vae_sp: bool = False, image_encoder_config: EncoderConfig = CLIPVisionConfig(), image_encoder_precision: str = 'fp32', text_encoder_configs: tuple[EncoderConfig, ...] = (lambda: (T5Config(),))(), text_encoder_precisions: tuple[str, ...] = (lambda: ('fp32',))(), preprocess_text_funcs: tuple[Callable[[str], str], ...] = (lambda: (preprocess_text,))(), postprocess_text_funcs: tuple[Callable[[BaseEncoderOutput], Tensor], ...] = (lambda: (t5_postprocess_text,))(), pos_magic: str | None = None, neg_magic: str | None = None, timesteps_scale: bool | None = None, mask_strategy_file_path: str | None = None, STA_mode: STA_Mode = STA_INFERENCE, skip_time_steps: int = 15, dmd_denoising_steps: list[int] | None = None, ti2v_task: bool = False, boundary_ratio: float | None = None, precision: str = 'bf16', warp_denoising_step: bool = True)

Bases: WanI2V480PConfig

Base configuration for Wan I2V 14B 720P pipeline architecture.

fastvideo.configs.pipelines.WanT2V480PConfig dataclass
WanT2V480PConfig(model_path: str = '', pipeline_config_path: str | None = None, embedded_cfg_scale: float = 6.0, flow_shift: float | None = 3.0, disable_autocast: bool = False, dit_config: DiTConfig = WanVideoConfig(), dit_precision: str = 'bf16', vae_config: VAEConfig = WanVAEConfig(), vae_precision: str = 'fp32', vae_tiling: bool = False, vae_sp: bool = False, image_encoder_config: EncoderConfig = EncoderConfig(), image_encoder_precision: str = 'fp32', text_encoder_configs: tuple[EncoderConfig, ...] = (lambda: (T5Config(),))(), text_encoder_precisions: tuple[str, ...] = (lambda: ('fp32',))(), preprocess_text_funcs: tuple[Callable[[str], str], ...] = (lambda: (preprocess_text,))(), postprocess_text_funcs: tuple[Callable[[BaseEncoderOutput], Tensor], ...] = (lambda: (t5_postprocess_text,))(), pos_magic: str | None = None, neg_magic: str | None = None, timesteps_scale: bool | None = None, mask_strategy_file_path: str | None = None, STA_mode: STA_Mode = STA_INFERENCE, skip_time_steps: int = 15, dmd_denoising_steps: list[int] | None = None, ti2v_task: bool = False, boundary_ratio: float | None = None, precision: str = 'bf16', warp_denoising_step: bool = True)

Bases: PipelineConfig

Base configuration for Wan T2V 1.3B pipeline architecture.

fastvideo.configs.pipelines.WanT2V720PConfig dataclass
WanT2V720PConfig(model_path: str = '', pipeline_config_path: str | None = None, embedded_cfg_scale: float = 6.0, flow_shift: float | None = 5.0, disable_autocast: bool = False, dit_config: DiTConfig = WanVideoConfig(), dit_precision: str = 'bf16', vae_config: VAEConfig = WanVAEConfig(), vae_precision: str = 'fp32', vae_tiling: bool = False, vae_sp: bool = False, image_encoder_config: EncoderConfig = EncoderConfig(), image_encoder_precision: str = 'fp32', text_encoder_configs: tuple[EncoderConfig, ...] = (lambda: (T5Config(),))(), text_encoder_precisions: tuple[str, ...] = (lambda: ('fp32',))(), preprocess_text_funcs: tuple[Callable[[str], str], ...] = (lambda: (preprocess_text,))(), postprocess_text_funcs: tuple[Callable[[BaseEncoderOutput], Tensor], ...] = (lambda: (t5_postprocess_text,))(), pos_magic: str | None = None, neg_magic: str | None = None, timesteps_scale: bool | None = None, mask_strategy_file_path: str | None = None, STA_mode: STA_Mode = STA_INFERENCE, skip_time_steps: int = 15, dmd_denoising_steps: list[int] | None = None, ti2v_task: bool = False, boundary_ratio: float | None = None, precision: str = 'bf16', warp_denoising_step: bool = True)

Bases: WanT2V480PConfig

Base configuration for Wan T2V 14B 720P pipeline architecture.

Functions

fastvideo.configs.pipelines.get_pipeline_config_cls_from_name
get_pipeline_config_cls_from_name(pipeline_name_or_path: str) -> type[PipelineConfig]

Get the appropriate configuration class for a given pipeline name or path.

This function implements a multi-step lookup process to find the most suitable configuration class for a given pipeline. It follows this order:

1. Exact match in PIPE_NAME_TO_CONFIG
2. Partial match in PIPE_NAME_TO_CONFIG
3. Fallback based on the class name in model_index.json
4. Otherwise, raise an error

Parameters:

pipeline_name_or_path (str, required): The name or path of the pipeline. This can be:
  • A registered model ID (e.g., "FastVideo/FastHunyuan-diffusers")
  • A local path to a model directory
  • A model ID that will be downloaded

Returns:

type[PipelineConfig]: The configuration class that best matches the pipeline. This will be one of:
  • A specific weight configuration class if an exact match is found
  • A fallback configuration class based on the pipeline architecture
  • The base PipelineConfig class if no matches are found

Note
  • For local paths, the function will verify the model configuration
  • For remote models, it will attempt to download the model index
  • Warning messages are logged when falling back to less specific configurations
Source code in fastvideo/configs/pipelines/registry.py
def get_pipeline_config_cls_from_name(
        pipeline_name_or_path: str) -> type[PipelineConfig]:
    """Get the appropriate configuration class for a given pipeline name or path.

    This function implements a multi-step lookup process to find the most suitable
    configuration class for a given pipeline. It follows this order:
    1. Exact match in the PIPE_NAME_TO_CONFIG
    2. Partial match in the PIPE_NAME_TO_CONFIG
    3. Fallback to class name in the model_index.json
    4. else raise an error

    Args:
        pipeline_name_or_path (str): The name or path of the pipeline. This can be:
            - A registered model ID (e.g., "FastVideo/FastHunyuan-diffusers")
            - A local path to a model directory
            - A model ID that will be downloaded

    Returns:
        Type[PipelineConfig]: The configuration class that best matches the pipeline.
            This will be one of:
            - A specific weight configuration class if an exact match is found
            - A fallback configuration class based on the pipeline architecture
            - The base PipelineConfig class if no matches are found

    Note:
        - For local paths, the function will verify the model configuration
        - For remote models, it will attempt to download the model index
        - Warning messages are logged when falling back to less specific configurations
    """

    pipeline_config_cls: type[PipelineConfig] | None = None

    # First try exact match for specific weights
    if pipeline_name_or_path in PIPE_NAME_TO_CONFIG:
        pipeline_config_cls = PIPE_NAME_TO_CONFIG[pipeline_name_or_path]

    # Try partial matches (for local paths that might include the weight ID)
    for registered_id, config_class in PIPE_NAME_TO_CONFIG.items():
        if registered_id in pipeline_name_or_path:
            pipeline_config_cls = config_class
            break

    # If no match, try to use the fallback config
    if pipeline_config_cls is None:
        if os.path.exists(pipeline_name_or_path):
            config = verify_model_config_and_directory(pipeline_name_or_path)
        else:
            config = maybe_download_model_index(pipeline_name_or_path)
        logger.warning(
            "Trying to use the config from the model_index.json. FastVideo may not correctly identify the optimal config for this model in this situation."
        )

        pipeline_name = config["_class_name"]
        # Try to determine pipeline architecture for fallback
        for pipeline_type, detector in PIPELINE_DETECTOR.items():
            if detector(pipeline_name.lower()):
                pipeline_config_cls = PIPELINE_FALLBACK_CONFIG.get(
                    pipeline_type)
                break

        if pipeline_config_cls is not None:
            logger.warning(
                "No match found for pipeline %s, using fallback config %s.",
                pipeline_name_or_path, pipeline_config_cls)

    if pipeline_config_cls is None:
        raise ValueError(
            f"No match found for pipeline {pipeline_name_or_path}, please check the pipeline name or path."
        )

    return pipeline_config_cls
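Example usage (a sketch reusing the registered ID from the docstring; unregistered names fall back to the model_index.json lookup or raise ValueError):

from fastvideo.configs.pipelines import get_pipeline_config_cls_from_name

config_cls = get_pipeline_config_cls_from_name("FastVideo/FastHunyuan-diffusers")
config = config_cls()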

Modules

fastvideo.configs.pipelines.base
Classes
fastvideo.configs.pipelines.base.PipelineConfig dataclass
PipelineConfig(model_path: str = '', pipeline_config_path: str | None = None, embedded_cfg_scale: float = 6.0, flow_shift: float | None = None, disable_autocast: bool = False, dit_config: DiTConfig = DiTConfig(), dit_precision: str = 'bf16', vae_config: VAEConfig = VAEConfig(), vae_precision: str = 'fp32', vae_tiling: bool = True, vae_sp: bool = True, image_encoder_config: EncoderConfig = EncoderConfig(), image_encoder_precision: str = 'fp32', text_encoder_configs: tuple[EncoderConfig, ...] = (lambda: (EncoderConfig(),))(), text_encoder_precisions: tuple[str, ...] = (lambda: ('fp32',))(), preprocess_text_funcs: tuple[Callable[[str], str], ...] = (lambda: (preprocess_text,))(), postprocess_text_funcs: tuple[Callable[[BaseEncoderOutput], tensor], ...] = (lambda: (postprocess_text,))(), pos_magic: str | None = None, neg_magic: str | None = None, timesteps_scale: bool | None = None, mask_strategy_file_path: str | None = None, STA_mode: STA_Mode = STA_INFERENCE, skip_time_steps: int = 15, dmd_denoising_steps: list[int] | None = None, ti2v_task: bool = False, boundary_ratio: float | None = None)

Base configuration for all pipeline architectures.

Functions
fastvideo.configs.pipelines.base.PipelineConfig.from_kwargs classmethod
from_kwargs(kwargs: dict[str, Any], config_cli_prefix: str = '') -> PipelineConfig

Load PipelineConfig from a kwargs dictionary.

kwargs: dictionary of keyword arguments
config_cli_prefix: prefix of the CLI arguments for this PipelineConfig instance

Source code in fastvideo/configs/pipelines/base.py
@classmethod
def from_kwargs(cls,
                kwargs: dict[str, Any],
                config_cli_prefix: str = "") -> "PipelineConfig":
    """
    Load PipelineConfig from kwargs Dictionary.
    kwargs: dictionary of kwargs
    config_cli_prefix: prefix of CLI arguments for this PipelineConfig instance
    """
    from fastvideo.configs.pipelines.registry import (
        get_pipeline_config_cls_from_name)

    prefix_with_dot = f"{config_cli_prefix}." if (config_cli_prefix.strip()
                                                  != "") else ""
    model_path: str | None = kwargs.get(prefix_with_dot + 'model_path',
                                        None) or kwargs.get('model_path')
    pipeline_config_or_path: str | PipelineConfig | dict[
        str, Any] | None = kwargs.get(prefix_with_dot + 'pipeline_config',
                                      None) or kwargs.get('pipeline_config')
    if model_path is None:
        raise ValueError("model_path is required in kwargs")

    # 1. Get the pipeline config class from the registry
    pipeline_config_cls = get_pipeline_config_cls_from_name(model_path)

    # 2. Instantiate PipelineConfig
    if pipeline_config_cls is None:
        logger.warning(
            "Couldn't find pipeline config for %s. Using the default pipeline config.",
            model_path)
        pipeline_config = cls()
    else:
        pipeline_config = pipeline_config_cls()

    # 3. Load PipelineConfig from a json file or a PipelineConfig object if provided
    if isinstance(pipeline_config_or_path, str):
        pipeline_config.load_from_json(pipeline_config_or_path)
        kwargs[prefix_with_dot +
               'pipeline_config_path'] = pipeline_config_or_path
    elif isinstance(pipeline_config_or_path, PipelineConfig):
        pipeline_config = pipeline_config_or_path
    elif isinstance(pipeline_config_or_path, dict):
        pipeline_config.update_pipeline_config(pipeline_config_or_path)

    # 4. Update PipelineConfig from CLI arguments if provided
    kwargs[prefix_with_dot + 'model_path'] = model_path
    pipeline_config.update_config_from_dict(kwargs, config_cli_prefix)
    return pipeline_config
fastvideo.configs.pipelines.base.PipelineConfig.from_pretrained classmethod
from_pretrained(model_path: str) -> PipelineConfig

Use the pipeline class setting from model_path to select the matching pipeline config.

Source code in fastvideo/configs/pipelines/base.py
@classmethod
def from_pretrained(cls, model_path: str) -> "PipelineConfig":
    """
    use the pipeline class setting from model_path to match the pipeline config
    """
    from fastvideo.configs.pipelines.registry import (
        get_pipeline_config_cls_from_name)
    pipeline_config_cls = get_pipeline_config_cls_from_name(model_path)

    return cast(PipelineConfig, pipeline_config_cls(model_path=model_path))
fastvideo.configs.pipelines.base.STA_Mode

Bases: str, Enum

STA (Sliding Tile Attention) modes.

fastvideo.configs.pipelines.base.SlidingTileAttnConfig dataclass
SlidingTileAttnConfig(model_path: str = '', pipeline_config_path: str | None = None, embedded_cfg_scale: float = 6.0, flow_shift: float | None = None, disable_autocast: bool = False, dit_config: DiTConfig = DiTConfig(), dit_precision: str = 'bf16', vae_config: VAEConfig = VAEConfig(), vae_precision: str = 'fp32', vae_tiling: bool = True, vae_sp: bool = True, image_encoder_config: EncoderConfig = EncoderConfig(), image_encoder_precision: str = 'fp32', text_encoder_configs: tuple[EncoderConfig, ...] = (lambda: (EncoderConfig(),))(), text_encoder_precisions: tuple[str, ...] = (lambda: ('fp32',))(), preprocess_text_funcs: tuple[Callable[[str], str], ...] = (lambda: (preprocess_text,))(), postprocess_text_funcs: tuple[Callable[[BaseEncoderOutput], tensor], ...] = (lambda: (postprocess_text,))(), pos_magic: str | None = None, neg_magic: str | None = None, timesteps_scale: bool | None = None, mask_strategy_file_path: str | None = None, STA_mode: STA_Mode = STA_INFERENCE, skip_time_steps: int = 15, dmd_denoising_steps: list[int] | None = None, ti2v_task: bool = False, boundary_ratio: float | None = None, window_size: int = 16, stride: int = 8, height: int = 576, width: int = 1024, pad_to_square: bool = False, use_overlap_optimization: bool = True)

Bases: PipelineConfig

Configuration for sliding tile attention.

Functions
fastvideo.configs.pipelines.base.parse_int_list
parse_int_list(value: str) -> list[int]

Parse a comma-separated string of integers into a list.

Source code in fastvideo/configs/pipelines/base.py
def parse_int_list(value: str) -> list[int]:
    """Parse a comma-separated string of integers into a list."""
    if not value:
        return []
    return [int(x.strip()) for x in value.split(",")]
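Example usage, including as an argparse type for list-valued flags such as dmd_denoising_steps (the flag name is illustrative):

from fastvideo.configs.pipelines.base import parse_int_list

assert parse_int_list("3, 2, 1") == [3, 2, 1]
assert parse_int_list("") == []

# parser.add_argument("--dmd-denoising-steps", type=parse_int_list, default=None)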
fastvideo.configs.pipelines.cosmos
Classes
fastvideo.configs.pipelines.cosmos.CosmosConfig dataclass
CosmosConfig(model_path: str = '', pipeline_config_path: str | None = None, embedded_cfg_scale: int = 6, flow_shift: float = 1.0, disable_autocast: bool = False, dit_config: DiTConfig = CosmosVideoConfig(), dit_precision: str = 'bf16', vae_config: VAEConfig = CosmosVAEConfig(), vae_precision: str = 'fp16', vae_tiling: bool = True, vae_sp: bool = True, image_encoder_config: EncoderConfig = EncoderConfig(), image_encoder_precision: str = 'fp32', text_encoder_configs: tuple[EncoderConfig, ...] = (lambda: (T5LargeConfig(),))(), text_encoder_precisions: tuple[str, ...] = (lambda: ('bf16',))(), preprocess_text_funcs: tuple[Callable[[str], str], ...] = (lambda: (preprocess_text,))(), postprocess_text_funcs: tuple[Callable[[BaseEncoderOutput], Tensor], ...] = (lambda: (t5_large_postprocess_text,))(), pos_magic: str | None = None, neg_magic: str | None = None, timesteps_scale: bool | None = None, mask_strategy_file_path: str | None = None, STA_mode: STA_Mode = STA_INFERENCE, skip_time_steps: int = 15, dmd_denoising_steps: list[int] | None = None, ti2v_task: bool = False, boundary_ratio: float | None = None, conditioning_strategy: str = 'frame_replace', min_num_conditional_frames: int = 1, max_num_conditional_frames: int = 2, sigma_conditional: float = 0.0001, sigma_data: float = 1.0, state_ch: int = 16, state_t: int = 24, text_encoder_class: str = 'T5')

Bases: PipelineConfig

Configuration for the Cosmos2 Video2World pipeline, matching the diffusers implementation.

Functions
fastvideo.configs.pipelines.cosmos.t5_large_postprocess_text
t5_large_postprocess_text(outputs: BaseEncoderOutput) -> Tensor

Postprocess T5 Large text encoder outputs for Cosmos pipeline.

Return raw last_hidden_state without truncation/padding.

Source code in fastvideo/configs/pipelines/cosmos.py
def t5_large_postprocess_text(outputs: BaseEncoderOutput) -> torch.Tensor:
    """Postprocess T5 Large text encoder outputs for Cosmos pipeline.

    Return raw last_hidden_state without truncation/padding.
    """
    hidden_state = outputs.last_hidden_state

    if hidden_state is None:
        raise ValueError("T5 Large outputs missing last_hidden_state")

    nan_count = torch.isnan(hidden_state).sum()
    if nan_count > 0:
        hidden_state = hidden_state.masked_fill(torch.isnan(hidden_state), 0.0)

    # Zero out embeddings beyond actual sequence length
    if outputs.attention_mask is not None:
        attention_mask = outputs.attention_mask
        lengths = attention_mask.sum(dim=1).cpu()
        for i, length in enumerate(lengths):
            hidden_state[i, length:] = 0

    return hidden_state
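A standalone torch sketch of the masking step above: positions past each sequence's true length, per the attention mask, are zeroed.

import torch

hidden_state = torch.randn(2, 5, 4)  # (batch, seq_len, hidden_dim)
attention_mask = torch.tensor([[1, 1, 1, 0, 0],
                               [1, 1, 1, 1, 1]])
lengths = attention_mask.sum(dim=1)
for i, length in enumerate(lengths):
    hidden_state[i, length:] = 0  # zero everything past the real tokens

assert (hidden_state[0, 3:] == 0).all()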
fastvideo.configs.pipelines.hunyuan
Classes
fastvideo.configs.pipelines.hunyuan.FastHunyuanConfig dataclass
FastHunyuanConfig(model_path: str = '', pipeline_config_path: str | None = None, embedded_cfg_scale: int = 6, flow_shift: int = 17, disable_autocast: bool = False, dit_config: DiTConfig = HunyuanVideoConfig(), dit_precision: str = 'bf16', vae_config: VAEConfig = HunyuanVAEConfig(), vae_precision: str = 'fp16', vae_tiling: bool = True, vae_sp: bool = True, image_encoder_config: EncoderConfig = EncoderConfig(), image_encoder_precision: str = 'fp32', text_encoder_configs: tuple[EncoderConfig, ...] = (lambda: (LlamaConfig(), CLIPTextConfig()))(), text_encoder_precisions: tuple[str, ...] = (lambda: ('fp16', 'fp16'))(), preprocess_text_funcs: tuple[Callable[[str], str], ...] = (lambda: (llama_preprocess_text, clip_preprocess_text))(), postprocess_text_funcs: tuple[Callable[[BaseEncoderOutput], tensor], ...] = (lambda: (llama_postprocess_text, clip_postprocess_text))(), pos_magic: str | None = None, neg_magic: str | None = None, timesteps_scale: bool | None = None, mask_strategy_file_path: str | None = None, STA_mode: STA_Mode = STA_INFERENCE, skip_time_steps: int = 15, dmd_denoising_steps: list[int] | None = None, ti2v_task: bool = False, boundary_ratio: float | None = None)

Bases: HunyuanConfig

Configuration specifically optimized for FastHunyuan weights.

fastvideo.configs.pipelines.hunyuan.HunyuanConfig dataclass
HunyuanConfig(model_path: str = '', pipeline_config_path: str | None = None, embedded_cfg_scale: int = 6, flow_shift: int = 7, disable_autocast: bool = False, dit_config: DiTConfig = HunyuanVideoConfig(), dit_precision: str = 'bf16', vae_config: VAEConfig = HunyuanVAEConfig(), vae_precision: str = 'fp16', vae_tiling: bool = True, vae_sp: bool = True, image_encoder_config: EncoderConfig = EncoderConfig(), image_encoder_precision: str = 'fp32', text_encoder_configs: tuple[EncoderConfig, ...] = (lambda: (LlamaConfig(), CLIPTextConfig()))(), text_encoder_precisions: tuple[str, ...] = (lambda: ('fp16', 'fp16'))(), preprocess_text_funcs: tuple[Callable[[str], str], ...] = (lambda: (llama_preprocess_text, clip_preprocess_text))(), postprocess_text_funcs: tuple[Callable[[BaseEncoderOutput], tensor], ...] = (lambda: (llama_postprocess_text, clip_postprocess_text))(), pos_magic: str | None = None, neg_magic: str | None = None, timesteps_scale: bool | None = None, mask_strategy_file_path: str | None = None, STA_mode: STA_Mode = STA_INFERENCE, skip_time_steps: int = 15, dmd_denoising_steps: list[int] | None = None, ti2v_task: bool = False, boundary_ratio: float | None = None)

Bases: PipelineConfig

Base configuration for HunYuan pipeline architecture.

fastvideo.configs.pipelines.registry

Registry for pipeline weight-specific configurations.

Classes
Functions
fastvideo.configs.pipelines.registry.get_pipeline_config_cls_from_name
get_pipeline_config_cls_from_name(pipeline_name_or_path: str) -> type[PipelineConfig]

Get the appropriate configuration class for a given pipeline name or path.

This function implements a multi-step lookup process to find the most suitable configuration class for a given pipeline. It follows this order:

1. Exact match in PIPE_NAME_TO_CONFIG
2. Partial match in PIPE_NAME_TO_CONFIG
3. Fallback based on the class name in model_index.json
4. Otherwise, raise an error

Parameters:

pipeline_name_or_path (str, required): The name or path of the pipeline. This can be:
  • A registered model ID (e.g., "FastVideo/FastHunyuan-diffusers")
  • A local path to a model directory
  • A model ID that will be downloaded

Returns:

type[PipelineConfig]: The configuration class that best matches the pipeline. This will be one of:
  • A specific weight configuration class if an exact match is found
  • A fallback configuration class based on the pipeline architecture
  • The base PipelineConfig class if no matches are found

Note
  • For local paths, the function will verify the model configuration
  • For remote models, it will attempt to download the model index
  • Warning messages are logged when falling back to less specific configurations
Source code in fastvideo/configs/pipelines/registry.py
def get_pipeline_config_cls_from_name(
        pipeline_name_or_path: str) -> type[PipelineConfig]:
    """Get the appropriate configuration class for a given pipeline name or path.

    This function implements a multi-step lookup process to find the most suitable
    configuration class for a given pipeline. It follows this order:
    1. Exact match in the PIPE_NAME_TO_CONFIG
    2. Partial match in the PIPE_NAME_TO_CONFIG
    3. Fallback to class name in the model_index.json
    4. else raise an error

    Args:
        pipeline_name_or_path (str): The name or path of the pipeline. This can be:
            - A registered model ID (e.g., "FastVideo/FastHunyuan-diffusers")
            - A local path to a model directory
            - A model ID that will be downloaded

    Returns:
        Type[PipelineConfig]: The configuration class that best matches the pipeline.
            This will be one of:
            - A specific weight configuration class if an exact match is found
            - A fallback configuration class based on the pipeline architecture
            (If no configuration matches at all, a ValueError is raised.)

    Note:
        - For local paths, the function will verify the model configuration
        - For remote models, it will attempt to download the model index
        - Warning messages are logged when falling back to less specific configurations
    """

    pipeline_config_cls: type[PipelineConfig] | None = None

    # First try exact match for specific weights
    if pipeline_name_or_path in PIPE_NAME_TO_CONFIG:
        pipeline_config_cls = PIPE_NAME_TO_CONFIG[pipeline_name_or_path]

    # Try partial matches (for local paths that might include the weight ID)
    for registered_id, config_class in PIPE_NAME_TO_CONFIG.items():
        if registered_id in pipeline_name_or_path:
            pipeline_config_cls = config_class
            break

    # If no match, try to use the fallback config
    if pipeline_config_cls is None:
        if os.path.exists(pipeline_name_or_path):
            config = verify_model_config_and_directory(pipeline_name_or_path)
        else:
            config = maybe_download_model_index(pipeline_name_or_path)
        logger.warning(
            "Trying to use the config from the model_index.json. FastVideo may not correctly identify the optimal config for this model in this situation."
        )

        pipeline_name = config["_class_name"]
        # Try to determine pipeline architecture for fallback
        for pipeline_type, detector in PIPELINE_DETECTOR.items():
            if detector(pipeline_name.lower()):
                pipeline_config_cls = PIPELINE_FALLBACK_CONFIG.get(
                    pipeline_type)
                break

        if pipeline_config_cls is not None:
            logger.warning(
                "No match found for pipeline %s, using fallback config %s.",
                pipeline_name_or_path, pipeline_config_cls)

    if pipeline_config_cls is None:
        raise ValueError(
            f"No match found for pipeline {pipeline_name_or_path}, please check the pipeline name or path."
        )

    return pipeline_config_cls
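
A minimal usage sketch of the lookup (the model ID is the registered example from the docstring; instantiating the returned class is illustrative):

from fastvideo.configs.pipelines.registry import get_pipeline_config_cls_from_name

config_cls = get_pipeline_config_cls_from_name("FastVideo/FastHunyuan-diffusers")
config = config_cls(model_path="FastVideo/FastHunyuan-diffusers")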
fastvideo.configs.pipelines.stepvideo
Classes
fastvideo.configs.pipelines.stepvideo.StepVideoT2VConfig dataclass
StepVideoT2VConfig(model_path: str = '', pipeline_config_path: str | None = None, embedded_cfg_scale: float = 6.0, flow_shift: int = 13, disable_autocast: bool = False, dit_config: DiTConfig = StepVideoConfig(), dit_precision: str = 'bf16', vae_config: VAEConfig = StepVideoVAEConfig(), vae_precision: str = 'bf16', vae_tiling: bool = False, vae_sp: bool = False, image_encoder_config: EncoderConfig = EncoderConfig(), image_encoder_precision: str = 'fp32', text_encoder_configs: tuple[EncoderConfig, ...] = (lambda: (EncoderConfig(),))(), text_encoder_precisions: tuple[str, ...] = (lambda: ('fp32',))(), preprocess_text_funcs: tuple[Callable[[str], str], ...] = (lambda: (preprocess_text,))(), postprocess_text_funcs: tuple[Callable[[BaseEncoderOutput], tensor], ...] = (lambda: (postprocess_text,))(), pos_magic: str = '超高清、HDR 视频、环境光、杜比全景声、画面稳定、流畅动作、逼真的细节、专业级构图、超现实主义、自然、生动、超细节、清晰。', neg_magic: str = '画面暗、低分辨率、不良手、文本、缺少手指、多余的手指、裁剪、低质量、颗粒状、签名、水印、用户名、模糊。', timesteps_scale: bool = False, mask_strategy_file_path: str | None = None, STA_mode: STA_Mode = STA_INFERENCE, skip_time_steps: int = 15, dmd_denoising_steps: list[int] | None = None, ti2v_task: bool = False, boundary_ratio: float | None = None, precision: str = 'bf16')

Bases: PipelineConfig

Base configuration for StepVideo pipeline architecture.
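
(For reference, the Chinese defaults translate roughly as: pos_magic: "Ultra-high definition, HDR video, ambient lighting, Dolby Atmos, stable framing, smooth motion, lifelike detail, professional-grade composition, surrealism, natural, vivid, ultra-detailed, sharp." neg_magic: "Dark footage, low resolution, bad hands, text, missing fingers, extra fingers, cropped, low quality, grainy, signature, watermark, username, blurry.")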

fastvideo.configs.pipelines.wan
Classes
fastvideo.configs.pipelines.wan.FastWan2_1_T2V_480P_Config dataclass
FastWan2_1_T2V_480P_Config(model_path: str = '', pipeline_config_path: str | None = None, embedded_cfg_scale: float = 6.0, flow_shift: float | None = 8.0, disable_autocast: bool = False, dit_config: DiTConfig = WanVideoConfig(), dit_precision: str = 'bf16', vae_config: VAEConfig = WanVAEConfig(), vae_precision: str = 'fp32', vae_tiling: bool = False, vae_sp: bool = False, image_encoder_config: EncoderConfig = EncoderConfig(), image_encoder_precision: str = 'fp32', text_encoder_configs: tuple[EncoderConfig, ...] = (lambda: (T5Config(),))(), text_encoder_precisions: tuple[str, ...] = (lambda: ('fp32',))(), preprocess_text_funcs: tuple[Callable[[str], str], ...] = (lambda: (preprocess_text,))(), postprocess_text_funcs: tuple[Callable[[BaseEncoderOutput], Tensor], ...] = (lambda: (t5_postprocess_text,))(), pos_magic: str | None = None, neg_magic: str | None = None, timesteps_scale: bool | None = None, mask_strategy_file_path: str | None = None, STA_mode: STA_Mode = STA_INFERENCE, skip_time_steps: int = 15, dmd_denoising_steps: list[int] | None = (lambda: [1000, 757, 522])(), ti2v_task: bool = False, boundary_ratio: float | None = None, precision: str = 'bf16', warp_denoising_step: bool = True)

Bases: WanT2V480PConfig

Base configuration for the FastWan T2V 1.3B 480P pipeline architecture with DMD (Distribution Matching Distillation).

fastvideo.configs.pipelines.wan.WANV2VConfig dataclass
WANV2VConfig(model_path: str = '', pipeline_config_path: str | None = None, embedded_cfg_scale: float = 6.0, flow_shift: float | None = 3.0, disable_autocast: bool = False, dit_config: DiTConfig = WanVideoConfig(), dit_precision: str = 'bf16', vae_config: VAEConfig = WanVAEConfig(), vae_precision: str = 'fp32', vae_tiling: bool = False, vae_sp: bool = False, image_encoder_config: EncoderConfig = WAN2_1ControlCLIPVisionConfig(), image_encoder_precision: str = 'bf16', text_encoder_configs: tuple[EncoderConfig, ...] = (lambda: (T5Config(),))(), text_encoder_precisions: tuple[str, ...] = (lambda: ('fp32',))(), preprocess_text_funcs: tuple[Callable[[str], str], ...] = (lambda: (preprocess_text,))(), postprocess_text_funcs: tuple[Callable[[BaseEncoderOutput], Tensor], ...] = (lambda: (t5_postprocess_text,))(), pos_magic: str | None = None, neg_magic: str | None = None, timesteps_scale: bool | None = None, mask_strategy_file_path: str | None = None, STA_mode: STA_Mode = STA_INFERENCE, skip_time_steps: int = 15, dmd_denoising_steps: list[int] | None = None, ti2v_task: bool = False, boundary_ratio: float | None = None, precision: str = 'bf16', warp_denoising_step: bool = True)

Bases: WanI2V480PConfig

Configuration for WAN2.1 1.3B Control pipeline.

fastvideo.configs.pipelines.wan.WanI2V480PConfig dataclass
WanI2V480PConfig(model_path: str = '', pipeline_config_path: str | None = None, embedded_cfg_scale: float = 6.0, flow_shift: float | None = 3.0, disable_autocast: bool = False, dit_config: DiTConfig = WanVideoConfig(), dit_precision: str = 'bf16', vae_config: VAEConfig = WanVAEConfig(), vae_precision: str = 'fp32', vae_tiling: bool = False, vae_sp: bool = False, image_encoder_config: EncoderConfig = CLIPVisionConfig(), image_encoder_precision: str = 'fp32', text_encoder_configs: tuple[EncoderConfig, ...] = (lambda: (T5Config(),))(), text_encoder_precisions: tuple[str, ...] = (lambda: ('fp32',))(), preprocess_text_funcs: tuple[Callable[[str], str], ...] = (lambda: (preprocess_text,))(), postprocess_text_funcs: tuple[Callable[[BaseEncoderOutput], Tensor], ...] = (lambda: (t5_postprocess_text,))(), pos_magic: str | None = None, neg_magic: str | None = None, timesteps_scale: bool | None = None, mask_strategy_file_path: str | None = None, STA_mode: STA_Mode = STA_INFERENCE, skip_time_steps: int = 15, dmd_denoising_steps: list[int] | None = None, ti2v_task: bool = False, boundary_ratio: float | None = None, precision: str = 'bf16', warp_denoising_step: bool = True)

Bases: WanT2V480PConfig

Base configuration for Wan I2V 14B 480P pipeline architecture.

fastvideo.configs.pipelines.wan.WanI2V720PConfig dataclass
WanI2V720PConfig(model_path: str = '', pipeline_config_path: str | None = None, embedded_cfg_scale: float = 6.0, flow_shift: float | None = 5.0, disable_autocast: bool = False, dit_config: DiTConfig = WanVideoConfig(), dit_precision: str = 'bf16', vae_config: VAEConfig = WanVAEConfig(), vae_precision: str = 'fp32', vae_tiling: bool = False, vae_sp: bool = False, image_encoder_config: EncoderConfig = CLIPVisionConfig(), image_encoder_precision: str = 'fp32', text_encoder_configs: tuple[EncoderConfig, ...] = (lambda: (T5Config(),))(), text_encoder_precisions: tuple[str, ...] = (lambda: ('fp32',))(), preprocess_text_funcs: tuple[Callable[[str], str], ...] = (lambda: (preprocess_text,))(), postprocess_text_funcs: tuple[Callable[[BaseEncoderOutput], Tensor], ...] = (lambda: (t5_postprocess_text,))(), pos_magic: str | None = None, neg_magic: str | None = None, timesteps_scale: bool | None = None, mask_strategy_file_path: str | None = None, STA_mode: STA_Mode = STA_INFERENCE, skip_time_steps: int = 15, dmd_denoising_steps: list[int] | None = None, ti2v_task: bool = False, boundary_ratio: float | None = None, precision: str = 'bf16', warp_denoising_step: bool = True)

Bases: WanI2V480PConfig

Base configuration for Wan I2V 14B 720P pipeline architecture.

fastvideo.configs.pipelines.wan.WanT2V480PConfig dataclass
WanT2V480PConfig(model_path: str = '', pipeline_config_path: str | None = None, embedded_cfg_scale: float = 6.0, flow_shift: float | None = 3.0, disable_autocast: bool = False, dit_config: DiTConfig = WanVideoConfig(), dit_precision: str = 'bf16', vae_config: VAEConfig = WanVAEConfig(), vae_precision: str = 'fp32', vae_tiling: bool = False, vae_sp: bool = False, image_encoder_config: EncoderConfig = EncoderConfig(), image_encoder_precision: str = 'fp32', text_encoder_configs: tuple[EncoderConfig, ...] = (lambda: (T5Config(),))(), text_encoder_precisions: tuple[str, ...] = (lambda: ('fp32',))(), preprocess_text_funcs: tuple[Callable[[str], str], ...] = (lambda: (preprocess_text,))(), postprocess_text_funcs: tuple[Callable[[BaseEncoderOutput], Tensor], ...] = (lambda: (t5_postprocess_text,))(), pos_magic: str | None = None, neg_magic: str | None = None, timesteps_scale: bool | None = None, mask_strategy_file_path: str | None = None, STA_mode: STA_Mode = STA_INFERENCE, skip_time_steps: int = 15, dmd_denoising_steps: list[int] | None = None, ti2v_task: bool = False, boundary_ratio: float | None = None, precision: str = 'bf16', warp_denoising_step: bool = True)

Bases: PipelineConfig

Base configuration for Wan T2V 1.3B pipeline architecture.

fastvideo.configs.pipelines.wan.WanT2V720PConfig dataclass
WanT2V720PConfig(model_path: str = '', pipeline_config_path: str | None = None, embedded_cfg_scale: float = 6.0, flow_shift: float | None = 5.0, disable_autocast: bool = False, dit_config: DiTConfig = WanVideoConfig(), dit_precision: str = 'bf16', vae_config: VAEConfig = WanVAEConfig(), vae_precision: str = 'fp32', vae_tiling: bool = False, vae_sp: bool = False, image_encoder_config: EncoderConfig = EncoderConfig(), image_encoder_precision: str = 'fp32', text_encoder_configs: tuple[EncoderConfig, ...] = (lambda: (T5Config(),))(), text_encoder_precisions: tuple[str, ...] = (lambda: ('fp32',))(), preprocess_text_funcs: tuple[Callable[[str], str], ...] = (lambda: (preprocess_text,))(), postprocess_text_funcs: tuple[Callable[[BaseEncoderOutput], Tensor], ...] = (lambda: (t5_postprocess_text,))(), pos_magic: str | None = None, neg_magic: str | None = None, timesteps_scale: bool | None = None, mask_strategy_file_path: str | None = None, STA_mode: STA_Mode = STA_INFERENCE, skip_time_steps: int = 15, dmd_denoising_steps: list[int] | None = None, ti2v_task: bool = False, boundary_ratio: float | None = None, precision: str = 'bf16', warp_denoising_step: bool = True)

Bases: WanT2V480PConfig

Base configuration for Wan T2V 14B 720P pipeline architecture.
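
Because the Wan configs form an inheritance chain, the resolution- and task-specific classes mostly adjust defaults such as flow_shift and the image encoder. A small sketch of the hierarchy documented above (import path assumed from the entry names):

from fastvideo.configs.pipelines.wan import (
    WanI2V480PConfig,
    WanI2V720PConfig,
    WanT2V480PConfig,
)

cfg = WanI2V720PConfig()
assert isinstance(cfg, WanI2V480PConfig)  # 720P I2V extends 480P I2V
assert isinstance(cfg, WanT2V480PConfig)  # which extends the T2V base
print(cfg.flow_shift)  # 5.0, per the signature above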

fastvideo.configs.sample

Classes

fastvideo.configs.sample.SamplingParam dataclass
SamplingParam(data_type: str = 'video', image_path: str | None = None, video_path: str | None = None, prompt: str | list[str] | None = None, negative_prompt: str = 'Bright tones, overexposed, static, blurred details, subtitles, style, works, paintings, images, static, overall gray, worst quality, low quality, JPEG compression residue, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn faces, deformed, disfigured, misshapen limbs, fused fingers, still picture, messy background, three legs, many people in the background, walking backwards', prompt_path: str | None = None, output_path: str = 'outputs/', output_video_name: str | None = None, num_videos_per_prompt: int = 1, seed: int = 1024, num_frames: int = 125, num_frames_round_down: bool = False, height: int = 720, width: int = 1280, fps: int = 24, num_inference_steps: int = 50, guidance_scale: float = 1.0, guidance_rescale: float = 0.0, boundary_ratio: float | None = None, enable_teacache: bool = False, save_video: bool = True, return_frames: bool = False, return_trajectory_latents: bool = False, return_trajectory_decoded: bool = False)

Sampling parameters for video generation.

Functions
fastvideo.configs.sample.SamplingParam.add_cli_args staticmethod
add_cli_args(parser: Any) -> Any

Add CLI arguments for SamplingParam fields

Source code in fastvideo/configs/sample/base.py
@staticmethod
def add_cli_args(parser: Any) -> Any:
    """Add CLI arguments for SamplingParam fields"""
    parser.add_argument(
        "--prompt",
        type=str,
        default=SamplingParam.prompt,
        help="Text prompt for video generation",
    )
    parser.add_argument(
        "--negative-prompt",
        type=str,
        default=SamplingParam.negative_prompt,
        help="Negative text prompt for video generation",
    )
    parser.add_argument(
        "--prompt-path",
        type=str,
        default=SamplingParam.prompt_path,
        help="Path to a text file containing the prompt",
    )
    parser.add_argument(
        "--output-path",
        type=str,
        default=SamplingParam.output_path,
        help="Path to save the generated video",
    )
    parser.add_argument(
        "--output-video-name",
        type=str,
        default=SamplingParam.output_video_name,
        help="Name of the output video",
    )
    parser.add_argument(
        "--num-videos-per-prompt",
        type=int,
        default=SamplingParam.num_videos_per_prompt,
        help="Number of videos to generate per prompt",
    )
    parser.add_argument(
        "--seed",
        type=int,
        default=SamplingParam.seed,
        help="Random seed for generation",
    )
    parser.add_argument(
        "--num-frames",
        type=int,
        default=SamplingParam.num_frames,
        help="Number of frames to generate",
    )
    parser.add_argument(
        "--height",
        type=int,
        default=SamplingParam.height,
        help="Height of generated video",
    )
    parser.add_argument(
        "--width",
        type=int,
        default=SamplingParam.width,
        help="Width of generated video",
    )
    parser.add_argument(
        "--fps",
        type=int,
        default=SamplingParam.fps,
        help="Frames per second for saved video",
    )
    parser.add_argument(
        "--num-inference-steps",
        type=int,
        default=SamplingParam.num_inference_steps,
        help="Number of denoising steps",
    )
    parser.add_argument(
        "--guidance-scale",
        type=float,
        default=SamplingParam.guidance_scale,
        help="Classifier-free guidance scale",
    )
    parser.add_argument(
        "--guidance-rescale",
        type=float,
        default=SamplingParam.guidance_rescale,
        help="Guidance rescale factor",
    )
    parser.add_argument(
        "--boundary-ratio",
        type=float,
        default=SamplingParam.boundary_ratio,
        help="Boundary timestep ratio",
    )
    parser.add_argument(
        "--save-video",
        action="store_true",
        default=SamplingParam.save_video,
        help="Whether to save the video to disk",
    )
    parser.add_argument(
        "--no-save-video",
        action="store_false",
        dest="save_video",
        help="Don't save the video to disk",
    )
    parser.add_argument(
        "--return-frames",
        action="store_true",
        default=SamplingParam.return_frames,
        help="Whether to return the raw frames",
    )
    parser.add_argument(
        "--image-path",
        type=str,
        default=SamplingParam.image_path,
        help="Path to input image for image-to-video generation",
    )
    parser.add_argument(
        "--video_path",
        type=str,
        default=SamplingParam.video_path,
        help="Path to input video for video-to-video generation",
    )
    parser.add_argument(
        "--moba-config-path",
        type=str,
        default=None,
        help=
        "Path to a JSON file containing V-MoBA specific configurations.",
    )
    parser.add_argument(
        "--return-trajectory-latents",
        action="store_true",
        default=SamplingParam.return_trajectory_latents,
        help="Whether to return the trajectory",
    )
    parser.add_argument(
        "--return-trajectory-decoded",
        action="store_true",
        default=SamplingParam.return_trajectory_decoded,
        help="Whether to return the decoded trajectory",
    )
    return parser
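
A minimal wiring sketch for these flags, assuming a plain argparse parser (the prompt and values are illustrative):

import argparse

from fastvideo.configs.sample import SamplingParam

parser = argparse.ArgumentParser()
parser = SamplingParam.add_cli_args(parser)
args = parser.parse_args(["--prompt", "a corgi on the beach", "--num-frames", "81"])
print(args.prompt, args.num_frames)  # a corgi on the beach 81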

Modules

fastvideo.configs.sample.base
Classes
fastvideo.configs.sample.base.SamplingParam dataclass
This entry is a verbatim duplicate of fastvideo.configs.sample.SamplingParam documented above: the class is defined in fastvideo/configs/sample/base.py and re-exported from fastvideo.configs.sample. See the entry above for the full signature, field defaults, and add_cli_args source.
Functions
fastvideo.configs.sample.registry
Classes
Functions
fastvideo.configs.sample.registry.get_sampling_param_cls_for_name
get_sampling_param_cls_for_name(pipeline_name_or_path: str) -> Any | None

Get the appropriate sampling param for specific pretrained weights.

Source code in fastvideo/configs/sample/registry.py
def get_sampling_param_cls_for_name(pipeline_name_or_path: str) -> Any | None:
    """Get the appropriate sampling param for specific pretrained weights."""

    if os.path.exists(pipeline_name_or_path):
        config = verify_model_config_and_directory(pipeline_name_or_path)
        logger.warning(
            "FastVideo may not correctly identify the optimal sampling param for this model, as the local directory may have been renamed."
        )
    else:
        config = maybe_download_model_index(pipeline_name_or_path)

    pipeline_name = config["_class_name"]

    # First try exact match for specific weights
    if pipeline_name_or_path in SAMPLING_PARAM_REGISTRY:
        return SAMPLING_PARAM_REGISTRY[pipeline_name_or_path]

    # Try partial matches (for local paths that might include the weight ID)
    for registered_id, config_class in SAMPLING_PARAM_REGISTRY.items():
        if registered_id in pipeline_name_or_path:
            return config_class

    # If no match, try to use the fallback config
    fallback_config = None
    # Try to determine pipeline architecture for fallback
    for pipeline_type, detector in SAMPLING_PARAM_DETECTOR.items():
        if detector(pipeline_name.lower()):
            fallback_config = SAMPLING_FALLBACK_PARAM.get(pipeline_type)
            break

    logger.warning(
        "No match found for pipeline %s, using fallback sampling param %s.",
        pipeline_name_or_path, fallback_config)
    return fallback_config
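
A minimal usage sketch (model ID reused from the registry example above; the function returns None when even the fallback lookup fails):

from fastvideo.configs.sample.registry import get_sampling_param_cls_for_name

param_cls = get_sampling_param_cls_for_name("FastVideo/FastHunyuan-diffusers")
if param_cls is not None:
    sampling_param = param_cls()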
fastvideo.configs.sample.wan
Classes
fastvideo.configs.sample.wan.Wan2_1_Fun_1_3B_InP_SamplingParam dataclass
Wan2_1_Fun_1_3B_InP_SamplingParam(data_type: str = 'video', image_path: str | None = None, video_path: str | None = None, prompt: str | list[str] | None = None, negative_prompt: str | None = '色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走', prompt_path: str | None = None, output_path: str = 'outputs/', output_video_name: str | None = None, num_videos_per_prompt: int = 1, seed: int = 1024, num_frames: int = 81, num_frames_round_down: bool = False, height: int = 480, width: int = 832, fps: int = 16, num_inference_steps: int = 50, guidance_scale: float = 6.0, guidance_rescale: float = 0.0, boundary_ratio: float | None = None, enable_teacache: bool = False, save_video: bool = True, return_frames: bool = False, return_trajectory_latents: bool = False, return_trajectory_decoded: bool = False)

Bases: SamplingParam

Sampling parameters for Wan2.1 Fun 1.3B InP model.
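
(For reference, the Chinese negative_prompt default is the counterpart of the English default shown for SamplingParam above; it translates roughly as: "Garish colors, overexposed, static, blurry details, subtitles, style, artwork, painting, frame, still, overall gray, worst quality, low quality, JPEG compression artifacts, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn face, deformed, disfigured, malformed limbs, fused fingers, motionless frame, cluttered background, three legs, crowded background, walking backwards." The Wan2.2 entries below share the same default.)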

fastvideo.configs.sample.wan.Wan2_2_Base_SamplingParam dataclass
Wan2_2_Base_SamplingParam(data_type: str = 'video', image_path: str | None = None, video_path: str | None = None, prompt: str | list[str] | None = None, negative_prompt: str | None = '色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走', prompt_path: str | None = None, output_path: str = 'outputs/', output_video_name: str | None = None, num_videos_per_prompt: int = 1, seed: int = 1024, num_frames: int = 125, num_frames_round_down: bool = False, height: int = 720, width: int = 1280, fps: int = 24, num_inference_steps: int = 50, guidance_scale: float = 1.0, guidance_rescale: float = 0.0, boundary_ratio: float | None = None, enable_teacache: bool = False, save_video: bool = True, return_frames: bool = False, return_trajectory_latents: bool = False, return_trajectory_decoded: bool = False)

Bases: SamplingParam

Base sampling parameters shared by Wan2.2 models.

fastvideo.configs.sample.wan.Wan2_2_TI2V_5B_SamplingParam dataclass
Wan2_2_TI2V_5B_SamplingParam(data_type: str = 'video', image_path: str | None = None, video_path: str | None = None, prompt: str | list[str] | None = None, negative_prompt: str | None = '色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走', prompt_path: str | None = None, output_path: str = 'outputs/', output_video_name: str | None = None, num_videos_per_prompt: int = 1, seed: int = 1024, num_frames: int = 121, num_frames_round_down: bool = False, height: int = 704, width: int = 1280, fps: int = 24, num_inference_steps: int = 50, guidance_scale: float = 5.0, guidance_rescale: float = 0.0, boundary_ratio: float | None = None, enable_teacache: bool = False, save_video: bool = True, return_frames: bool = False, return_trajectory_latents: bool = False, return_trajectory_decoded: bool = False)

Bases: Wan2_2_Base_SamplingParam

Sampling parameters for Wan2.2 TI2V 5B model.

fastvideo.configs.utils

Functions

fastvideo.configs.utils.clean_cli_args
clean_cli_args(args: Namespace) -> dict[str, Any]

Clean the arguments by removing the ones that were not explicitly provided by the user.

Source code in fastvideo/configs/utils.py
def clean_cli_args(args: argparse.Namespace) -> dict[str, Any]:
    """
    Clean the arguments by removing the ones that were not explicitly provided by the user.
    """
    provided_args = {}
    for k, v in vars(args).items():
        if (v is not None and hasattr(args, '_provided')
                and k in args._provided):
            provided_args[k] = v

    return provided_args
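
clean_cli_args only keeps keys recorded in args._provided, which is assumed here to be populated by FlexibleArgumentParser whenever a flag is explicitly passed on the command line. A small sketch of that contract, with the set filled in by hand:

import argparse

from fastvideo.configs.utils import clean_cli_args

args = argparse.Namespace(prompt="a cat", seed=1024)
args._provided = {"prompt"}  # normally recorded by the parser; set manually here
print(clean_cli_args(args))  # {'prompt': 'a cat'}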
fastvideo.configs.utils.update_config_from_args
update_config_from_args(config: Any, args_dict: dict[str, Any], prefix: str = '', pop_args: bool = False) -> bool

Update configuration object from arguments dictionary.

Parameters:

config (Any, required):
    The configuration object to update.
args_dict (dict[str, Any], required):
    Dictionary containing arguments.
prefix (str, default: ''):
    Prefix for the configuration parameters in args_dict. If empty, assumes direct attribute mapping without a prefix.
pop_args (bool, default: False):
    If True, remove applied arguments from args_dict (model_path is always retained).

Returns:

bool:
    True if any matching arguments were applied; this is only tracked when pop_args is True.
Source code in fastvideo/configs/utils.py
def update_config_from_args(config: Any,
                            args_dict: dict[str, Any],
                            prefix: str = "",
                            pop_args: bool = False) -> bool:
    """
    Update configuration object from arguments dictionary.

    Args:
        config: The configuration object to update
        args_dict: Dictionary containing arguments
        prefix: Prefix for the configuration parameters in the args_dict.
               If empty, assumes direct attribute mapping without prefix.
        pop_args: If True, pop applied arguments from args_dict after applying them.
    """
    # Handle top-level attributes (no prefix)
    args_not_to_remove = [
        'model_path',
    ]
    args_to_remove = []
    if prefix.strip() == "":
        for key, value in args_dict.items():
            if hasattr(config, key) and value is not None:
                if key == "text_encoder_precisions" and isinstance(value, list):
                    setattr(config, key, tuple(value))
                else:
                    setattr(config, key, value)
                if pop_args:
                    args_to_remove.append(key)
    else:
        # Handle nested attributes with prefix
        prefix_with_dot = f"{prefix}."
        for key, value in args_dict.items():
            if key.startswith(prefix_with_dot) and value is not None:
                attr_name = key[len(prefix_with_dot):]
                if hasattr(config, attr_name):
                    setattr(config, attr_name, value)
                if pop_args:
                    args_to_remove.append(key)

    if pop_args:
        for key in args_to_remove:
            if key not in args_not_to_remove:
                args_dict.pop(key)

    return len(args_to_remove) > 0
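
A small self-contained sketch of the prefixed path (DemoConfig is a hypothetical stand-in for a real config object):

from dataclasses import dataclass

from fastvideo.configs.utils import update_config_from_args

@dataclass
class DemoConfig:
    max_height: int = 480

cfg = DemoConfig()
args_dict = {"preprocess.max_height": 720, "unrelated": 1}
changed = update_config_from_args(cfg, args_dict, prefix="preprocess", pop_args=True)
print(cfg.max_height, changed, args_dict)  # 720 True {'unrelated': 1}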