preprocess_stages ¶

Classes¶

fastvideo.pipelines.preprocess.preprocess_stages.TextTransformStage ¶

TextTransformStage(cfg_uncondition_drop_rate: float, seed: int)

Bases: PipelineStage

Process text data according to the CFG unconditional drop rate (`cfg_uncondition_drop_rate`).

Source code in fastvideo/pipelines/preprocess/preprocess_stages.py

def __init__(self, cfg_uncondition_drop_rate: float, seed: int) -> None:
    """Record the drop rate and build a seeded random generator.

    Args:
        cfg_uncondition_drop_rate: Value stored on ``self.cfg_rate``.
            NOTE(review): presumably the probability of dropping the text
            condition; how it is consumed is outside this view.
        seed: Seed for this stage's own ``random.Random`` instance.
    """
    # A dedicated generator keeps this stage's draws separate from the
    # module-level ``random`` state.
    self.rng = random.Random(seed)
    self.cfg_rate = cfg_uncondition_drop_rate

fastvideo.pipelines.preprocess.preprocess_stages.VideoTransformStage ¶

VideoTransformStage(train_fps: int, num_frames: int, max_height: int, max_width: int, do_temporal_sample: bool)

Bases: PipelineStage

Crop a video along the temporal dimension.

Source code in fastvideo/pipelines/preprocess/preprocess_stages.py

def __init__(self, train_fps: int, num_frames: int, max_height: int,
             max_width: int, do_temporal_sample: bool) -> None:
    """Set up the temporal sampler and the spatial transform.

    Args:
        train_fps: Stored on ``self.train_fps``.
        num_frames: Stored on ``self.num_frames`` and passed to
            ``TemporalRandomCrop`` when temporal sampling is enabled.
        max_height: First element of the size handed to
            ``CenterCropResizeVideo``.
        max_width: Second element of the size handed to
            ``CenterCropResizeVideo``.
        do_temporal_sample: When true, ``self.temporal_sample_fn`` is a
            ``TemporalRandomCrop``; otherwise it is ``None``.
    """
    self.train_fps = train_fps
    self.num_frames = num_frames

    # The sampler is optional; it stays None when sampling is disabled.
    self.temporal_sample_fn: Callable | None = (
        TemporalRandomCrop(num_frames) if do_temporal_sample else None)

    target_size = (max_height, max_width)
    self.video_transform = transforms.Compose(
        [CenterCropResizeVideo(target_size)])