Source: `examples/inference/basic`.
# Basic Video Generation Tutorial
The `VideoGenerator` class provides the primary Python interface for offline video generation, that is, interacting with a diffusion pipeline directly rather than through a separate inference API server.
## Requirements

- At least one NVIDIA GPU with CUDA 12.4.
- Python 3.10-3.12
## Installation
If you have not installed FastVideo, please follow these instructions first.
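For most environments this is a single pip command; the package name below assumes the published PyPI release, so see the linked instructions for conda environments and source installs:

```bash
# Assumes the PyPI package; refer to the installation guide for full details.
pip install fastvideo
```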
## Usage
The first script in this example shows the most basic usage of FastVideo. If you are new to Python and FastVideo, you should start here.
```bash
# if you have not cloned the directory:
git clone https://github.com/hao-ai-lab/FastVideo.git && cd FastVideo

python examples/inference/basic/basic.py
```
## Basic Walkthrough
All you need to generate videos with state-of-the-art diffusion pipelines, on one or many GPUs, is the following few lines:
```python
from fastvideo import VideoGenerator

def main():
    generator = VideoGenerator.from_pretrained(
        "Wan-AI/Wan2.1-T2V-1.3B-Diffusers",
        num_gpus=1,
    )

    prompt = ("A curious raccoon peers through a vibrant field of yellow sunflowers, its eyes "
              "wide with interest. The playful yet serene atmosphere is complemented by soft "
              "natural light filtering through the petals. Mid-shot, warm and cheerful tones.")
    video = generator.generate_video(prompt)

if __name__ == "__main__":
    main()
```
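Scaling to multiple GPUs only changes the `num_gpus` argument; the rest of the API stays the same. Here is a minimal sketch assuming two local GPUs are available (the `output_path` argument also appears in `basic.py` below):

```python
from fastvideo import VideoGenerator

# Same API as above; FastVideo handles the distributed setup internally
# when num_gpus > 1 (this sketch assumes 2 local GPUs are available).
generator = VideoGenerator.from_pretrained(
    "Wan-AI/Wan2.1-T2V-1.3B-Diffusers",
    num_gpus=2,
)

video = generator.generate_video(
    "A curious raccoon peers through a vibrant field of yellow sunflowers.",
    output_path="wan_t2v_videos/",  # directory where the generated video is saved
)
```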
## Example materials
### basic.py
```python
from fastvideo import VideoGenerator

# from fastvideo.v1.configs.sample import SamplingParam

def main():
    # FastVideo will automatically use the optimal default arguments for the
    # model.
    # If a local path is provided, FastVideo will make a best effort
    # attempt to identify the optimal arguments.
    generator = VideoGenerator.from_pretrained(
        "Wan-AI/Wan2.1-T2V-1.3B-Diffusers",
        # if num_gpus > 1, FastVideo will automatically handle distributed setup
        num_gpus=1,
    )

    # sampling_param = SamplingParam.from_pretrained("Wan-AI/Wan2.1-T2V-1.3B-Diffusers")
    # sampling_param.num_frames = 45
    # sampling_param.image_path = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/astronaut.jpg"

    # Generate videos with the same simple API, regardless of GPU count
    prompt = (
        "A curious raccoon peers through a vibrant field of yellow sunflowers, its eyes "
        "wide with interest. The playful yet serene atmosphere is complemented by soft "
        "natural light filtering through the petals. Mid-shot, warm and cheerful tones."
    )
    video = generator.generate_video(prompt)
    # video = generator.generate_video(prompt, sampling_param=sampling_param, output_path="wan_t2v_videos/")

    # Generate another video with a different prompt, without reloading the
    # model!
    prompt2 = (
        "A majestic lion strides across the golden savanna, its powerful frame "
        "glistening under the warm afternoon sun. The tall grass ripples gently in "
        "the breeze, enhancing the lion's commanding presence. The tone is vibrant, "
        "embodying the raw energy of the wild. Low angle, steady tracking shot, "
        "cinematic.")
    video2 = generator.generate_video(prompt2)

if __name__ == "__main__":
    main()
```
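The commented-out lines in `basic.py` sketch how per-generation settings can be overridden. Uncommented and assembled, that workflow looks roughly like this; it is a minimal sketch, and the `SamplingParam` import path is taken from the comment above and may differ across FastVideo versions:

```python
from fastvideo import VideoGenerator
from fastvideo.v1.configs.sample import SamplingParam  # path from the comment in basic.py

generator = VideoGenerator.from_pretrained("Wan-AI/Wan2.1-T2V-1.3B-Diffusers", num_gpus=1)

# Start from the model's default sampling parameters and override a few fields.
sampling_param = SamplingParam.from_pretrained("Wan-AI/Wan2.1-T2V-1.3B-Diffusers")
sampling_param.num_frames = 45

video = generator.generate_video(
    "A curious raccoon peers through a vibrant field of yellow sunflowers.",
    sampling_param=sampling_param,
    output_path="wan_t2v_videos/",
)
```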
### default_args.py
```python
from fastvideo import VideoGenerator
# NOTE: PipelineConfig and SamplingParam are assumed to be importable from the
# top-level package here; adjust the import paths to match your FastVideo version.
from fastvideo import PipelineConfig, SamplingParam

def main():
    # This is the config class for the model initialization
    config = PipelineConfig.from_pretrained("FastVideo/FastHunyuan-Diffusers")

    # can be used to dump the config to a yaml file
    config.dump_to_yaml("config.yaml")

    print(config)
    # {
    #     'vae_config': {
    #         'scale_factor': 8,
    #         'sp': True,
    #         'tiling': True,
    #         'precision': 'fp16'
    #     },
    #     'text_encoder_config': {
    #         'precision': 'fp16'
    #     },
    #     'dit_config': {
    #         'precision': 'fp16'
    #     },
    #     'inference_args': {
    #         'guidance_scale': 7.5,
    #         'num_inference_steps': 5,
    #         'seed': 1024,
    #         'guidance_rescale': 0.0,
    #         'flow_shift': 17,
    #     }
    # }

    config.vae_config.scale_factor = 16

    # FastVideo will automatically use the optimal default arguments for the model.
    # If a local path is provided, FastVideo will make a best effort attempt to
    # identify the optimal arguments.
    generator = VideoGenerator.from_pretrained(
        "FastVideo/FastHunyuan-Diffusers",
        num_gpus=4,
        config=config,
        # or initialize from the dumped YAML file instead:
        # config_path="config.yaml",
    )

    sampling_param = SamplingParam.from_pretrained(
        "FastVideo/FastHunyuan-Diffusers")
    sampling_param.num_inference_steps = 5

    # Generate videos with the same simple API, regardless of GPU count.
    # Keyword arguments override the corresponding fields in sampling_param.
    prompt = "A beautiful woman in a red dress walking down a street"
    video = generator.generate_video(prompt,
                                     sampling_param=sampling_param,
                                     num_inference_steps=6)

    prompt2 = "A beautiful woman in a blue dress walking down a street"
    video2 = generator.generate_video(prompt2)

if __name__ == "__main__":
    main()
```
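Because `PipelineConfig` can be dumped to YAML, the same setup can also be split across runs: tweak and save the config once, then initialize from the file later. A minimal sketch of that round trip, using only the APIs shown above (import paths and the file name are assumptions):

```python
from fastvideo import PipelineConfig, VideoGenerator  # import paths assumed; adjust to your version

# Inspect and tweak the pipeline defaults, then persist them to disk.
config = PipelineConfig.from_pretrained("FastVideo/FastHunyuan-Diffusers")
config.vae_config.scale_factor = 16
config.dump_to_yaml("config.yaml")

# Later (or in a separate script), initialize directly from the YAML file
# instead of passing a live config object.
generator = VideoGenerator.from_pretrained(
    "FastVideo/FastHunyuan-Diffusers",
    num_gpus=4,
    config_path="config.yaml",
)
```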