Skip to content

quantization

Classes

Functions

fastvideo.layers.quantization.register_quantization_config

register_quantization_config(quantization: str)

Register a customized vllm quantization config.

When a quantization method is not supported by vllm, you can register a customized quantization config to support it.

Parameters:

Name Type Description Default
quantization str

The quantization method name.

required

Examples:

>>> from fastvideo.layers.quantization import register_quantization_config
>>> from fastvideo.layers.quantization import get_quantization_config
>>> from fastvideo.layers.quantization.base_config import QuantizationConfig
>>>
>>> @register_quantization_config("my_quant")
... class MyQuantConfig(QuantizationConfig):
...     pass
>>>
>>> get_quantization_config("my_quant")
<class 'MyQuantConfig'>
Source code in fastvideo/layers/quantization/__init__.py
def register_quantization_config(quantization: str):
    """Build a class decorator that registers a customized vllm quantization config.

    Use this when a quantization method is not supported by vllm: decorating a
    config class stores it in ``_CUSTOMIZED_METHOD_TO_QUANT_CONFIG`` under
    ``quantization`` and appends the name to ``QUANTIZATION_METHODS``.

    Args:
        quantization (str): The quantization method name.

    Returns:
        A decorator that takes the config class, registers it, and returns the
        same class unchanged.

    Raises:
        ValueError: Raised by the returned decorator when ``quantization`` is
            already present in ``QUANTIZATION_METHODS``, or when the decorated
            class is not a subclass of ``QuantizationConfig``.

    Examples:
        >>> from fastvideo.layers.quantization import register_quantization_config
        >>> from fastvideo.layers.quantization import get_quantization_config
        >>> from fastvideo.layers.quantization.base_config import QuantizationConfig
        >>>
        >>> @register_quantization_config("my_quant")
        ... class MyQuantConfig(QuantizationConfig):
        ...     pass
        >>>
        >>> get_quantization_config("my_quant")
        <class 'MyQuantConfig'>
    """  # noqa: E501

    def _register(config_cls):
        # Duplicate names are rejected before anything is written, so an
        # existing registration is never replaced.
        name_taken = quantization in QUANTIZATION_METHODS
        if name_taken:
            raise ValueError(
                f"The quantization method `{quantization}` is already exists.")

        # Only QuantizationConfig subclasses may be registered.
        is_config_subclass = issubclass(config_cls, QuantizationConfig)
        if not is_config_subclass:
            raise ValueError("The quantization config must be a subclass of "
                             "`QuantizationConfig`.")

        _CUSTOMIZED_METHOD_TO_QUANT_CONFIG[quantization] = config_cls
        QUANTIZATION_METHODS.append(quantization)
        # Hand the class back untouched so the decorated name stays usable.
        return config_cls

    return _register

Modules

fastvideo.layers.quantization.base_config

Classes

fastvideo.layers.quantization.base_config.QuantizationConfig
QuantizationConfig()

Bases: ABC

Base class for quantization configs.

Source code in fastvideo/layers/quantization/base_config.py
def __init__(self):
    """Initialize the config with an empty packed-modules mapping."""
    super().__init__()
    # Starts out empty; models update this mapping as they initialize.
    self.packed_modules_mapping: dict[str, list[str]] = {}
Functions
fastvideo.layers.quantization.base_config.QuantizationConfig.from_config abstractmethod classmethod
from_config(config: dict[str, Any]) -> QuantizationConfig

Create a config class from the model's quantization config.

Source code in fastvideo/layers/quantization/base_config.py
@classmethod
@abstractmethod
def from_config(cls, config: dict[str, Any]) -> "QuantizationConfig":
    """Create a config class from the model's quantization config.

    Abstract: this base implementation only raises, so subclasses must
    provide their own.

    Args:
        config: The model's quantization config as a dictionary.

    Returns:
        A ``QuantizationConfig`` built from ``config``.

    Raises:
        NotImplementedError: Always, in this base implementation.
    """
    raise NotImplementedError
fastvideo.layers.quantization.base_config.QuantizationConfig.get_config_filenames abstractmethod staticmethod
get_config_filenames() -> list[str]

List of filenames to search for in the model directory.

Source code in fastvideo/layers/quantization/base_config.py
@staticmethod
@abstractmethod
def get_config_filenames() -> list[str]:
    """List of filenames to search for in the model directory.

    Abstract: this base implementation only raises, so subclasses must
    provide their own.

    Returns:
        The filenames to search for, as a list of strings.

    Raises:
        NotImplementedError: Always, in this base implementation.
    """
    raise NotImplementedError
fastvideo.layers.quantization.base_config.QuantizationConfig.get_from_keys staticmethod
get_from_keys(config: dict[str, Any], keys: list[str]) -> Any

Get a value from the model's quantization config.

Source code in fastvideo/layers/quantization/base_config.py
@staticmethod
def get_from_keys(config: dict[str, Any], keys: list[str]) -> Any:
    """Look up the first of several candidate keys in the quantization config.

    Args:
        config: The model's quantization config.
        keys: Candidate key names, tried in the order given.

    Returns:
        The value stored under the first entry of ``keys`` that is present
        in ``config``.

    Raises:
        ValueError: If none of ``keys`` is present in ``config``.
    """
    # A private sentinel keeps a stored ``None`` distinguishable from
    # "no key matched".
    not_found = object()
    value = next((config[name] for name in keys if name in config),
                 not_found)
    if value is not_found:
        raise ValueError(f"Cannot find any of {keys} in the model's "
                         "quantization config.")
    return value
fastvideo.layers.quantization.base_config.QuantizationConfig.get_from_keys_or staticmethod
get_from_keys_or(config: dict[str, Any], keys: list[str], default: Any) -> Any

Get an optional value from the model's quantization config.

Source code in fastvideo/layers/quantization/base_config.py
@staticmethod
def get_from_keys_or(config: dict[str, Any], keys: list[str],
                     default: Any) -> Any:
    """Get an optional value from the model's quantization config.

    Args:
        config: The model's quantization config.
        keys: Candidate key names, passed through to ``get_from_keys``.
        default: Value to return when ``get_from_keys`` raises
            ``ValueError``.

    Returns:
        The result of ``QuantizationConfig.get_from_keys(config, keys)``,
        or ``default`` if that lookup raises ``ValueError``.
    """
    try:
        result = QuantizationConfig.get_from_keys(config, keys)
    except ValueError:
        # The strict lookup signals "no key matched" with ValueError.
        result = default
    return result
fastvideo.layers.quantization.base_config.QuantizationConfig.get_min_capability abstractmethod classmethod
get_min_capability() -> int

Minimum GPU capability to support the quantization method.

E.g., 70 for Volta, 75 for Turing, 80 for Ampere. This requirement is due to the custom CUDA kernels used by the quantization method.

Source code in fastvideo/layers/quantization/base_config.py
@classmethod
@abstractmethod
def get_min_capability(cls) -> int:
    """Minimum GPU capability to support the quantization method.

    E.g., 70 for Volta, 75 for Turing, 80 for Ampere.
    This requirement is due to the custom CUDA kernels used by the
    quantization method.

    Abstract: this base implementation only raises, so subclasses must
    provide their own.

    Returns:
        The minimum capability as an integer (see the examples above).

    Raises:
        NotImplementedError: Always, in this base implementation.
    """
    raise NotImplementedError
fastvideo.layers.quantization.base_config.QuantizationConfig.get_name abstractmethod
get_name() -> QuantizationMethods

Name of the quantization method.

Source code in fastvideo/layers/quantization/base_config.py
@abstractmethod
def get_name(self) -> QuantizationMethods:
    """Name of the quantization method.

    Abstract: this base implementation only raises, so subclasses must
    provide their own.

    Returns:
        The method name, typed as ``QuantizationMethods``.

    Raises:
        NotImplementedError: Always, in this base implementation.
    """
    raise NotImplementedError
fastvideo.layers.quantization.base_config.QuantizationConfig.get_quant_method abstractmethod
get_quant_method(layer: Module, prefix: str) -> QuantizeMethodBase | None

Get the quantize method to use for the quantized layer.

Parameters:

Name Type Description Default
layer Module

The layer for the quant method.

required
prefix str

The full name of the layer in the state dict

required

Returns: The quantize method. None if the given layer doesn't support quant method.

Source code in fastvideo/layers/quantization/base_config.py
@abstractmethod
def get_quant_method(self, layer: torch.nn.Module,
                     prefix: str) -> QuantizeMethodBase | None:
    """Get the quantize method to use for the quantized layer.

    Abstract: this base implementation only raises, so subclasses must
    provide their own.

    Args:
        layer: The layer for the quant method.
        prefix: The full name of the layer in the state dict.

    Returns:
        The quantize method. None if the given layer doesn't support quant
        method.

    Raises:
        NotImplementedError: Always, in this base implementation.
    """
    raise NotImplementedError
fastvideo.layers.quantization.base_config.QuantizationConfig.get_supported_act_dtypes abstractmethod
get_supported_act_dtypes() -> list[dtype]

List of supported activation dtypes.

Source code in fastvideo/layers/quantization/base_config.py
@abstractmethod
def get_supported_act_dtypes(self) -> list[torch.dtype]:
    """List of supported activation dtypes.

    Abstract: this base implementation only raises, so subclasses must
    provide their own.

    Returns:
        The supported activation dtypes as a list of ``torch.dtype``.

    Raises:
        NotImplementedError: Always, in this base implementation.
    """
    raise NotImplementedError
fastvideo.layers.quantization.base_config.QuantizationConfig.override_quantization_method classmethod
override_quantization_method(hf_quant_cfg, user_quant) -> QuantizationMethods | None

Detects whether this quantization method can support a given checkpoint format by overriding the user-specified quantization method. This method should only be overridden by subclasses in exceptional circumstances.

Source code in fastvideo/layers/quantization/base_config.py
@classmethod
def override_quantization_method(cls, hf_quant_cfg,
                                 user_quant) -> QuantizationMethods | None:
    """Detect whether this method can override the user-specified one.

    Detects if this quantization method can support a given checkpoint
    format by overriding the user-specified quantization method. This
    method should only be overridden by subclasses in exceptional
    circumstances.

    Args:
        hf_quant_cfg: Unused by this base implementation. Presumably the
            checkpoint's quantization config -- confirm against callers.
        user_quant: Unused by this base implementation. Presumably the
            quantization method requested by the user -- confirm against
            callers.

    Returns:
        Always ``None`` in this base implementation.
    """
    return None
fastvideo.layers.quantization.base_config.QuantizeMethodBase

Bases: ABC

Base class for different quantized methods.

Functions
fastvideo.layers.quantization.base_config.QuantizeMethodBase.apply abstractmethod
apply(layer: Module, *args, **kwargs) -> Tensor

Apply the weights in layer to the input tensor.

Expects create_weights to have been called before on the layer.

Source code in fastvideo/layers/quantization/base_config.py
@abstractmethod
def apply(self, layer: torch.nn.Module, *args, **kwargs) -> torch.Tensor:
    """Apply the weights in layer to the input tensor.

    Expects create_weights to have been called before on the layer.

    Abstract: this base implementation only raises, so subclasses must
    provide their own.

    Args:
        layer: The layer whose weights are applied.
        *args: Extra positional arguments; their meaning is defined by the
            subclass implementation.
        **kwargs: Extra keyword arguments; their meaning is defined by the
            subclass implementation.

    Returns:
        The resulting tensor.

    Raises:
        NotImplementedError: Always, in this base implementation.
    """
    raise NotImplementedError
fastvideo.layers.quantization.base_config.QuantizeMethodBase.create_weights abstractmethod
create_weights(layer: Module, *weight_args, **extra_weight_attrs)

Create weights for a layer.

The weights will be set as attributes of the layer.

Source code in fastvideo/layers/quantization/base_config.py
@abstractmethod
def create_weights(self, layer: torch.nn.Module, *weight_args,
                   **extra_weight_attrs):
    """Create weights for a layer.

    The weights will be set as attributes of the layer.

    Abstract: this base implementation only raises, so subclasses must
    provide their own.

    Args:
        layer: The layer that receives the weights as attributes.
        *weight_args: Extra positional arguments; their meaning is defined
            by the subclass implementation.
        **extra_weight_attrs: Extra keyword arguments; their meaning is
            defined by the subclass implementation.

    Raises:
        NotImplementedError: Always, in this base implementation.
    """
    raise NotImplementedError
fastvideo.layers.quantization.base_config.QuantizeMethodBase.embedding
embedding(layer: Module, *args, **kwargs) -> Tensor

Gather embeddings in the layer based on indices in the input tensor.

Expects create_weights to have been called before on the layer.

Source code in fastvideo/layers/quantization/base_config.py
def embedding(self, layer: torch.nn.Module, *args,
              **kwargs) -> torch.Tensor:
    """Gather embeddings in the layer based on indices in the input tensor.

    Expects create_weights to have been called before on the layer.

    This method is not marked abstract; the base implementation simply
    raises, so only subclasses that override it provide embeddings.

    Args:
        layer: The layer to gather embeddings from.
        *args: Extra positional arguments; their meaning is defined by the
            subclass implementation.
        **kwargs: Extra keyword arguments; their meaning is defined by the
            subclass implementation.

    Returns:
        The gathered embeddings as a tensor.

    Raises:
        NotImplementedError: Always, in this base implementation.
    """
    raise NotImplementedError
fastvideo.layers.quantization.base_config.QuantizeMethodBase.process_weights_after_loading
process_weights_after_loading(layer: Module) -> None

Process the weight after loading.

This can be used, for example, to transpose weights for computation.

Source code in fastvideo/layers/quantization/base_config.py
def process_weights_after_loading(self, layer: nn.Module) -> None:
    """Process the weight after loading.

    This can be used, for example, to transpose weights for computation.
    The base implementation does nothing and returns immediately.

    Args:
        layer: The layer whose weights are to be processed.
    """
    return

Functions

fastvideo.layers.quantization.base_config.method_has_implemented_embedding
method_has_implemented_embedding(method_class: type[QuantizeMethodBase]) -> bool

Not all quant methods have embedding implemented, so we need to check that it exists for our given method. We check this by making sure the function has been changed from the base implementation.

Source code in fastvideo/layers/quantization/base_config.py
def method_has_implemented_embedding(
        method_class: type[QuantizeMethodBase]) -> bool:
    """Report whether ``method_class`` provides its own ``embedding``.

    Not all quant methods have embedding implemented, so callers need a way
    to check. The check compares the statically resolved ``embedding``
    attribute of ``method_class`` with the one on ``QuantizeMethodBase``.

    Args:
        method_class: The quantize-method class to inspect.

    Returns:
        True if ``method_class`` has an ``embedding`` attribute that is a
        different object from the base implementation, False otherwise.
    """
    # getattr_static reads the attribute without invoking descriptors, so
    # the identity comparison below is made on the raw function objects.
    inherited = inspect.getattr_static(QuantizeMethodBase, "embedding", None)
    candidate = inspect.getattr_static(method_class, "embedding", None)

    if candidate is None:
        return False
    return candidate is not inherited