# -*- mode: python -*-
# =============================================================================
# @@-COPYRIGHT-START-@@
#
# Copyright (c) 2019-2023, Qualcomm Innovation Center, Inc. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its contributors
# may be used to endorse or promote products derived from this software
# without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
# SPDX-License-Identifier: BSD-3-Clause
#
# @@-COPYRIGHT-END-@@
# =============================================================================
"""Common type definitions that are used across aimet"""
import io
from enum import Enum
from typing import Union, Callable, Any, Optional, Dict, List
from decimal import Decimal
from dataclasses import dataclass
import abc
from aimet_common.layer_database import Layer
from aimet_common import libpymo
[docs]
class QuantScheme(Enum):
"""Quantization schemes"""
min_max = 1
post_training_tf = min_max
post_training_tf_enhanced = 2
### Below are deprecated ###
training_range_learning_with_tf_init = 3
training_range_learning_with_tf_enhanced_init = 4
training_range_learning = 5
post_training_percentile = 6
#############################
[docs]
@classmethod
def from_str(cls, alias: str) -> "QuantScheme":
"""
Returns QuantScheme object from string alias
"""
try:
return _quant_scheme_aliases[alias]
except KeyError as e:
raise ValueError(
f"Invalid string literal {alias}"
f"Expected one of {list(_quant_scheme_aliases.keys())}"
) from e
_quant_scheme_aliases = {
"min_max": QuantScheme.min_max,
"tf": QuantScheme.min_max,
"tf_enhanced": QuantScheme.post_training_tf_enhanced,
"percentile": QuantScheme.post_training_percentile,
}
MAP_QUANT_SCHEME_TO_PYMO = {
QuantScheme.post_training_tf: libpymo.QuantizationMode.QUANTIZATION_TF,
QuantScheme.post_training_tf_enhanced: libpymo.QuantizationMode.QUANTIZATION_TF_ENHANCED,
QuantScheme.training_range_learning_with_tf_init: libpymo.QuantizationMode.QUANTIZATION_TF,
QuantScheme.training_range_learning_with_tf_enhanced_init: libpymo.QuantizationMode.QUANTIZATION_TF_ENHANCED,
QuantScheme.post_training_percentile: libpymo.QuantizationMode.QUANTIZATION_PERCENTILE,
}
MAP_ROUND_MODE_TO_PYMO = {
"nearest": libpymo.RoundingMode.ROUND_NEAREST,
"stochastic": libpymo.RoundingMode.ROUND_STOCHASTIC,
}
RANGE_LEARNING_SCHEMES = {
QuantScheme.training_range_learning_with_tf_init,
QuantScheme.training_range_learning_with_tf_enhanced_init,
}
class ActivationType(Enum):
"""Enums to identify activation type"""
no_activation = 0
""" No activation """
relu = 1
""" ReLU activation """
relu6 = 2
""" ReLU6 activation """
def __eq__(self, other: "ActivationType"):
return self.value == other.value and self.name == other.name # pylint: disable=comparison-with-callable
[docs]
class CostMetric(Enum):
"""Enumeration of metrics to measure cost of a model/layer"""
mac = 1
""" MAC: Cost modeled for compute requirements """
memory = 2
""" Memory: Cost modeled for space requirements """
[docs]
class CompressionScheme(Enum):
"""Enumeration of compression schemes supported in aimet"""
weight_svd = 1
""" Weight SVD """
spatial_svd = 2
""" Spatial SVD """
channel_pruning = 3
""" Channel Pruning """
class RankSelectScheme(Enum):
"""Enumeration of rank selection schemes supported in aimet"""
greedy = 1
""" Greedy scheme"""
class LayerCompRatioPair:
"""
Models a pair of (layer: nn.Module, CompRatio: Decimal)
"""
def __init__(self, layer: Layer, comp_ratio: Union[Decimal, None]):
"""
Constructor
:param layer: Reference to layer
:param comp_ratio: Comp-ratio as a floating point number between 0 and 1
"""
self.layer = layer
self.comp_ratio = comp_ratio
def __str__(self):
return "LayerCompRatioPair: layer={}, comp-ratio={}".format(
self.layer.name, self.comp_ratio
)
class LayerCompRatioEvalScore:
"""
Models data element with (layer: nn.Module, CompRatio: Decimal, EvalScore: Decimal) attributes
"""
def __init__(
self,
layer: Layer,
comp_ratio: Union[Decimal, None],
eval_score: Optional[Union[Decimal, None]],
):
"""
Constructor
:param layer: Reference to layer
:param comp_ratio: Comp-ratio as a floating point number between 0 and 1
:param eval_score: Eval score as floating point number
"""
self.layer = layer
self.comp_ratio = comp_ratio
self.eval_score = eval_score
def __str__(self):
return "LayerCompRatioEvalScore: layer={}, comp-ratio={}, eval_score={}".format(
self.layer.name, self.comp_ratio, self.eval_score
)
EvalFunction = Callable[[Any, Optional[int], bool], float]
[docs]
class GreedySelectionParameters:
"""
Configuration parameters for the Greedy compression-ratio selection algorithm
:ivar target_comp_ratio: Target compression ratio. Expressed as value between 0 and 1.
Compression ratio is the ratio of cost of compressed model to cost of the original model.
:ivar num_comp_ratio_candidates: Number of comp-ratio candidates to analyze per-layer
More candidates allows more granular distribution of compression at the cost
of increased run-time during analysis. Default value=10. Value should be greater than 1.
:ivar use_monotonic_fit: If True, eval scores in the eval dictionary are fitted to a monotonically increasing
function. This is useful if you see the eval dict scores for some layers are not monotonically increasing.
By default, this option is set to False.
:ivar saved_eval_scores_dict: Path to the eval_scores dictionary pickle file that was
saved in a previous run. This is useful to speed-up experiments when trying
different target compression-ratios for example. aimet will save eval_scores
dictionary pickle file automatically in a ./data directory relative to the
current path. num_comp_ratio_candidates parameter will be ignored when this option is used.
"""
def __init__(
self,
target_comp_ratio: float,
num_comp_ratio_candidates: int = 10,
use_monotonic_fit: bool = False,
saved_eval_scores_dict: Optional[str] = None,
):
self.target_comp_ratio = target_comp_ratio
# Sanity check
if num_comp_ratio_candidates < 2:
raise ValueError(
"Error: num_comp_ratio_candidates={}. Need more than 1 candidate for "
"Greedy compression-ratio selection".format(num_comp_ratio_candidates)
)
self.num_comp_ratio_candidates = num_comp_ratio_candidates
self.use_monotonic_fit = use_monotonic_fit
self.saved_eval_scores_dict = saved_eval_scores_dict
class GreedyCompressionRatioSelectionStats:
"""Statistics for the greedy compression-ratio selection algorithm"""
def __init__(self, eval_scores_dict: Dict[str, Dict[Decimal, float]]):
"""
Constructor
:param eval_scores_dict: Dictionary of {layer_name: {compression_ratio: eval_score}}
"""
self.eval_scores_dictionary = eval_scores_dict
def __str__(self):
stream = io.StringIO(newline="\n")
stream.write("\nGreedy Eval Dict\n")
layer_dict = self.eval_scores_dictionary
for layer in layer_dict:
stream.write(" Layer: {}\n".format(layer))
for ratio in sorted(layer_dict[layer]):
stream.write(
" Ratio={}, Eval score={}\n".format(
ratio, layer_dict[layer][ratio]
)
)
return stream.getvalue()
class TarCompressionRatioSelectionStats:
"""Statistics for the TAR compression-ratio selection algorithm"""
def __init__(self, layers_comp_ratio_eval_score_per_rank_index):
"""
Constructor
:param layers_comp_ratio_eval_score_per_rank_index: List of [layer_name: compression_ratio: eval_score] params
"""
self.layers_comp_ratio_eval_score_per_rank_index = (
layers_comp_ratio_eval_score_per_rank_index
)
def __str__(self):
stream = io.StringIO(newline="\n")
stream.write("\nTar Eval table\n")
for data_to_print in self.layers_comp_ratio_eval_score_per_rank_index:
stream.write(" Layer: {}\n".format(data_to_print.layer))
stream.write(
" Ratio={}, Eval score={}\n".format(
(data_to_print.comp_ratio), (data_to_print.eval_score)
)
)
return stream.getvalue()
class CompressionStats:
"""Statistics generated during model compression"""
class LayerStats:
"""Statistics for every layer in the model that was compressed"""
def __init__(self, name: str, comp_ratio: Decimal):
self.name = name
self.compression_ratio = comp_ratio
def __init__(
self,
base_accuracy: float,
comp_accuracy: float,
mem_comp_ratio: Decimal,
mac_comp_ratio: Decimal,
per_layer_stats: List[LayerStats],
comp_ratio_select_stats: Union[GreedyCompressionRatioSelectionStats, None],
):
self.baseline_model_accuracy = format(base_accuracy, ".6f")
self.compressed_model_accuracy = format(comp_accuracy, ".6f")
self.memory_compression_ratio = format(mem_comp_ratio, ".6f")
self.mac_compression_ratio = format(mac_comp_ratio, ".6f")
self.per_layer_stats = per_layer_stats
self.compression_ratio_selection_stats = comp_ratio_select_stats
def __str__(self):
stream = io.StringIO(newline="\n")
stream.write(
"**********************************************************************************************\n"
)
stream.write("Compressed Model Statistics\n")
stream.write(
"Baseline model accuracy: {}, Compressed model accuracy: {}\n".format(
self.baseline_model_accuracy, self.compressed_model_accuracy
)
)
stream.write(
"Compression ratio for memory={}, mac={}\n".format(
self.memory_compression_ratio, self.mac_compression_ratio
)
)
stream.write("\n")
stream.write(
"**********************************************************************************************\n"
)
stream.write("\nPer-layer Stats\n")
for layer in self.per_layer_stats:
stream.write(
" Name:{}, compression-ratio: {}\n".format(
layer.name, layer.compression_ratio
)
)
stream.write("\n")
stream.write(
"**********************************************************************************************\n"
)
stream.write("{}\n".format(self.compression_ratio_selection_stats))
stream.write(
"**********************************************************************************************\n"
)
return stream.getvalue()
class AdaroundConstants:
"""Constants used for Adarounding"""
GAMMA = -0.1
ZETA = 1.1
class QuantizationDataType(Enum):
"""Enumeration of tensor quantizer data types supported"""
undefined = 0
int = 1
float = 2
class SupportedKernelsAction(Enum):
"""Enumeration to specify the action to apply during supported_kernels validation"""
allow_error = 1
warn_on_error = 2
assert_on_error = 3
class QuantDtypeBwInfo:
"""
QuantDtypeBwInfo holds activation dtype/bw and param dtype/bw
"""
def __init__(
self,
act_dtype: QuantizationDataType,
act_bw: int,
param_dtype: QuantizationDataType = QuantizationDataType.undefined,
param_bw: int = 0,
):
"""
Data class to hold dtype and bw info
:param act_dtype: Activation datatype of type QuantizationDataType
:param act_bw: Activation bitwidth of type int
:param param_dtype: Param datatype of type QuantizationDataType
:param param_bw: Param bitwidth of type int
"""
self.act_dtype = act_dtype
self.act_bw = act_bw
self.param_dtype = param_dtype
self.param_bw = param_bw
self._validate_inputs()
def __repr__(self):
return f"(activation:({self.act_dtype}, {self.act_bw}) param:({self.param_dtype}, {self.param_bw})"
def __str__(self):
return f"activation:({self.act_dtype}, {self.act_bw}) param:({self.param_dtype}, {self.param_bw})"
def __eq__(self, other):
return (
self.act_dtype == other.act_dtype
and self.act_bw == other.act_bw
and self.param_dtype == other.param_dtype
and self.param_bw == other.param_bw
)
def _validate_inputs(self):
"""
Validate inputs
"""
if self.param_dtype and self.param_bw:
if self.param_dtype == QuantizationDataType.float and self.param_bw not in [
16,
32,
]:
raise ValueError(
"float param_dtype can only be used when param_bw is set to 16, not "
+ str(self.param_bw)
)
if self.act_dtype == QuantizationDataType.float and self.act_bw not in [16, 32]:
raise ValueError(
"float act_dtype can only be used when act_bw is set to 16, not "
+ str(self.act_bw)
)
def is_same_activation(self, dtype: QuantizationDataType, bw: int):
"""
helper function to check if activation of the object is same as input
:param bw: bitwidth to verify against
:param dtype: dtype to verify against
"""
return bw == self.act_bw and dtype == self.act_dtype
def is_same_param(self, dtype: QuantizationDataType, bw: int):
"""
helper function to check if param of the object is same as input
:param bw: bitwidth to verify against
:param dtype: dtype to verify against
"""
return bw == self.param_bw and dtype == self.param_dtype
def get_activation(self) -> tuple:
"""getter method for activation candidate"""
return self.act_dtype, self.act_bw
def get_param(self) -> tuple:
"""getter method for param candidate"""
return self.param_dtype, self.param_bw
[docs]
class CallbackFunc:
"""
Class encapsulating call back function and it's arguments
"""
def __init__(self, func: Callable, func_callback_args=None):
"""
:param func: Callable Function
:param func_callback_args: Arguments passed to the callable function
"""
self.func = func
self.args = func_callback_args
def __call__(self, arg):
return self.func(arg, self.args)
class EncodingType(Enum):
"""Encoding type"""
PER_TENSOR = 0
PER_CHANNEL = 1
PER_BLOCK = 2
LPBQ = 3
VECTOR = 4
class qtype(abc.ABC):
"""Parent class for all quantized data type definitions"""
@abc.abstractmethod
def __repr__(self):
pass
@abc.abstractmethod
def to_legacy_repr(self) -> tuple[QuantizationDataType, int]:
"""
Returns the qtype represented as a tuple of (QuantizationDataType, bitwidth)
"""
@staticmethod
def from_legacy_repr(dtype: QuantizationDataType, bitwidth: int) -> "qtype":
"""Converts (QuantizationDataType, bitwidth) tuple to qtype"""
if dtype == QuantizationDataType.int:
return qtype.int(bitwidth)
if dtype != QuantizationDataType.float:
raise ValueError(f"Unsupported data type: {dtype}")
if bitwidth == 16:
return QTYPE_ALIASES["float16"]
if bitwidth == 32:
return QTYPE_ALIASES["float32"]
if bitwidth == 8:
# Default to float8e4m3
return qtype.float(
exponent_bits=4, mantissa_bits=3, finite=False, unsigned_zero=False
)
raise ValueError(
"float data type is only supported with bitwidth of 8, 16, or 32, got {bitwidth}"
)
@staticmethod
def int(bits: int) -> "Int":
"""
Constructs an integer QType with the specified number of bits
Args:
bits: Number of bits
Returns:
Integer QType with the specified number of bits
"""
return Int(bits)
@staticmethod
def float(
exponent_bits: int,
mantissa_bits: int,
finite: bool = False,
unsigned_zero: bool = False,
) -> "Float":
"""
Constructs an floating point QType with the specified configuration
Args:
exponent_bits: Number of exponent bits
mantissa_bits: Number of mantissa bits
finite: If False, the QType can represent infinite values
unsigned_zero: If False, the QType uses both signed and unsigned zero/NaN values
Returns:
Floating point QType with the specified configuration
"""
return Float(exponent_bits, mantissa_bits, finite, unsigned_zero)
@staticmethod
def from_string(name: str):
"""
Returns the qtype object associated with the given string alias
"""
if name not in QTYPE_ALIASES:
raise ValueError(
f"{name} is not a defined qtype alias. Use {qtype.int.__qualname__} or {qtype.float.__qualname__} to construct a custom qtype."
)
return QTYPE_ALIASES[name]
@dataclass(frozen=True)
class Int(qtype):
"""Quantized integer types"""
bits: int
def __repr__(self):
return f"int{self.bits}"
def to_legacy_repr(self):
return QuantizationDataType.int, self.bits
def __post_init__(self):
if self.bits < 1:
raise ValueError(f"bits must be strictly positive, got {self.bits}")
@dataclass(frozen=True)
class Float(qtype):
"""Quantized float types"""
exponent_bits: int
mantissa_bits: int
finite: bool
unsigned_zero: bool
def __repr__(self):
for name, dtype in QTYPE_ALIASES.items():
if dtype == self:
return name
e = self.exponent_bits
m = self.mantissa_bits
fn = "fn" if self.finite else ""
uz = "uz" if self.unsigned_zero else ""
return f"float{e + m + 1}e{e}m{m}{fn}{uz}"
def to_legacy_repr(self):
bits = self.exponent_bits + self.mantissa_bits + 1
return QuantizationDataType.float, bits
def __post_init__(self):
if self.exponent_bits < 1:
raise ValueError(
f"exponent_bits must be strictly positive, got {self.exponent_bits}"
)
if self.mantissa_bits < 1:
raise ValueError(
f"mantissa_bits must be strictly positive, got {self.mantissa_bits}"
)
int4 = qtype.int(4)
int8 = qtype.int(8)
int16 = qtype.int(16)
float16 = qtype.float(
exponent_bits=5, mantissa_bits=10, finite=False, unsigned_zero=False
)
float32 = qtype.float(
exponent_bits=8, mantissa_bits=23, finite=False, unsigned_zero=False
)
QTYPE_ALIASES = {
"int4": int4,
"int8": int8,
"int16": int16,
"float16": float16,
"float32": float32,
}