Source code for aimet_torch.v2.quantization.affine.backends

# -*- mode: python -*-
# =============================================================================
#  @@-COPYRIGHT-START-@@
#
#  Copyright (c) 2023, Qualcomm Innovation Center, Inc. All rights reserved.
#
#  Redistribution and use in source and binary forms, with or without
#  modification, are permitted provided that the following conditions are met:
#
#  1. Redistributions of source code must retain the above copyright notice,
#     this list of conditions and the following disclaimer.
#
#  2. Redistributions in binary form must reproduce the above copyright notice,
#     this list of conditions and the following disclaimer in the documentation
#     and/or other materials provided with the distribution.
#
#  3. Neither the name of the copyright holder nor the names of its contributors
#     may be used to endorse or promote products derived from this software
#     without specific prior written permission.
#
#  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
#  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
#  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
#  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
#  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
#  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
#  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
#  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
#  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
#  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
#  POSSIBILITY OF SUCH DAMAGE.
#
#  SPDX-License-Identifier: BSD-3-Clause
#
#  @@-COPYRIGHT-END-@@
# =============================================================================
# pylint: disable=all

import math
from typing import overload, Union, Tuple, Optional
import torch
from .utils import *


@overload
def quantize(tensor: torch.Tensor, scale: torch.Tensor, offset: torch.Tensor,
             bitwidth: Union[int, float], signed: bool = False,
             block_size: Optional[Tuple[int, ...]] = None):
    ...

@overload
def quantize(tensor: torch.Tensor, scale: torch.Tensor, offset: torch.Tensor, *,
             num_steps: int, signed: bool = False, block_size: Optional[Tuple[int, ...]] = None):
    ...

@overload
def quantize(tensor: torch.Tensor, scale: torch.Tensor, offset: torch.Tensor, *,
             qmin: int, qmax: int, block_size: Optional[Tuple[int, ...]] = None):
    ...


[docs]def quantize(tensor: torch.Tensor, scale: torch.Tensor, offset: torch.Tensor, *args, **kwargs): r""" Applies quantization to the input. Precisely, .. math:: out = clamp\left(\left\lceil\frac{input}{scale}\right\rfloor - offset, qmin, qmax\right) If block size :math:`B = \begin{pmatrix} B_0 & B_1 & \cdots & B_{D-1} \end{pmatrix}` is specified, this equation will be further generalized as .. math:: out_{j_0 \cdots j_{D-1}} & = clamp\left( \left\lceil\frac{input_{j_0 \cdots j_{D-1}}}{scale_{i_0 \cdots i_{D-1}}}\right\rfloor - offset_{i_0 \cdots i_{D-1}}, qmin, qmax\right)\\ \text{where} \quad \forall_{0 \leq d < D} \quad i_d = \left\lfloor \frac{j_d}{B_d} \right\rfloor This function is overloaded with the signatures listed below: .. function:: quantize(tensor, scale, offset, bitwidth, signed=False, block_size=None) :noindex: Equivalent to: .. math:: qmin= \begin{cases} -\left\lceil\frac{2^{bitwidth}-1}{2}\right\rceil,& \text{if } signed\\ 0, & \text{otherwise (default)} \end{cases} qmax= \begin{cases} \left\lfloor\frac{2^{bitwidth}-1}{2}\right\rfloor,& \text{if } signed\\ 2^{bitwidth}-1, & \text{otherwise (default)} \end{cases} :param Tensor tensor: Tensor to quantize :param Tensor scale: Scale for quantization :param Tensor offset: Offset for quantization :param int bitwidth: Bitwidth of quantized tensor based on which :math:`qmin` and :math:`qmax` will be derived :param bool signed: If false, the output will be mapped to positive integers only. Otherwise, it will range over both positive and negative integers. :param block_size: Block size :type block_size: Tuple[int, ...], optional .. function:: quantize(tensor, scale, offset, *, num_steps, signed=False, block_size=None) :noindex: Equivalent to: .. math:: qmin= \begin{cases} -\left\lceil\frac{num\_steps}{2}\right\rceil,& \text{if } signed\\ 0, & \text{otherwise (default)} \end{cases} qmax= \begin{cases} \left\lfloor\frac{num\_steps}{2}\right\rfloor,& \text{if } signed\\ num\_steps, & \text{otherwise (default)} \end{cases} :param Tensor tensor: Tensor to quantize :param Tensor scale: Scale for quantization :param Tensor offset: Offset for quantization :param int num_steps: The number of steps in the quantization range based on which :math:`qmin` and :math:`qmax` will be derived :param bool signed: If false, the output will be mapped to positive integers only. Otherwise, it will range over both positive and negative integers. :param block_size: Block size :type block_size: Tuple[int, ...], optional .. function:: quantize(tensor, scale, offset, *, qmin, qmax, block_size=None) :noindex: :param Tensor tensor: Tensor to quantize :param Tensor scale: Scale for quantization :param Tensor offset: Offset for quantization :param int qmin: Minimum value of the quantization range :param int qmax: Maximum value of the quantization range :param block_size: Block size :type block_size: Tuple[int, ...], optional Examples: >>> import aimet_torch.v2.quantization as Q >>> input = torch.arange(start=-0.3, end=1.3, step=0.05) >>> print(input) tensor([-3.0000e-01, -2.5000e-01, -2.0000e-01, -1.5000e-01, -1.0000e-01, -5.0000e-02, -1.1921e-08, 5.0000e-02, 1.0000e-01, 1.5000e-01, 2.0000e-01, 2.5000e-01, 3.0000e-01, 3.5000e-01, 4.0000e-01, 4.5000e-01, 5.0000e-01, 5.5000e-01, 6.0000e-01, 6.5000e-01, 7.0000e-01, 7.5000e-01, 8.0000e-01, 8.5000e-01, 9.0000e-01, 9.5000e-01, 1.0000e+00, 1.0500e+00, 1.1000e+00, 1.1500e+00, 1.2000e+00, 1.2500e+00]) >>> scale = torch.tensor(1/15) >>> offset = torch.tensor(0.0) >>> Q.affine.quantize(input, scale, offset, bitwidth=4) tensor([ 0., 0., 0., 0., 0., 0., -0., 1., 2., 2., 3., 4., 4., 5., 6., 7., 7., 8., 9., 10., 10., 11., 12., 13., 13., 14., 15., 15., 15., 15., 15., 15.]) >>> Q.affine.quantize(input, scale, offset, num_steps=15) tensor([ 0., 0., 0., 0., 0., 0., -0., 1., 2., 2., 3., 4., 4., 5., 6., 7., 7., 8., 9., 10., 10., 11., 12., 13., 13., 14., 15., 15., 15., 15., 15., 15.]) >>> Q.affine.quantize(input, scale, offset, qmin=0, qmax=15) tensor([ 0., 0., 0., 0., 0., 0., -0., 1., 2., 2., 3., 4., 4., 5., 6., 7., 7., 8., 9., 10., 10., 11., 12., 13., 13., 14., 15., 15., 15., 15., 15., 15.]) """ qmin, qmax, block_size = _parse_args(args, kwargs) return get_backend().quantize(tensor, scale, offset, qmin, qmax, block_size)
@overload def quantize_dequantize(tensor: torch.Tensor, scale: torch.Tensor, offset: torch.Tensor, bitwidth: Union[int, float], signed: bool = False, block_size: Optional[Tuple[int, ...]] = None): ... @overload def quantize_dequantize(tensor: torch.Tensor, scale: torch.Tensor, offset: torch.Tensor, *, num_steps: int, signed: bool = False, block_size: Optional[Tuple[int, ...]] = None): ... @overload def quantize_dequantize(tensor: torch.Tensor, scale: torch.Tensor, offset: torch.Tensor, *, qmin: int, qmax: int, block_size: Optional[Tuple[int, ...]] = None): ...
[docs]def quantize_dequantize(tensor: torch.Tensor, scale: torch.Tensor, offset: torch.Tensor, *args, **kwargs): r""" Applies fake-quantization by quantizing and dequantizing the input. Precisely, .. math:: out = (\overline{input} + offset) * scale where .. math:: \overline{input} = clamp\left(\left\lceil\frac{input}{scale}\right\rfloor - offset, qmin, qmax\right) If block size :math:`B = \begin{pmatrix} B_0 & B_1 & \cdots & B_{D-1} \end{pmatrix}` is specified, this equation will be further generalized as .. math:: out_{j_0 \cdots j_{D-1}} &= (\overline{input}_{j_0 \cdots j_{D-1}} + offset_{i_0 \cdots i_{D-1}}) * scale_{i_0 \cdots i_{D-1}}\\ \overline{input}_{j_0 \cdots j_{D-1}} &= clamp\left( \left\lceil\frac{input_{j_0 \cdots j_{D-1}}}{scale_{i_0 \cdots i_{D-1}}}\right\rfloor - offset_{i_0 \cdots i_{D-1}}, qmin, qmax\right)\\ \text{where } \quad \forall_{0 \leq d < D} \quad i_d = \left\lfloor \frac{j_d}{B_d} \right\rfloor This function is overloaded with the signatures listed below: .. function:: quantize_dequantize(tensor, scale, offset, bitwidth, signed=False, block_size=None) :noindex: Equivalent to: .. math:: qmin= \begin{cases} -\left\lceil\frac{2^{bitwidth}-1}{2}\right\rceil,& \text{if } signed\\ 0, & \text{otherwise (default)} \end{cases} qmax= \begin{cases} \left\lfloor\frac{2^{bitwidth}-1}{2}\right\rfloor,& \text{if } signed\\ 2^{bitwidth}-1, & \text{otherwise (default)} \end{cases} :param Tensor tensor: Tensor to quantize :param Tensor scale: Scale for quantization :param Tensor offset: Offset for quantization :param int bitwidth: Bitwidth of quantized tensor based on which :math:`qmin` and :math:`qmax` will be derived :param bool signed: If false, :math:`\overline{input}` will be mapped to positive integers only. Otherwise, :math:`\overline{input}` will range over both positive and negative integers. :param block_size: Block size :type block_size: Tuple[int, ...], optional .. function:: quantize_dequantize(tensor, scale, offset, *, num_steps, signed=False, block_size=None) :noindex: Equivalent to: .. math:: qmin= \begin{cases} -\left\lceil\frac{num\_steps}{2}\right\rceil,& \text{if } signed\\ 0, & \text{otherwise (default)} \end{cases} qmax= \begin{cases} \left\lfloor\frac{num\_steps}{2}\right\rfloor,& \text{if } signed\\ num\_steps, & \text{otherwise (default)} \end{cases} :param Tensor tensor: Tensor to quantize :param Tensor scale: Scale for quantization :param Tensor offset: Offset for quantization :param int num_steps: The number of steps in the quantization range based on which :math:`qmin` and :math:`qmax` will be derived :param bool signed: If false, :math:`\overline{input}` will be mapped to positive integers only. Otherwise, :math:`\overline{input}` will range over both positive and negative integers. :param block_size: Block size :type block_size: Tuple[int, ...], optional .. function:: quantize_dequantize(tensor, scale, offset, *, qmin, qmax, block_size=None) :noindex: :param Tensor tensor: Tensor to quantize :param Tensor scale: Scale for quantization :param Tensor offset: Offset for quantization :param int qmin: Minimum value of the quantization range :param int qmax: Maximum value of the quantization range :param block_size: Block size :type block_size: Tuple[int, ...], optional Examples: >>> import aimet_torch.v2.quantization as Q >>> input = torch.arange(start=-0.3, end=1.3, step=0.05) >>> print(input) tensor([-3.0000e-01, -2.5000e-01, -2.0000e-01, -1.5000e-01, -1.0000e-01, -5.0000e-02, -1.1921e-08, 5.0000e-02, 1.0000e-01, 1.5000e-01, 2.0000e-01, 2.5000e-01, 3.0000e-01, 3.5000e-01, 4.0000e-01, 4.5000e-01, 5.0000e-01, 5.5000e-01, 6.0000e-01, 6.5000e-01, 7.0000e-01, 7.5000e-01, 8.0000e-01, 8.5000e-01, 9.0000e-01, 9.5000e-01, 1.0000e+00, 1.0500e+00, 1.1000e+00, 1.1500e+00, 1.2000e+00, 1.2500e+00]) >>> scale = torch.tensor(1/15) >>> offset = torch.tensor(0.0) >>> Q.affine.quantize_dequantize(input, scale, offset, bitwidth=4) tensor([0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0667, 0.1333, 0.1333, 0.2000, 0.2667, 0.2667, 0.3333, 0.4000, 0.4667, 0.4667, 0.5333, 0.6000, 0.6667, 0.6667, 0.7333, 0.8000, 0.8667, 0.8667, 0.9333, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000]) >>> Q.affine.quantize_dequantize(input, scale, offset, num_steps=15) tensor([0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0667, 0.1333, 0.1333, 0.2000, 0.2667, 0.2667, 0.3333, 0.4000, 0.4667, 0.4667, 0.5333, 0.6000, 0.6667, 0.6667, 0.7333, 0.8000, 0.8667, 0.8667, 0.9333, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000]) >>> Q.affine.quantize_dequantize(input, scale, offset, qmin=0, qmax=15) tensor([0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0667, 0.1333, 0.1333, 0.2000, 0.2667, 0.2667, 0.3333, 0.4000, 0.4667, 0.4667, 0.5333, 0.6000, 0.6667, 0.6667, 0.7333, 0.8000, 0.8667, 0.8667, 0.9333, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000]) """ qmin, qmax, block_size = _parse_args(args, kwargs) return get_backend().quantize_dequantize(tensor, scale, offset, qmin, qmax, block_size)
[docs]def dequantize(tensor: torch.Tensor, scale: torch.Tensor, offset: torch.Tensor, block_size: Optional[Tuple[int, ...]] = None): return get_backend().dequantize(tensor, scale, offset, block_size)
def _parse_args(args, kwargs) -> Tuple[int, int, Optional[Tuple[int, ...]]]: bitwidth = num_steps = signed = qmin = qmax = None block_size = kwargs.get('block_size') if len(args) == 2: bitwidth, signed = args elif len(args) == 1: bitwidth = args[0] signed = kwargs.get('signed', False) else: if 'bitwidth' in kwargs: bitwidth, signed = kwargs['bitwidth'], kwargs.get('signed', False) elif 'num_steps' in kwargs: num_steps, signed = kwargs['num_steps'], kwargs.get('signed', False) else: qmin, qmax = kwargs['qmin'], kwargs['qmax'] if bitwidth is not None: num_steps = 2 ** bitwidth - 1 if num_steps is not None: if signed: qmin = -math.ceil(num_steps/2) qmax = math.floor(num_steps/2) else: qmin = 0 qmax = num_steps assert qmin is not None assert qmax is not None return qmin, qmax, block_size