# -*- mode: python -*-
# =============================================================================
# @@-COPYRIGHT-START-@@
#
# Copyright (c) 2017-2018, Qualcomm Innovation Center, Inc. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its contributors
# may be used to endorse or promote products derived from this software
# without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
# SPDX-License-Identifier: BSD-3-Clause
#
# @@-COPYRIGHT-END-@@
# =============================================================================
# pylint: disable=too-many-lines
""" Implementation of the SVD model compression technique for TensorFlow """
import os
from functools import reduce
import operator
from enum import Enum
import numpy as np
import tensorflow as tf
from aimet_tensorflow import graph_editor
from aimet_tensorflow.common import core, graph_eval
import aimet_common.libpymo as pymo
from aimet_common import statistics_util as stats_u
from aimet_common.utils import AimetLogger
logger = AimetLogger.get_area_logger(AimetLogger.LogAreas.Svd)
# Map from the user-facing SVD mode names to the PyMo compression-type enums
_SVD_TYPES = {'svd': pymo.TYPE_SINGLE,
              'ssvd': pymo.TYPE_SUCCESSIVE}
# Map from TF op types to the PyMo layer-type enums
_SVD_LAYER_TYPES = {'Conv2D': pymo.LAYER_TYPE_CONV,
                    'MatMul': pymo.LAYER_TYPE_FC}
# Layers whose filter count or input volume falls below this are skipped by layer selection
_MIN_LAYER_DIM_FOR_SVD = 10
# Only Conv2D and MatMul (fully-connected) ops can be SVD-compressed
_SVD_SUPPORTED_LAYER_TYPES = ['Conv2D', 'MatMul']
class CostMetric(Enum):
    """Metrics used to measure the cost of a model or an individual layer."""

    mac = 1     # cost in multiply-accumulate operations
    memory = 2  # cost in parameter memory footprint
class LayerAttributes:
    """ Holds attributes for a given layer """

    def __init__(self, layer_ref, cost, weight_shape):
        """
        Constructor
        :param layer_ref: Reference to the layer op in the TensorFlow graph
        :param cost: (memory, mac) cost tuple of the layer
        :param weight_shape: Shape of the layer's weight tensor
            (fixed bug in docstring: this is the weight shape, not the output
            activation shape — see _create_layer_attributes_list which fills it
            from the evaluated weight tensor)
        """
        self.layer_ref = layer_ref
        self.cost = cost
        self.weight_shape = weight_shape
class Svd:
"""A class for performing singular value decomposition on a tensorflow model.
The Svd class enables model compression through singular value decomposition (SVD).
It can analyze convolution and fully connected layers and perform
some analysis to find the optimal ranks for balancing compression and the
accuracy of the network.
"""
# pylint: disable=too-many-instance-attributes
def __init__(self, graph, checkpoint, metric, output_file='./svd_graph', svd_type='svd',
             num_layers=0, layers=None, layer_ranks=None, num_ranks=20, gpu=True, debug=False, no_evaluation=False,
             layer_selection_threshold=0.6):
    """
    Constructor for the Svd class

    Constructs the Svd class from a set of options passed in at construction. The class takes
    a number of named arguments which are detailed below.

    :param graph: The file path to the meta graph.
    :param checkpoint: The file path to the tensorflow checkpoint file.
    :param metric: The metric to use for determining the optimal compression. Either
            'mac' for optimizing compression to minimize multiplies and accumulates or 'memory' which
            optimizes for overall memory footprint. Defaults to 'memory'
    :param output_file: The file path for saving the compressed tensorflow graph.
            aimet will save to the directory specified, using output_file as a filename prefix
    :param svd_type: Indicates which algorithm should be used, either
            'svd' or 'ssvd'. Defaults to 'svd'.
    :param num_layers: The number of layers to compress. Defaults to '0' which uses a
            heuristic to determine the optimal number of layers to compress.
    :param layers: A list of op names to compress. All other layers will be ignored.
            Overrides num_layers and sets it to the length of this list.
    :param layer_ranks: required only if no_evaluation is set to True. A list of tuples to compress
            layers specified in layers argument.
    :param num_ranks: The number of ranks (compression_points) to evaluate for compression.
            Defaults to 20. Value should be greater than 2.
    :param gpu: Indicates if the algorithm should run on GPU or CPU. Defaults to GPU. To
            use CPU set to false
    :param debug: If true debug messages will be printed. Defaults to False.
    :param no_evaluation: If true, ranks will be set manually from user. Defaults to False.
    :param layer_selection_threshold: Threshold (0-1) to use to select the top layers in the network
    :raises: ValueError: An error occurred processing one of the input parameters.
    """
    # pylint: disable=too-many-arguments
    self._sanity_check_constructor_parameters(layer_selection_threshold, layers, no_evaluation, num_layers,
                                              num_ranks, svd_type)
    self._gpu = gpu
    self._debug = debug
    self._default_meta_graph = graph
    self._default_checkpoint = checkpoint
    self._output_file = output_file
    self._output_dir = os.path.dirname(output_file)
    # Bug fix: a bare filename gives dirname '' — os.path.exists('') is False and
    # os.makedirs('') raises, so only create the directory when one was specified.
    if self._output_dir and not os.path.exists(self._output_dir):
        os.makedirs(self._output_dir)
    logger.info('Saving SVD output as: %s', output_file)
    self.svd_type = _SVD_TYPES[svd_type]
    self._metric = metric
    self._num_layers = num_layers
    self._selected_layers = []
    self._networkCost = None
    if layers:
        logger.debug('Attempting to compress: %s', layers)
        self._layers_to_compress = layers
    else:
        self._layers_to_compress = []
    if num_ranks < 0:
        raise ValueError("num_ranks must be >= 0")
    self._num_ranks = num_ranks
    # Bug fix: always define _layer_ranks/_num_layer_ranks so that later code
    # (e.g. manual_rank_svd) does not hit AttributeError when no ranks were given.
    self._layer_ranks = layer_ranks if layer_ranks else None
    self._num_layer_ranks = len(layer_ranks) if layer_ranks else 0
    if layer_ranks:
        logger.debug('Attempting to compress model with user provided ranks : %s', layer_ranks)

    # Setup the SVD instance and load the graph
    self._svd = pymo.GetSVDInstance()
    self._no_eval = no_evaluation
    self._layer_selection_threshold = layer_selection_threshold
    self._model_performance_candidate_ranks = list()

    # Todo: Need to look at these attributes and see how to handle them better
    # Very likely these attributes don't need to be object attributes
    self._generator = None
    self._eval_names = None
    self._eval_func = None
    self._iterations = None
    self._run_graph = None
    self._baseline_perf = None
    self._error_margin = None
    self._compressible_ops = None
@staticmethod
def _sanity_check_constructor_parameters(layer_selection_threshold, layers, no_evaluation, num_layers,
                                         num_ranks, svd_type):
    """
    Validate constructor arguments, raising ValueError on the first violation
    :param layer_selection_threshold: Fraction (0-1) used by automatic layer selection
    :param layers: Optional list of layer names (mandatory in manual/no-evaluation mode)
    :param no_evaluation: True for manual-rank mode
    :param num_layers: Number of layers to compress (>= 0)
    :param num_ranks: Candidate-rank count (> 2 in auto mode)
    :param svd_type: One of the keys of _SVD_TYPES ('svd' or 'ssvd')
    :raises ValueError: On any invalid parameter
    """
    if svd_type not in _SVD_TYPES:
        raise ValueError('Invalid SVD mode: ' + svd_type)
    if no_evaluation and not layers:
        raise ValueError('Both layers and layer_rank parameters are needed for Manual mode')
    if not 0 <= layer_selection_threshold <= 1:
        raise ValueError('Layer selection threshold should be between 0 and 1')
    if not no_evaluation and num_ranks <= 2:
        raise ValueError('Number of ranks should be greater than 2 for auto mode')
    if num_layers < 0:
        raise ValueError("num_layers must be >= 0")
def _compute_per_layer_compression_ratio(self, split_layers_shape, output_shape, original_layer_shape, op_type):
    """
    Compute the compression ratio achieved by replacing one layer with its splits
    :param split_layers_shape: Weight shapes of the split layers
    :param output_shape: Output activation shape of the layer
    :param original_layer_shape: Weight shape of the original (unsplit) layer
    :param op_type: TF op type of the layer ('Conv2D' or 'MatMul')
    :return: The compression ratio of split layers
    """
    original_cost = self._compute_layer_cost(original_layer_shape, output_shape, op_type)
    total_split_mem, total_split_mac = 0, 0
    for shape in split_layers_shape:
        mem_cost, mac_cost = self._compute_layer_cost(shape, output_shape, op_type)
        # TF Dimension objects may come through here; coerce them to plain ints
        total_split_mem += mem_cost if isinstance(mem_cost, int) else mem_cost.value
        total_split_mac += mac_cost if isinstance(mac_cost, int) else mac_cost.value
    if self._metric is CostMetric.memory:
        ratio = (original_cost[0] - total_split_mem) / original_cost[0]
        logger.debug('Original Layer Cost: %s Memory Compression Ratio: %s', original_cost[0], ratio)
    else:
        ratio = (original_cost[1] - total_split_mac) / original_cost[1]
        logger.debug('Original Layer Cost: %s MAC Compression Ratio: %s', original_cost[1], ratio)
    return ratio
@staticmethod
def _reset_session(sess):
    """
    Reset the TF default graph and close the given tf.compat.v1.Session
    :param sess: tf.compat.v1.Session to dispose of
    :return: None
    """
    # Clear the default graph first so no stale ops survive into the next
    # graph build, then release the session's resources.
    tf.compat.v1.reset_default_graph()
    sess.close()
@staticmethod
def _load_graph(graph, meta_graph, checkpoint):
    """
    Create a session on the given graph and restore trained parameters into it
    :param graph: TF graph to attach the new session to
    :param meta_graph: Path to the meta-graph file
    :param checkpoint: Path to the checkpoint file
    :return: Tuple of (newly created session, saver)
    """
    logger.info('Loading graph: %s', meta_graph)
    session = tf.compat.v1.Session(graph=graph)
    # Import the graph definition, then restore the variable values
    saver = tf.compat.v1.train.import_meta_graph(meta_graph)
    saver.restore(session, checkpoint)
    return session, saver
@staticmethod
def _get_layer_type(op):
    """
    Map a TF op to the corresponding PyMo layer-type enumerated value
    :param op: TF op
    :return: PyMo layer-type value; LAYER_TYPE_OTHER for unsupported op types
    """
    return _SVD_LAYER_TYPES.get(op.type, pymo.LAYER_TYPE_OTHER)
class LayerSelectionScheme(Enum):
    """Schemes supported for choosing which layers to SVD-compress."""

    manual = 1         # caller names the layers explicitly
    top_n_layers = 2   # pick the N most expensive layers
    top_x_percent = 3  # pick layers until a cost-percentage threshold is reached
@staticmethod
def _pick_compression_layers(sess, cost_metric, layer_select_scheme, **kwargs):
    """
    Pick layers for SVD compression given parameters
    :param sess: tf.compat.v1.Session
    :param cost_metric: Metric to use for evaluating layer cost (either in terms of memory or mac)
    :param layer_select_scheme: Layer selection scheme to use
    :param kwargs: Keyword arguments that depend on which layer selection scheme is specified
        top_n_layers:: num_layers: Number of layers to pick
        top_x_percent:: percent_thresh: Top layers up to this parameter will be selected
        manual:: layers_to_compress: List of layers (names) to compress
    :return: Tuple of (selected layer attributes, total network cost)
    :raises TypeError: If cost_metric/layer_select_scheme are of the wrong type
    :raises RuntimeError: If no suitable layers are found
    """
    # pylint: disable=too-many-locals,too-many-branches
    if not isinstance(cost_metric, CostMetric):
        raise TypeError("cost_metric is not of type CostMetric")
    if not isinstance(layer_select_scheme, Svd.LayerSelectionScheme):
        raise TypeError("layer_selection_scheme is not of type Svd.LayerSelectionScheme")

    # Find all compressible ops
    query = core.OpQuery(sess.graph)
    compressible_ops = query.get_weight_ops()
    compressible_ops = [op for op in compressible_ops if op.type in _SVD_SUPPORTED_LAYER_TYPES]
    layer_attributes_list = Svd._create_layer_attributes_list(compressible_ops, sess)
    network_cost = Svd._compute_network_cost(layer_attributes_list)

    # Heuristic1: Reject any ops whose param shape does not meet a base criterion
    pruned_list = []
    for layer_attributes in layer_attributes_list:
        h, w, n, c = layer_attributes.weight_shape
        if (n >= _MIN_LAYER_DIM_FOR_SVD) and ((c * h * w) >= _MIN_LAYER_DIM_FOR_SVD):
            pruned_list.append(layer_attributes)
        else:
            # Fix: use the module logger instead of a bare print, consistent
            # with logging everywhere else in this file
            logger.info('Pruning out %s: shape is %s', layer_attributes.layer_ref.name,
                        layer_attributes.weight_shape)

    # Reset layer_attributes_list for the next phase
    layer_attributes_list = pruned_list
    pruned_list = []

    # Sort the attribute list based on cost
    if cost_metric == CostMetric.memory:
        layer_attributes_list.sort(key=lambda x: x.cost[0], reverse=True)
    else:
        layer_attributes_list.sort(key=lambda x: x.cost[1], reverse=True)

    if layer_select_scheme == Svd.LayerSelectionScheme.top_n_layers:
        num_layers = kwargs['num_layers']
        pruned_list = layer_attributes_list[:num_layers]
    elif layer_select_scheme == Svd.LayerSelectionScheme.top_x_percent:
        percent_thresh = kwargs['percent_thresh']
        accum_cost = 0.
        total_cost = network_cost[0] if (cost_metric == CostMetric.memory) else network_cost[1]
        for layer in layer_attributes_list:
            cost = layer.cost[0] if (cost_metric == CostMetric.memory) else layer.cost[1]
            # Keep adding layers while the running cost stays under the threshold
            if (100 * (cost + accum_cost) / total_cost) < percent_thresh:
                pruned_list.append(layer)
                accum_cost += cost
    elif layer_select_scheme == Svd.LayerSelectionScheme.manual:
        layers_to_compress = kwargs['layers_to_compress']
        for layer in layer_attributes_list:
            if layer.layer_ref.name in layers_to_compress:
                pruned_list.append(layer)

    if not pruned_list:
        raise RuntimeError('No suitable layers found in the model.')
    return pruned_list, network_cost
@staticmethod
def _create_layer_attributes_list(ops_to_use, sess):
    """
    Build a LayerAttributes entry for each of the given TF ops
    :param ops_to_use: TF ops to collect layer attributes for
    :param sess: tf.compat.v1.Session to use
    :return: List of LayerAttributes, one per op
    """
    query = core.OpQuery(sess.graph)
    attributes = []
    for op in ops_to_use:
        weight_shape = query.get_weights_for_op(op).eval(session=sess).shape
        if op.type == 'MatMul':
            # Normalize the 2-D FC weight shape (I, O) to the conv-style 4-D form (1, 1, I, O)
            in_dim, out_dim = weight_shape
            weight_shape = (1, 1, in_dim, out_dim)
        layer_cost = Svd._compute_layer_cost(weight_shape, op.outputs[0].shape, op.type)
        attributes.append(LayerAttributes(op, layer_cost, weight_shape))
    return attributes
@staticmethod
def _compute_network_cost(layer_attributes_list):
"""
Compute aggregate cost of the layers included in the layer attributes list
:param layer_attributes_list: List of layer attributes
:return: Computed cost
"""
mac_cost = 0
mem_cost = 0
for layer_attributes in layer_attributes_list:
op_mem_cost, op_mac_cost = layer_attributes.cost
mem_cost += op_mem_cost
mac_cost += op_mac_cost
return mem_cost, mac_cost
@staticmethod
def _compute_layer_cost(weights_shape, output_dims, op_type):
"""
Compute cost of a layer
:param weights_shape: Shape of the weights of this layer
:param output_dims: Shape of the output of this layer
:param op_type: Type of this TF op
:return: Computed layer cost
"""
# for outputs, TF uses dims [N,H,W,C]
mem_cost = reduce(operator.mul, weights_shape)
if op_type == 'Conv2D':
mac_cost = mem_cost * int(output_dims[1]) * int(output_dims[2])
elif op_type == 'MatMul':
mac_cost = mem_cost
return mem_cost, mac_cost
def _compute_compression_ratio(self, sess, cost_metric):
    """
    Compute the network-wide compression ratio relative to the stored baseline cost
    :param sess: tf.compat.v1.Session holding the (compressed) graph
    :param cost_metric: CostMetric to compare with (memory or mac)
    :return: Computed compression ratio
    """
    query = core.OpQuery(sess.graph)
    supported_ops = [op for op in query.get_weight_ops() if op.type in _SVD_SUPPORTED_LAYER_TYPES]
    layer_attrs = Svd._create_layer_attributes_list(supported_ops, sess)
    # Drop the originally-selected layers: their split replacements now carry the cost
    selected_names = [layer.layer_ref.name for layer in self._selected_layers]
    layer_attrs = [layer for layer in layer_attrs if layer.layer_ref.name not in selected_names]
    compressed_cost = Svd._compute_network_cost(layer_attrs)
    index = 0 if cost_metric is CostMetric.memory else 1
    savings = self._networkCost[index] - compressed_cost[index]
    return savings / self._networkCost[index]
def _store_net_stats(self, sess):
    """
    Store layer attributes in the PyMo library instance
    :param sess: tf.compat.v1.Session
    :return: None
    """
    # pylint: disable=too-many-locals,too-many-branches,too-many-statements
    # Tell PyMo which cost metric the rank search should optimize for
    if self._metric == CostMetric.memory:
        pymo_metric = pymo.COST_TYPE_MEMORY
    else:
        pymo_metric = pymo.COST_TYPE_MAC
    self._svd.SetCostMetric(pymo_metric)

    # Layer-selection: explicit list > top-N > top-x-percent heuristic
    if self._layers_to_compress:
        selected_layers, network_cost = self._pick_compression_layers(sess,
                                                                      self._metric,
                                                                      self.LayerSelectionScheme.manual,
                                                                      layers_to_compress=self._layers_to_compress)
    elif self._num_layers > 0:
        selected_layers, network_cost = self._pick_compression_layers(sess,
                                                                      self._metric,
                                                                      self.LayerSelectionScheme.top_n_layers,
                                                                      num_layers=self._num_layers)
    else:
        percent_thresh = self._layer_selection_threshold * 100
        selected_layers, network_cost = self._pick_compression_layers(sess,
                                                                      self._metric,
                                                                      self.LayerSelectionScheme.top_x_percent,
                                                                      percent_thresh=percent_thresh)
    self._networkCost = network_cost
    print("Selected Layers:")
    for layer in selected_layers:
        print(layer.layer_ref.name)
    self._selected_layers = selected_layers

    # Get the op query module and query for all Conv/FC layers
    query = core.OpQuery(sess.graph)
    self._compressible_ops = query.get_weight_ops()
    # Set up the layer attributes for each Conv/FC layer (this also checks for trailing
    # bias adds
    for i, op in enumerate(self._compressible_ops):
        # If op is not a selected layer, skip
        if not any(op is layer.layer_ref for layer in selected_layers):
            continue
        attr = pymo.LayerAttributes()
        layerName = op.name
        output_dims = op.outputs[0].shape  # TF uses dims [N,H,W,C]
        attr.layerType = self._get_layer_type(op)
        # Single vs successive SVD changes the compression mode PyMo applies
        if self.svd_type == pymo.TYPE_SINGLE:
            attr.mode = self._svd.GetCompressionType(attr.layerType, 'single')
        else:
            attr.mode = self._svd.GetCompressionType(attr.layerType, 'successive')
        if op.type == 'Conv2D' or op.type == 'MatMul':
            logger.info('Setting layer attributes for: %s', layerName+'('+op.type+')')
            # Get weights
            weights = query.get_weights_for_op(op).eval(session=sess)
            w_shape = weights.shape
            logger.debug('Got weight shape: %s', w_shape)
            # Check for bias op — assumed (if present) to be the op immediately
            # following the weight op in the compressible-ops list
            bias = None
            if (i+1) < len(self._compressible_ops):
                bias = query.get_bias_for_op(self._compressible_ops[i+1])
                if bias is not None:
                    bias = bias.eval(session=sess)
                    logger.debug('Got %s w/bias. Shape: %s', op.type, str(bias.shape))
            if op.type == 'Conv2D':
                attr.shape = [w_shape[3], w_shape[2], w_shape[0], w_shape[1]]  # TF Conv weight order [KH,KW,ID,OD]
                attr.activation_dims = (output_dims[1], output_dims[2])  # (H,W)
                # CONV weights are stored in the order {H,W,I,O} in Tensorflow
                # Re-order them to the form {O,I,H,W}
                weights = np.transpose(weights, (3, 2, 0, 1))
            elif op.type == 'MatMul':
                attr.shape = [w_shape[1], w_shape[0], 1, 1]  # TF FC weight order [ID,OD], SVD expects [OD,ID]
                attr.activation_dims = (1, 1)
                weights = np.transpose(weights, (1, 0))
            # blobs is a numpy array... add to list then set
            params = [weights.flatten()]
            if bias is not None:
                params.append(bias.flatten())
            attr.blobs = params
            # Save the attributes for this layer
            self._svd.StoreLayerAttributes(layerName, attr)
def _compute_objective_score(self, model_perf, compression_score):
"""
Compute objective score of a given compression model
:param model_perf: Performance of compressed model
:param compression_score: Compression ratio
:return: Computed objective score
"""
if model_perf + (self._error_margin / 100) >= self._baseline_perf:
objective_score = 1 - model_perf + (1 - compression_score)
else:
objective_score = 1 + (1 - compression_score) # treat lower accuracies as 0
return objective_score
def _split_conv_layer(self, sess, svd_ranks, attr, op_name, bias_op_name=None):
    """
    Split a given conv layer given a rank
    :param sess: tf.compat.v1.Session
    :param svd_ranks: Rank to split the layer with (two ranks in case of SSVD)
    :param attr: Reference to the corresponding layer attribute
    :param op_name: Name of the op to split
    :param bias_op_name: Name of the corresponding bias op (if any)
    :return: Compression ratio of the split layers
    """
    # pylint: disable=too-many-statements,too-many-branches,too-many-locals
    logger.info('Splitting conv op: %s', op_name)
    # Retrieve the op(s) from the current graph
    op = sess.graph.get_operation_by_name(op_name)
    bias_op = None
    if bias_op_name:
        bias_op = sess.graph.get_operation_by_name(bias_op_name)
    # Create new 'conv_a' layer
    pad_mode = op.get_attr('padding')
    data_format = op.get_attr('data_format').decode('utf-8')
    strides = op.get_attr('strides')
    # Print current conv weight shape
    query = core.OpQuery(sess.graph)
    w_shape = query.get_weights_for_op(op).get_shape().as_list()
    logger.debug('Original %s weight shape: %s', op.name, str(w_shape))
    split_weights, weight_sizes = [], []
    split_biases, bias_sizes = [], []
    # TF weights are in [H,W,I,O] order. We must reshape the split weights to SVD format [O,I,H,W]
    # and then transpose back
    # Conv a weights are: [1, 1, w_shape[2], svd_ranks[0]]
    split_conv_a_w_shape = (svd_ranks[0], w_shape[2], 1, 1)
    conv_a_weights = np.zeros(split_conv_a_w_shape)  # transpose(2,3,1,0)
    split_weights.append(conv_a_weights.flatten().tolist())
    weight_sizes.append(conv_a_weights.size)
    if bias_op:
        conv_a_bias = np.zeros(svd_ranks[0])
        split_biases.append(conv_a_bias.flatten().tolist())
        bias_sizes.append(conv_a_bias.size)
    num_filters = w_shape[3]
    if len(svd_ranks) >= 2 and attr.mode == pymo.TYPE_SUCCESSIVE:
        # Output channels = output_rank (s)
        num_filters = svd_ranks[1]
    # Conv b weights are: [w_shape[0],w_shape[1],svd_ranks[0],num_filters]
    split_conv_b_w_shape = (num_filters, svd_ranks[0], w_shape[0], w_shape[1])
    conv_b_weights = np.zeros(split_conv_b_w_shape)
    conv_b_bias = np.zeros(num_filters)
    split_weights.append(conv_b_weights.flatten().tolist())
    weight_sizes.append(conv_b_weights.size)
    if bias_op:
        split_biases.append(conv_b_bias.flatten().tolist())
        bias_sizes.append(conv_b_bias.size)
    # Only create a third conv layer when performing successive SVD
    if len(svd_ranks) >= 2 and attr.mode == pymo.TYPE_SUCCESSIVE:
        # Conv c weights are: [1,1,num_filters,w_shape[3]]
        split_conv_c_w_shape = (w_shape[3], num_filters, 1, 1)
        conv_c_weights = np.zeros(split_conv_c_w_shape)
        conv_c_bias = np.zeros(w_shape[3])
        split_weights.append(conv_c_weights.flatten().tolist())
        weight_sizes.append(conv_c_weights.size)
        if bias_op:
            split_biases.append(conv_c_bias.flatten().tolist())
            bias_sizes.append(conv_c_bias.size)
    # Split the weights and biases according to the number of layers and ranks
    split_weights = self._svd.SplitLayerWeights(op.name, split_weights, weight_sizes, svd_ranks)
    split_biases = self._svd.SplitLayerBiases(op.name, split_biases, bias_sizes, svd_ranks)
    if split_weights:
        conv_a_name = op.name+'_a'
        conv_a_weights = np.array(split_weights[0]).reshape(split_conv_a_w_shape).transpose(2, 3, 1, 0)
        conv_a_w = tf.Variable(initial_value=conv_a_weights, name=conv_a_name+'_w', dtype=tf.float32)
        logger.debug('%s weight shape: %s', conv_a_name, str(conv_a_weights.shape))
        # Create conv_a using default strides (1,1)
        # pylint: disable=no-member
        conv_acts = tf.nn.conv2d(op.inputs[0], conv_a_w, strides=[1, 1, 1, 1], data_format=data_format,
                                 padding=pad_mode, name=op.name+'_a')  # dilation_rate=dilation_rate
        if bias_op:
            conv_a_bias = tf.Variable(initial_value=split_biases[0], name=conv_a_name+'_bias', dtype=tf.float32)
            conv_acts = conv_acts + conv_a_bias  # tf.nn.bias_add(conv_acts, split_biases[0])
        if len(split_weights) > 1:
            # Create conv_b
            conv_b_name = op.name+'_b'
            conv_b_weights = np.array(split_weights[1]).reshape(split_conv_b_w_shape).transpose(2, 3, 1, 0)
            conv_b_w = tf.Variable(initial_value=conv_b_weights, name=conv_b_name+'_w', dtype=tf.float32)
            logger.debug('%s weight shape: %s', conv_b_name, str(conv_b_weights.shape))
            # pylint: disable=no-member
            conv_acts = tf.nn.conv2d(conv_acts, conv_b_w, strides=strides, data_format=data_format, padding=pad_mode, name=conv_b_name)  # dilation_rate=dilation_rate
            if bias_op:
                conv_b_bias = tf.Variable(initial_value=split_biases[1], name=conv_b_name+'_bias', dtype=tf.float32)
                conv_acts = conv_acts + conv_b_bias  # tf.nn.bias_add(conv_acts, split_biases[1])
            ratio = self._compute_per_layer_compression_ratio([conv_a_w.shape, conv_b_w.shape], conv_acts.shape, w_shape, "Conv2D")
        # Only create a third conv layer when performing successive SVD
        if len(split_weights) > 2 and len(svd_ranks) >= 2 and attr.mode == pymo.TYPE_SUCCESSIVE:
            # Create conv_c, using default strides (1,1)
            conv_c_name = op.name+'_c'
            conv_c_weights = np.array(split_weights[2]).reshape(split_conv_c_w_shape).transpose(2, 3, 1, 0)
            conv_c_w = tf.Variable(initial_value=conv_c_weights, name=conv_c_name+'_w', dtype=tf.float32)
            logger.debug('%s weight shape: %s', conv_c_name, str(conv_c_weights.shape))
            # pylint: disable=no-member
            conv_acts = tf.nn.conv2d(conv_acts, conv_c_w, strides=[1, 1, 1, 1], data_format=data_format,
                                     padding=pad_mode, name=conv_c_name)
            if bias_op:
                conv_c_bias = tf.Variable(initial_value=split_biases[2], name=conv_c_name+'_bias', dtype=tf.float32)
                conv_acts = conv_acts + conv_c_bias  # tf.nn.bias_add(conv_acts, split_biases[2])
        # Reroute: consumers of the original (bias) output now consume the split stack
        consumers = []
        rerouted_inputs = [bias_op.outputs[0]] if bias_op else [op.outputs[0]]
        for inp in rerouted_inputs:
            for consumer in inp.consumers():
                consumers.append(consumer)
        _ = graph_editor.reroute_ts(conv_acts, rerouted_inputs, can_modify=consumers)
    # NOTE(review): 'ratio' is only bound when PyMo returns at least two split
    # weights; a single split would raise UnboundLocalError here — confirm the
    # PyMo contract always yields >= 2 splits for conv layers
    return ratio
def _split_fc_layer(self, sess, svd_ranks, op_name, bias_op_name=None):
    """
    Split a given fully connected layer given a rank
    :param sess: tf.compat.v1.Session
    :param svd_ranks: Rank to split the layer with (two ranks in case of SSVD)
    :param op_name: Name of the op to split
    :param bias_op_name: Name of the corresponding bias op (if any)
    :return: Compression ratio of the split layers
    """
    # pylint: disable=too-many-statements, too-many-locals
    logger.info('Splitting fully connected op: %s', op_name)
    # Retrieve the op(s) from the current graph
    op = sess.graph.get_operation_by_name(op_name)
    bias_op = None
    if bias_op_name:
        bias_op = sess.graph.get_operation_by_name(bias_op_name)
    # Print current weight shape
    query = core.OpQuery(sess.graph)
    w_shape = query.get_weights_for_op(op).get_shape().as_list()
    logger.debug('Original %s weight shape: %s', op.name, str(w_shape))
    split_weights, weight_sizes = [], []
    split_biases, bias_sizes = [], []
    # FC weights are: [w_shape[2],svd_ranks[0]] in [I,O] order.
    # We must reshape the split weights to SVD format [O,I] and then transpose to NHWC
    split_fc_a_w_shape = (svd_ranks[0], w_shape[0])
    fc_a_weights = np.zeros(split_fc_a_w_shape)
    fc_a_bias = np.zeros(svd_ranks[0])
    split_weights.append(fc_a_weights.flatten().tolist())
    weight_sizes.append(fc_a_weights.size)
    if bias_op:
        split_biases.append(fc_a_bias.flatten().tolist())
        bias_sizes.append(fc_a_bias.size)
    # FC b weights are: [svd_ranks[0],num_filters] in [H,W,I,O] order.
    # We must reshape the split weights to SVD format [O,I,H,W] and then transpose to NHWC
    split_fc_b_w_shape = (w_shape[1], svd_ranks[0])
    fc_b_weights = np.zeros(split_fc_b_w_shape)
    split_weights.append(fc_b_weights.flatten().tolist())
    weight_sizes.append(fc_b_weights.size)
    if bias_op:
        fc_b_bias = np.zeros(w_shape[1])
        split_biases.append(fc_b_bias.flatten().tolist())
        bias_sizes.append(fc_b_bias.size)
    # Split the weights and biases according to the number of layers and ranks
    split_weights = self._svd.SplitLayerWeights(op.name, split_weights, weight_sizes, svd_ranks)
    split_biases = self._svd.SplitLayerBiases(op.name, split_biases, bias_sizes, svd_ranks)
    if split_weights:
        fc_a_name = op.name+'_a'
        fc_a_weights = np.array(split_weights[0]).reshape(split_fc_a_w_shape).transpose(1, 0)
        fc_a_w = tf.Variable(initial_value=fc_a_weights, name=fc_a_name+'_w', dtype=tf.float32)
        logger.debug('%s weight shape: %s', fc_a_name, str(fc_a_weights.shape))
        # Create fc_a
        fc_acts = tf.matmul(op.inputs[0], fc_a_w, name=fc_a_name)
        if bias_op:
            fc_a_bias = tf.Variable(initial_value=split_biases[0], name=fc_a_name+'_bias', dtype=tf.float32)
            fc_acts = fc_acts + fc_a_bias
        if len(split_weights) > 1:
            # Create fc_b
            fc_b_name = op.name+'_b'
            fc_b_weights = np.array(split_weights[1]).reshape(split_fc_b_w_shape).transpose(1, 0)
            fc_b_w = tf.Variable(initial_value=fc_b_weights, name=fc_b_name+'_w', dtype=tf.float32)
            logger.debug('%s weight shape: %s', fc_b_name, str(fc_b_weights.shape))
            fc_acts = tf.matmul(fc_acts, fc_b_w, name=fc_b_name)
            if bias_op:
                fc_b_bias = tf.Variable(initial_value=split_biases[1], name=fc_b_name+'_bias', dtype=tf.float32)
                fc_acts = fc_acts + fc_b_bias
            ratio = self._compute_per_layer_compression_ratio([fc_a_w.shape, fc_b_w.shape], fc_acts.shape, w_shape, 'MatMul')
        # Reroute: consumers of the original (bias) output now consume the split stack
        consumers = []
        rerouted_inputs = [bias_op.outputs[0]] if bias_op else [op.outputs[0]]
        for inp in rerouted_inputs:
            for consumer in inp.consumers():
                consumers.append(consumer)
        _ = graph_editor.reroute_ts(fc_acts, rerouted_inputs, can_modify=consumers)
    # NOTE(review): 'ratio' is only bound when PyMo returns at least two split
    # weights — confirm the PyMo contract always yields >= 2 splits for FC layers
    return ratio
def _split_layers(self, sess, rank_index, use_best_ranks):
    """
    Split all the selected layers given a rank index
    :param sess: tf.compat.v1.Session
    :param rank_index: Rank index to use for finding the ranks
    :param use_best_ranks: Use the best rank index (for final compressed network)
    :return: List of per-selected-layer statistics
    """
    layer_stats = list()
    for i, op in enumerate(self._compressible_ops):
        # If op is not a selected layer, skip
        if not any(op is layer.layer_ref for layer in self._selected_layers):
            continue
        # Bias is taken care of as part of the Conv/FC op
        if op.type in ['Add', 'BiasAdd']:
            continue
        # Get the stored attributes for this op
        attr = self._svd.GetLayerAttributes(op.name)
        if not attr:
            raise RuntimeError("Layer attributes not available for layer"+op.name)
        if use_best_ranks:
            # Final network: use the ranks PyMo stored as best
            svd_ranks = attr.bestRanks
        else:
            svd_ranks = self._svd.GetCandidateRanks(op.name, rank_index)
        if svd_ranks:
            # A trailing Add/BiasAdd op (if present) is folded into the split
            bias_op = None
            if i+1 < len(self._compressible_ops):
                bias_op = self._compressible_ops[i+1]
                bias_op = bias_op.name if bias_op.type in ['Add', 'BiasAdd'] else None
            if op.type in ['Conv2D']:
                ratio = self._split_conv_layer(sess, svd_ranks, attr, op.name, bias_op)
            elif op.type in ['MatMul']:
                ratio = self._split_fc_layer(sess, svd_ranks, op.name, bias_op)
            per_layer_stats = stats_u.SvdStatistics.PerSelectedLayer(op.name, svd_ranks, ratio)
            layer_stats.append(per_layer_stats)
    return layer_stats
def _create_compressed_network(self, sess, rank_index, use_best_ranks):
    """
    Create a compressed network for a given rank index
    :param sess: tf.compat.v1.Session
    :param rank_index: Rank index to use for finding the ranks
    :param use_best_ranks: If True, use the stored best ranks (final compressed network)
    :return: Per-layer statistics produced while splitting
    """
    # Splitting the selected layers rewires the graph connections in place
    return self._split_layers(sess, rank_index, use_best_ranks)
def _perform_rank_selection(self):
    """
    Perform rank selection procedure
    :return: SvdStatistics for the best rank index found
    :raises RuntimeError: If no candidate ranks exist, or none meet the error margin
    """
    # pylint: disable=too-many-locals
    stats_per_rank_index = list()
    self._svd.ComputeNetworkCost()
    self._num_ranks = self._svd.SetCandidateRanks(self._num_ranks)
    if not self._num_ranks:
        raise RuntimeError('No good candidate ranks found for compressing specified layers.')
    # Ranks are in order from least compression to highest
    best_index = -1
    optimal_score = 0.0
    for rank_index in range(self._num_ranks):
        g = tf.Graph()
        with g.as_default():
            # Create a new network for each rank_index
            self._svd.PrintCandidateRanks(rank_index, False)
            # Load the default graph so we are operating on a fresh copy of the original graph
            sess, saver = self._load_graph(g, self._default_meta_graph, self._default_checkpoint)
            per_layer_stats = self._create_compressed_network(sess, rank_index, False)
            # Save the temp model
            output_file = os.path.join(self._output_dir, 'svd_rank_index_' + str(rank_index))
            self._save_graph(sess, saver, output_file)
            # Reset the session and start a new graph for loading the compressed model
            self._reset_session(sess)
        g = tf.Graph()
        with g.as_default():
            # In TF after making changes to the graph you must save and reload, then evaluate
            sess, saver = self._load_graph(g, output_file+'.meta', output_file)
            model_perf = self._run_graph(sess, self._generator, self._eval_names, self._eval_func, self._iterations)
            logger.info('%s performance: %s', output_file, str(model_perf))
            self._model_performance_candidate_ranks.append(model_perf * 100)
            # Estimate relative compression score for this rank_index
            compression_score = self._compute_compression_ratio(sess, self._metric)
            objective_score = self._compute_objective_score(model_perf, compression_score)
            rank_data = stats_u.SvdStatistics.PerRankIndex(rank_index=rank_index, model_accuracy=model_perf,
                                                           model_compression_ratio=compression_score,
                                                           layer_stats_list=per_layer_stats)
            stats_per_rank_index.append(rank_data)
            logger.info('Compressed network with rank_index %i/%i: accuracy = %f percent '
                        'with %f percent compression (%r option) and an objective score of %f',
                        rank_index, self._num_ranks, model_perf * 100, compression_score * 100,
                        self._metric, objective_score)
            if rank_index == 0:
                optimal_score = objective_score
                logger.info('Initializing objective score to %f at rank index %i', optimal_score, rank_index)
            # Stop searching as soon as accuracy drops below the allowed margin;
            # higher rank indices compress more and would only degrade further
            if model_perf + self._error_margin/100 < self._baseline_perf:
                logger.info('Model performance %f falls below %f percent of baseline performance %f'
                            ' Ending rank selection', model_perf, self._error_margin, self._baseline_perf)
                break
            else:
                # Lower objective score is better
                if objective_score <= optimal_score:
                    optimal_score = objective_score
                    logger.info('Found a better value for the objective score %f at rank_index %i',
                                optimal_score, rank_index)
                    best_index = rank_index
    if best_index != -1:
        self._svd.StoreBestRanks(best_index)
        # Both ratios are computed on the last evaluated session for the summary stats
        memory_compression_ratio = self._compute_compression_ratio(sess, CostMetric.memory)
        mac_compression_ratio = self._compute_compression_ratio(sess, CostMetric.mac)
        stats = stats_u.SvdStatistics(self._baseline_perf, model_perf, self._metric, best_index,
                                      mem_comp_ratio=memory_compression_ratio, mac_comp_ratio=mac_compression_ratio,
                                      rank_stats_list=stats_per_rank_index)
        # close the session and reset the default graph
        self._reset_session(sess)
        return stats
    # close the session and reset the default graph
    self._reset_session(sess)
    raise RuntimeError('No suitable ranks found to compress model within defined error bounds.')
def manual_rank_svd(self):
    """
    Push the user-specified ranks into the PyMo library (no_eval mode).

    :return: An SvdStatistics object for the manually-ranked compressed model
    :raises ValueError: if layer names/ranks are missing or malformed, or if the
        number of rank tuples does not match the number of layers to compress
    """
    # The total network cost must be computed before ranks can be stored
    self._svd.ComputeNetworkCost()

    # In no_eval mode the caller must say exactly which layers to compress
    if not self._layer_ranks:
        raise ValueError('Layer names MUST be specified in no_eval mode.')

    # Every entry must be a rank tuple, for both SVD and SSVD
    if not all(isinstance(entry, tuple) for entry in self._layer_ranks):
        raise ValueError('layer_ranks should be in list of tuples format for both SVD and SSVD')

    # Exactly one rank tuple is required per layer being compressed
    if len(self._layers_to_compress) != self._num_layer_ranks:
        raise ValueError('Number of Input SVD ranks does not match number of layers.')

    for name, rank_pair in zip(self._layers_to_compress, self._layer_ranks):
        ranks = [rank_pair[1]]
        if self.svd_type == _SVD_TYPES['ssvd']:
            # SSVD stores two ranks per layer.
            # NOTE(review): both entries come from rank_pair[1]; confirm whether
            # the first entry was intended to be rank_pair[0].
            ranks.append(rank_pair[1])
        self._svd.StoreBestRanks(name, ranks)

    return self._stats_for_manual_rank_svd()
@staticmethod
def _save_graph(sess, saver, output_graph):
    """
    Save the session's graph and variables to disk.

    :param sess: tf.compat.v1.Session to save
    :param saver: tf.compat.v1.train.Saver instance for the session
    :param output_graph: Filename and path for saving the output checkpoint
    :return: None
    """
    logger.info('Saving graph: %s', output_graph)
    saver.save(sess, output_graph)
    # Also emit a graph summary next to the checkpoint for TensorBoard
    summary_dir = os.path.dirname(output_graph) + "/models"
    _ = tf.compat.v1.summary.FileWriter(summary_dir, sess.graph)
def _save_compressed_network(self):
    """
    Build the compressed network using the best ranks identified and save it.

    :return: per-layer statistics for the compressed network
    """
    logger.info('Saving final compressed network')
    graph = tf.Graph()
    with graph.as_default():
        sess, saver = self._load_graph(graph, self._default_meta_graph, self._default_checkpoint)
        # rank_index 0 holds the best ranks once StoreBestRanks has been called
        per_layer_stats = self._create_compressed_network(sess, 0, True)
        # Persist the compressed graph to the configured output file
        self._save_graph(sess, saver, self._output_file)
        self._reset_session(sess)
    return per_layer_stats
def _stats_for_manual_rank_svd(self):
    """
    Save the manually-ranked compressed network, evaluate it, and collect statistics.

    :return: An SvdStatistics object for the compressed model
    """
    per_layer_stats = self._save_compressed_network()

    graph = tf.Graph()
    with graph.as_default():
        # Reload the freshly-saved compressed model and measure its performance
        sess, _ = self._load_graph(graph, self._output_file + '.meta', self._output_file)
        model_perf = self._run_graph(sess, self._generator, self._eval_names,
                                     self._eval_func, self._iterations)
        logger.info('%s performance: %s', self._output_file, str(model_perf))

        self._svd.PrintCandidateRanks(0, True)

        # Compression scores: the configured metric plus both fixed metrics
        compression_score = self._compute_compression_ratio(sess, self._metric)
        logger.info('Evaluating final model using layer(s): %s. '
                    'Final accuracy = %f percent with %f percent compression (%r option).',
                    self._eval_names, model_perf * 100, compression_score * 100, self._metric)
        memory_compression_ratio = self._compute_compression_ratio(sess, CostMetric.memory)
        mac_compression_ratio = self._compute_compression_ratio(sess, CostMetric.mac)

        # Manual mode always reports against rank_index 0
        rank_data = stats_u.SvdStatistics.PerRankIndex(rank_index=0, model_accuracy=model_perf,
                                                       model_compression_ratio=compression_score,
                                                       layer_stats_list=per_layer_stats)
        stats = stats_u.SvdStatistics(self._baseline_perf, model_perf, self._metric, 0,
                                      mem_comp_ratio=memory_compression_ratio,
                                      mac_comp_ratio=mac_compression_ratio,
                                      rank_stats_list=[rank_data])
    return stats
def compress_net(self, generator, eval_names=None, run_graph=graph_eval.evaluate_graph,
                 eval_func=graph_eval.default_eval_func, error_margin=2, iterations=100):
    """
    Compresses the network using SVD

    Runs rank selection on the network, and compresses it using the method and parameters
    passed during construction of the Svd object.

    :param generator: The generator which should be used for generating data for quantization
    :param eval_names: The list of names to use for calculating model performance.
        Defaults to ['accuracy'] when not provided
    :param run_graph: The function to use for running data through the graph and evaluating
        the network's performance. This function must return only a single number representing the
        avg performance of the model over the dataset batches.
        See the 'graph_eval' module's 'evaluate_graph' function for the prototype
    :param eval_func: The function to use for evaluating the network performance. This function should always
        return a single number that can be used for comparing different graph's performance.
        (The default is accuracy)
    :param error_margin: The acceptable degradation in network accuracy from the original.
        1 for 1% drop, etc. Defaults to 2%. Must be > 0
    :param iterations: The number of iterations (data batches) to run through the network
        for analysis. Must be > 0
    :return: An object containing compression statistics
    :raises: - ValueError: An invalid parameter was passed
             - RuntimeError: An error occurred analyzing or compressing the network. The associated error
               and other information will be returned with the error.
    """
    # NOTE: the original line carried a stray Sphinx '[docs]' viewcode artifact
    # before 'def', which made it invalid Python; removed.
    self._generator = generator

    if not eval_names:
        eval_names = ['accuracy']
    self._eval_names = eval_names
    self._run_graph = run_graph
    self._eval_func = eval_func

    # Validate numeric parameters before doing any expensive graph work
    if error_margin <= 0:
        raise ValueError('Invalid error_margin: '+str(error_margin)+'. Must pass error_margin > 0')
    self._error_margin = error_margin

    if iterations <= 0:
        raise ValueError('Invalid iterations: '+str(iterations)+'. Number of iterations must be > 0')
    self._iterations = iterations

    # Get baseline accuracy, then store the network stats
    g = tf.Graph()
    with g.as_default():
        sess, _ = self._load_graph(g, self._default_meta_graph, self._default_checkpoint)
        self._baseline_perf = run_graph(sess, generator, eval_names, eval_func, iterations)
        logger.info('Baseline performance: %f', self._baseline_perf)
        self._store_net_stats(sess)
        self._reset_session(sess)

    if self._no_eval:
        # Ranks were supplied by the user; apply them directly.
        # manual_rank_svd() saves the compressed network itself.
        stats = self.manual_rank_svd()
    else:
        # Search candidate rank indices for the best accuracy/compression trade-off
        stats = self._perform_rank_selection()
        # Rebuild and save the network with the best ranks found
        self._save_compressed_network()

    return stats