Source code for aimet_tensorflow.svd

# -*- mode: python -*-
# =============================================================================
#  Copyright (c) 2017-2018, Qualcomm Innovation Center, Inc. All rights reserved.
#  Redistribution and use in source and binary forms, with or without
#  modification, are permitted provided that the following conditions are met:
#  1. Redistributions of source code must retain the above copyright notice,
#     this list of conditions and the following disclaimer.
#  2. Redistributions in binary form must reproduce the above copyright notice,
#     this list of conditions and the following disclaimer in the documentation
#     and/or other materials provided with the distribution.
#  3. Neither the name of the copyright holder nor the names of its contributors
#     may be used to endorse or promote products derived from this software
#     without specific prior written permission.
#  SPDX-License-Identifier: BSD-3-Clause
# =============================================================================

# pylint: disable=too-many-lines

""" Implementation of the SVD model compression technique for TensorFlow """

import os
from functools import reduce
import operator
from enum import Enum
import numpy as np
import tensorflow as tf

from aimet_tensorflow import graph_editor
from aimet_tensorflow.common import core, graph_eval
import aimet_common.libpymo as pymo
from aimet_common import statistics_util as stats_u
from aimet_common.utils import AimetLogger

logger = AimetLogger.get_area_logger(AimetLogger.LogAreas.Svd)

_SVD_TYPES = {'svd': pymo.TYPE_SINGLE,
              'ssvd': pymo.TYPE_SUCCESSIVE}
                    'MatMul': pymo.LAYER_TYPE_FC}


class CostMetric(Enum):
    """ Enumeration of metrics to measure cost of a model/layer """
    mac = 1
    memory = 2

class LayerAttributes:
    """ Holds attributes for a given layer """

    def __init__(self, layer_ref, cost, weight_shape):
        :param layer_ref: Reference to the layer object in TensorFlow
        :param cost: Cost of the layer
        :param weight_shape: Shape of the output activation of the layer
        self.layer_ref = layer_ref
        self.cost = cost
        self.weight_shape = weight_shape

[docs]class Svd: """A class for performing singular value decomposition on a tensorflow model. The Svd class enables model compression through singular value decomposition (SVD). It can analyze convolution and fully connected layers and perform some analysis to find the optimal ranks for balancing compression and the accuracy of the network. """ # pylint: disable=too-many-instance-attributes def __init__(self, graph, checkpoint, metric, output_file='./svd_graph', svd_type='svd', num_layers=0, layers=None, layer_ranks=None, num_ranks=20, gpu=True, debug=False, no_evaluation=False, layer_selection_threshold=0.6): """ Constructor for the Svd class Constructs the Svd class from a set of options passed in at construction. The class takes a number of named arguments which are detailed below. :param graph: The file path to the meta graph. :param checkpoint: The file path to the tensorflow checkpoint file. :param metric: The metric to use for determining the optimal compression. Either 'mac' for optimizing compression to minimize multiplies and accumulates or 'memory' which optimizes for overall memory footprint. Defaults to 'memory' :param output_file: The file path for saving the compressed tensorflow graph. aimet will save to the directory specified, using output_file as a filename prefix :param svd_type: Indicates which algorithm should be used, either 'svd' or 'ssvd'. Defaults to 'svd'. :param num_layers: The number of layers to compress. Defaults to '0' which uses a heuristic to determine the optimal number of layers to compress. :param layers: A list of op names to compress. All other layers will be ignored. Overrides num_layers and sets it to the length of this list. :param layer_ranks: required only if no_evaluation is set to True. A list of tuples to compress layers specified in layers argument. :param num_ranks: The number of ranks (compression_points) to evaluate for compression. Defaults to 20. Value should be greater than 2. :param gpu: Indicates if the algorithm should run on GPU or CPU. Defaults to GPU. To use CPU set to false :param debug: If true debug messages will be printed. Defaults to False. :param no_evaluation: If true, ranks will be set manually from user. Defaults to False. :param layer_selection_threshold: Threshold (0-1) to use to select the top layers in the network :raises: ValueError: An error occurred processing one of the input parameters. """ # pylint: disable=too-many-arguments self._sanity_check_constructor_parameters(layer_selection_threshold, layers, no_evaluation, num_layers, num_ranks, svd_type) self._gpu = gpu self._debug = debug self._default_meta_graph = graph self._default_checkpoint = checkpoint self._output_file = output_file self._output_dir = os.path.dirname(output_file) if not os.path.exists(self._output_dir): os.makedirs(self._output_dir)'Saving SVD output as: %s', output_file) self.svd_type = _SVD_TYPES[svd_type] self._metric = metric self._num_layers = num_layers self._selected_layers = [] self._networkCost = None if layers: logger.debug('Attempting to compress: %s', layers) self._layers_to_compress = layers else: self._layers_to_compress = [] if num_ranks < 0: raise ValueError("num_ranks must be >= 0") self._num_ranks = num_ranks if layer_ranks: self._layer_ranks = layer_ranks self._num_layer_ranks = len(layer_ranks) logger.debug('Attempting to compress model with user provided ranks : %s', layer_ranks) # Setup the SVD instance and load the graph self._svd = pymo.GetSVDInstance() self._no_eval = no_evaluation self._layer_selection_threshold = layer_selection_threshold self._model_performance_candidate_ranks = list() # Todo: Need to look at these attributes and see how to handle them better # Very likely these attributes don't need to be object attributes self._generator = None self._eval_names = None self._eval_func = None self._iterations = None self._run_graph = None self._baseline_perf = None self._error_margin = None self._compressible_ops = None @staticmethod def _sanity_check_constructor_parameters(layer_selection_threshold, layers, no_evaluation, num_layers, num_ranks, svd_type): if svd_type not in _SVD_TYPES: raise ValueError('Invalid SVD mode: ' + svd_type) if no_evaluation: if not layers: raise ValueError('Both layers and layer_rank parameters are needed for Manual mode') if layer_selection_threshold < 0 or layer_selection_threshold > 1: raise ValueError('Layer selection threshold should be between 0 and 1') if not no_evaluation: if num_ranks <= 2: raise ValueError('Number of ranks should be greater than 2 for auto mode') if num_layers < 0: raise ValueError("num_layers must be >= 0") def _compute_per_layer_compression_ratio(self, split_layers_shape, output_shape, original_layer_shape, op_type): """ Updates the per layer statistics :param orig_layer: The layer before it was split :param split_layers: List of split layers :return: The compression ratio of split layers """ orig_layer_cost = self._compute_layer_cost(original_layer_shape, output_shape, op_type) split_layers_mem_cost = 0 split_layers_mac_cost = 0 for layer_shape in split_layers_shape: mem_cost, mac_cost = self._compute_layer_cost(layer_shape, output_shape, op_type) if not isinstance(mem_cost, int): mem_cost = mem_cost.value if not isinstance(mac_cost, int): mac_cost = mac_cost.value split_layers_mem_cost += mem_cost split_layers_mac_cost += mac_cost if self._metric is CostMetric.memory: savings = orig_layer_cost[0] - split_layers_mem_cost ratio = savings / orig_layer_cost[0] logger.debug('Original Layer Cost: %s Memory Compression Ratio: %s', orig_layer_cost[0], ratio) else: savings = orig_layer_cost[1] - split_layers_mac_cost ratio = savings / orig_layer_cost[1] logger.debug('Original Layer Cost: %s MAC Compression Ratio: %s', orig_layer_cost[1], ratio) return ratio @staticmethod def _reset_session(sess): """ Reset the given tf.compat.v1.Session :param sess: tf.compat.v1.Session :return: None """ tf.compat.v1.reset_default_graph() sess.close() @staticmethod def _load_graph(graph, meta_graph, checkpoint): """ Load a graph and checkpoint and create a new tf.compat.v1.Session :param graph: TF graph :param meta_graph: Meta file :param checkpoint: Checkpoint file :return: Newly created session """'Loading graph: %s', meta_graph) sess = tf.compat.v1.Session(graph=graph) # Open the graph and retore the parameters saver = tf.compat.v1.train.import_meta_graph(meta_graph) saver.restore(sess, checkpoint) return sess, saver @staticmethod def _get_layer_type(op): """ Converts TF layer types into corresponding PyMo layer enumerated values :param op: TF op :return: PyMo enumerated value corresponding to the type of op """ if op.type in _SVD_LAYER_TYPES: return _SVD_LAYER_TYPES[op.type] return pymo.LAYER_TYPE_OTHER class LayerSelectionScheme(Enum): """ Enumeration of schemes supported to select layers for SVD compression """ manual = 1 top_n_layers = 2 top_x_percent = 3 @staticmethod def _pick_compression_layers(sess, cost_metric, layer_select_scheme, **kwargs): """ Pick layers for SVD compression given parameters :param sess: tf.compat.v1.Session :param cost_metric: Metric to use for evaluating layer cost (either in terms of memory or mac) :param layer_select_scheme: Layer selection scheme to use :param kwargs: Keyword arguments that depend on which layer selection scheme is specified top_n_layers:: num_layers: Number of layers to pick top_x_percent:: percent_thresh: Top layers up to this parameter will be selected manual:: layers_to_compress: List of layers (names) to compress :return: """ # pylint: disable=too-many-locals,too-many-branches if not isinstance(cost_metric, CostMetric): raise TypeError("cost_metric is not of type CostMetric") if not isinstance(layer_select_scheme, Svd.LayerSelectionScheme): raise TypeError("layer_selection_scheme is not of type Svd.LayerSelectionScheme") # Find all compressible ops query = core.OpQuery(sess.graph) compressible_ops = query.get_weight_ops() compressible_ops = [op for op in compressible_ops if op.type in _SVD_SUPPORTED_LAYER_TYPES] layer_attributes_list = Svd._create_layer_attributes_list(compressible_ops, sess) network_cost = Svd._compute_network_cost(layer_attributes_list) # Heuristic1: Reject any ops whose param shape does not meet a base criterion pruned_list = [] for layer_attributes in layer_attributes_list: h, w, n, c = layer_attributes.weight_shape if (n >= _MIN_LAYER_DIM_FOR_SVD) and ((c * h * w) >= _MIN_LAYER_DIM_FOR_SVD): pruned_list.append(layer_attributes) else: print("Pruning out {}: shape is {}".format(, layer_attributes.weight_shape)) # Reset layer_attributes_list for the next phase layer_attributes_list = pruned_list pruned_list = [] # Sort the attribute list based on cost if cost_metric == CostMetric.memory: layer_attributes_list.sort(key=lambda x: x.cost[0], reverse=True) else: layer_attributes_list.sort(key=lambda x: x.cost[1], reverse=True) if layer_select_scheme == Svd.LayerSelectionScheme.top_n_layers: num_layers = kwargs['num_layers'] pruned_list = layer_attributes_list[:num_layers] elif layer_select_scheme == Svd.LayerSelectionScheme.top_x_percent: percent_thresh = kwargs['percent_thresh'] accum_cost = 0. total_cost = network_cost[0] if (cost_metric == CostMetric.memory) else network_cost[1] for layer in layer_attributes_list: cost = layer.cost[0] if (cost_metric == CostMetric.memory) else layer.cost[1] if (100 * (cost + accum_cost)/total_cost) < percent_thresh: pruned_list.append(layer) accum_cost += cost elif layer_select_scheme == Svd.LayerSelectionScheme.manual: layers_to_compress = kwargs['layers_to_compress'] for layer in layer_attributes_list: if in layers_to_compress: pruned_list.append(layer) if not pruned_list: raise RuntimeError('No suitable layers found in the model.') return pruned_list, network_cost @staticmethod def _create_layer_attributes_list(ops_to_use, sess): """ Creates list of layer attributes given a set of TF ops :param ops_to_use: TF ops to collect layer attributes for :param sess: tf.compat.v1.Session to use :return: Created list of layer attributes """ query = core.OpQuery(sess.graph) layer_attributes_list = [] for op in ops_to_use: weight_shape = query.get_weights_for_op(op).eval(session=sess).shape if op.type == 'MatMul': n, c = weight_shape weight_shape = (1, 1, n, c) output_dims = op.outputs[0].shape cost = Svd._compute_layer_cost(weight_shape, output_dims, op.type) layer_attributes_list.append(LayerAttributes(op, cost, weight_shape)) return layer_attributes_list @staticmethod def _compute_network_cost(layer_attributes_list): """ Compute aggregate cost of the layers included in the layer attributes list :param layer_attributes_list: List of layer attributes :return: Computed cost """ mac_cost = 0 mem_cost = 0 for layer_attributes in layer_attributes_list: op_mem_cost, op_mac_cost = layer_attributes.cost mem_cost += op_mem_cost mac_cost += op_mac_cost return mem_cost, mac_cost @staticmethod def _compute_layer_cost(weights_shape, output_dims, op_type): """ Compute cost of a layer :param weights_shape: Shape of the weights of this layer :param output_dims: Shape of the output of this layer :param op_type: Type of this TF op :return: Computed layer cost """ # for outputs, TF uses dims [N,H,W,C] mem_cost = reduce(operator.mul, weights_shape) if op_type == 'Conv2D': mac_cost = mem_cost * int(output_dims[1]) * int(output_dims[2]) elif op_type == 'MatMul': mac_cost = mem_cost return mem_cost, mac_cost def _compute_compression_ratio(self, sess, cost_metric): """ Compute compression ratio :param sess: tf.compat.v1.Session :return: Computed compression ratio """ query = core.OpQuery(sess.graph) compressible_ops = query.get_weight_ops() compressible_ops = [op for op in compressible_ops if op.type in _SVD_SUPPORTED_LAYER_TYPES] layer_attributes_list = Svd._create_layer_attributes_list(compressible_ops, sess) selected_layers_ops = [ for layer in self._selected_layers] layer_attributes_list = [layer for layer in layer_attributes_list if not in selected_layers_ops] compressed_network_cost = Svd._compute_network_cost(layer_attributes_list) if cost_metric is CostMetric.memory: savings = self._networkCost[0] - compressed_network_cost[0] ratio = savings/self._networkCost[0] else: savings = self._networkCost[1] - compressed_network_cost[1] ratio = savings/self._networkCost[1] return ratio def _store_net_stats(self, sess): """ Store layer attributes in the PyMo library instance :param sess: tf.compat.v1.Session :return: None """ # pylint: disable=too-many-locals,too-many-branches,too-many-statements if self._metric == CostMetric.memory: pymo_metric = pymo.COST_TYPE_MEMORY else: pymo_metric = pymo.COST_TYPE_MAC self._svd.SetCostMetric(pymo_metric) # Layer-selection if self._layers_to_compress: selected_layers, network_cost = self._pick_compression_layers(sess, self._metric, self.LayerSelectionScheme.manual, layers_to_compress=self._layers_to_compress) elif self._num_layers > 0: selected_layers, network_cost = self._pick_compression_layers(sess, self._metric, self.LayerSelectionScheme.top_n_layers, num_layers=self._num_layers) else: percent_thresh = self._layer_selection_threshold * 100 selected_layers, network_cost = self._pick_compression_layers(sess, self._metric, self.LayerSelectionScheme.top_x_percent, percent_thresh=percent_thresh) self._networkCost = network_cost print("Selected Layers:") for layer in selected_layers: print( self._selected_layers = selected_layers # Get the op query module and query for all Conv/FC layers query = core.OpQuery(sess.graph) self._compressible_ops = query.get_weight_ops() # Set up the layer attributes for each Conv/FC layer (this also checks for trailing # bias adds for i, op in enumerate(self._compressible_ops): # If op is not a selected layer, skip if not any(op is layer.layer_ref for layer in selected_layers): continue attr = pymo.LayerAttributes() layerName = output_dims = op.outputs[0].shape # TF uses dims [N,H,W,C] attr.layerType = self._get_layer_type(op) if self.svd_type == pymo.TYPE_SINGLE: attr.mode = self._svd.GetCompressionType(attr.layerType, 'single') else: attr.mode = self._svd.GetCompressionType(attr.layerType, 'successive') if op.type == 'Conv2D' or op.type == 'MatMul':'Setting layer attributes for: %s', layerName+'('+op.type+')') # Get weights weights = query.get_weights_for_op(op).eval(session=sess) w_shape = weights.shape logger.debug('Got weight shape: %s', w_shape) # Check for bias op bias = None if (i+1) < len(self._compressible_ops): bias = query.get_bias_for_op(self._compressible_ops[i+1]) if bias is not None: bias = bias.eval(session=sess) logger.debug('Got %s w/bias. Shape: %s', op.type, str(bias.shape)) if op.type == 'Conv2D': attr.shape = [w_shape[3], w_shape[2], w_shape[0], w_shape[1]] # TF Conv weight order [KH,KW,ID,OD] attr.activation_dims = (output_dims[1], output_dims[2]) # (H,W) # CONV weights are stored in the order {H,W,I,O} in Tensorflow # Re-order them to the form {O,I,H,W} weights = np.transpose(weights, (3, 2, 0, 1)) elif op.type == 'MatMul': attr.shape = [w_shape[1], w_shape[0], 1, 1] # TF FC weight order [ID,OD], SVD expects [OD,ID] attr.activation_dims = (1, 1) weights = np.transpose(weights, (1, 0)) # blobs is a numpy array... add to list then set params = [weights.flatten()] if bias is not None: params.append(bias.flatten()) attr.blobs = params # Save the attributes for this layer self._svd.StoreLayerAttributes(layerName, attr) def _compute_objective_score(self, model_perf, compression_score): """ Compute objective score of a given compression model :param model_perf: Performance of compressed model :param compression_score: Compression ratio :return: Computed objective score """ if model_perf + (self._error_margin / 100) >= self._baseline_perf: objective_score = 1 - model_perf + (1 - compression_score) else: objective_score = 1 + (1 - compression_score) # treat lower accuracies as 0 return objective_score def _split_conv_layer(self, sess, svd_ranks, attr, op_name, bias_op_name=None): """ Split a given conv layer given a rank :param sess: tf.compat.v1.Session :param svd_ranks: Rank to split the layer with (two ranks in case of SSVD) :param attr: Reference to the corresponding layer attribute :param op_name: Name of the op to split :param bias_op_name: Name of the corresponding bias op (if any) :return: None """ # pylint: disable=too-many-statements,too-many-branches,too-many-locals'Splitting conv op: %s', op_name) # Retrieve the op(s) from the current graph op = sess.graph.get_operation_by_name(op_name) bias_op = None if bias_op_name: bias_op = sess.graph.get_operation_by_name(bias_op_name) # Create new 'conv_a' layer pad_mode = op.get_attr('padding') data_format = op.get_attr('data_format').decode('utf-8') strides = op.get_attr('strides') # Print current conv weight shape query = core.OpQuery(sess.graph) w_shape = query.get_weights_for_op(op).get_shape().as_list() logger.debug('Original %s weight shape: %s',, str(w_shape)) split_weights, weight_sizes = [], [] split_biases, bias_sizes = [], [] # TF weights are in [H,W,I,O] order. We must reshape the split weights to SVD format [O,I,H,W] # and then transpose back # Conv a weights are: [1, 1, w_shape[2], svd_ranks[0]] split_conv_a_w_shape = (svd_ranks[0], w_shape[2], 1, 1) conv_a_weights = np.zeros(split_conv_a_w_shape) # transpose(2,3,1,0) split_weights.append(conv_a_weights.flatten().tolist()) weight_sizes.append(conv_a_weights.size) if bias_op: conv_a_bias = np.zeros(svd_ranks[0]) split_biases.append(conv_a_bias.flatten().tolist()) bias_sizes.append(conv_a_bias.size) num_filters = w_shape[3] if len(svd_ranks) >= 2 and attr.mode == pymo.TYPE_SUCCESSIVE: # Output channels = output_rank (s) num_filters = svd_ranks[1] # Conv b weights are: [w_shape[0],w_shape[1],svd_ranks[0],num_filters] split_conv_b_w_shape = (num_filters, svd_ranks[0], w_shape[0], w_shape[1]) conv_b_weights = np.zeros(split_conv_b_w_shape) conv_b_bias = np.zeros(num_filters) split_weights.append(conv_b_weights.flatten().tolist()) weight_sizes.append(conv_b_weights.size) if bias_op: split_biases.append(conv_b_bias.flatten().tolist()) bias_sizes.append(conv_b_bias.size) # Only create a third conv layer when performing successive SVD if len(svd_ranks) >= 2 and attr.mode == pymo.TYPE_SUCCESSIVE: # Conv c weights are: [1,1,num_filters,w_shape[3]] split_conv_c_w_shape = (w_shape[3], num_filters, 1, 1) conv_c_weights = np.zeros(split_conv_c_w_shape) conv_c_bias = np.zeros(w_shape[3]) split_weights.append(conv_c_weights.flatten().tolist()) weight_sizes.append(conv_c_weights.size) if bias_op: split_biases.append(conv_c_bias.flatten().tolist()) bias_sizes.append(conv_c_bias.size) # Split the weights and biases according to the number of layers and ranks split_weights = self._svd.SplitLayerWeights(, split_weights, weight_sizes, svd_ranks) split_biases = self._svd.SplitLayerBiases(, split_biases, bias_sizes, svd_ranks) if split_weights: conv_a_name ='_a' conv_a_weights = np.array(split_weights[0]).reshape(split_conv_a_w_shape).transpose(2, 3, 1, 0) conv_a_w = tf.Variable(initial_value=conv_a_weights, name=conv_a_name+'_w', dtype=tf.float32) logger.debug('%s weight shape: %s', conv_a_name, str(conv_a_weights.shape)) # Create conv_a using default strides (1,1) # pylint: disable=no-member conv_acts = tf.nn.conv2d(op.inputs[0], conv_a_w, strides=[1, 1, 1, 1], data_format=data_format, padding=pad_mode,'_a') # dilation_rate=dilation_rate if bias_op: conv_a_bias = tf.Variable(initial_value=split_biases[0], name=conv_a_name+'_bias', dtype=tf.float32) conv_acts = conv_acts + conv_a_bias # tf.nn.bias_add(conv_acts, split_biases[0]) if len(split_weights) > 1: # Create conv_b conv_b_name ='_b' conv_b_weights = np.array(split_weights[1]).reshape(split_conv_b_w_shape).transpose(2, 3, 1, 0) conv_b_w = tf.Variable(initial_value=conv_b_weights, name=conv_b_name+'_w', dtype=tf.float32) logger.debug('%s weight shape: %s', conv_b_name, str(conv_b_weights.shape)) # pylint: disable=no-member conv_acts = tf.nn.conv2d(conv_acts, conv_b_w, strides=strides, data_format=data_format, padding=pad_mode, name=conv_b_name) #dilation_rate=dilation_rate if bias_op: conv_b_bias = tf.Variable(initial_value=split_biases[1], name=conv_b_name+'_bias', dtype=tf.float32) conv_acts = conv_acts + conv_b_bias # tf.nn.bias_add(conv_acts, split_biases[1]) ratio = self._compute_per_layer_compression_ratio([conv_a_w.shape, conv_b_w.shape], conv_acts.shape, w_shape, "Conv2D") # Only create a third conv layer when performing successive SVD if len(split_weights) > 2 and len(svd_ranks) >= 2 and attr.mode == pymo.TYPE_SUCCESSIVE: # Create conv_c, using default strides (1,1) conv_c_name ='_c' conv_c_weights = np.array(split_weights[2]).reshape(split_conv_c_w_shape).transpose(2, 3, 1, 0) conv_c_w = tf.Variable(initial_value=conv_c_weights, name=conv_c_name+'_w', dtype=tf.float32) logger.debug('%s weight shape: %s', conv_c_name, str(conv_c_weights.shape)) # pylint: disable=no-member conv_acts = tf.nn.conv2d(conv_acts, conv_c_w, strides=[1, 1, 1, 1], data_format=data_format, padding=pad_mode, name=conv_c_name) if bias_op: conv_c_bias = tf.Variable(initial_value=split_biases[2], name=conv_c_name+'_bias', dtype=tf.float32) conv_acts = conv_acts + conv_c_bias # tf.nn.bias_add(conv_acts, split_biases[2]) consumers = [] rerouted_inputs = [bias_op.outputs[0]] if bias_op else [op.outputs[0]] for inp in rerouted_inputs: for consumer in inp.consumers(): consumers.append(consumer) _ = graph_editor.reroute_ts(conv_acts, rerouted_inputs, can_modify=consumers) return ratio def _split_fc_layer(self, sess, svd_ranks, op_name, bias_op_name=None): """ Split a given conv layer given a rank :param sess: tf.compat.v1.Session :param svd_ranks: Rank to split the layer with (two ranks in case of SSVD) :param op_name: Name of the op to split :param bias_op_name: Name of the corresponding bias op (if any) :return: None """ # pylint: disable=too-many-statements, too-many-locals'Splitting fully connected op: %s', op_name) # Retrieve the op(s) from the current graph op = sess.graph.get_operation_by_name(op_name) bias_op = None if bias_op_name: bias_op = sess.graph.get_operation_by_name(bias_op_name) # Print current conv weight shape query = core.OpQuery(sess.graph) w_shape = query.get_weights_for_op(op).get_shape().as_list() logger.debug('Original %s weight shape: %s',, str(w_shape)) split_weights, weight_sizes = [], [] split_biases, bias_sizes = [], [] # FC weights are: [w_shape[2],svd_ranks[0]] in [I,O] order. # We must reshape the split weights to SVD format [O,I] and then transpose to NHWC split_fc_a_w_shape = (svd_ranks[0], w_shape[0]) fc_a_weights = np.zeros(split_fc_a_w_shape) fc_a_bias = np.zeros(svd_ranks[0]) split_weights.append(fc_a_weights.flatten().tolist()) weight_sizes.append(fc_a_weights.size) if bias_op: split_biases.append(fc_a_bias.flatten().tolist()) bias_sizes.append(fc_a_bias.size) # FC b weights are: [svd_ranks[0],num_filters] in [H,W,I,O] order. # We must reshape the split weights to SVD format [O,I,H,W] and then transpose to NHWC split_fc_b_w_shape = (w_shape[1], svd_ranks[0]) fc_b_weights = np.zeros(split_fc_b_w_shape) split_weights.append(fc_b_weights.flatten().tolist()) weight_sizes.append(fc_b_weights.size) if bias_op: fc_b_bias = np.zeros(w_shape[1]) split_biases.append(fc_b_bias.flatten().tolist()) bias_sizes.append(fc_b_bias.size) # Split the weights and biases according to the number of layers and ranks split_weights = self._svd.SplitLayerWeights(, split_weights, weight_sizes, svd_ranks) split_biases = self._svd.SplitLayerBiases(, split_biases, bias_sizes, svd_ranks) if split_weights: fc_a_name ='_a' fc_a_weights = np.array(split_weights[0]).reshape(split_fc_a_w_shape).transpose(1, 0) fc_a_w = tf.Variable(initial_value=fc_a_weights, name=fc_a_name+'_w', dtype=tf.float32) logger.debug('%s weight shape: %s', fc_a_name, str(fc_a_weights.shape)) # Create fc_a using default strides (1,1) fc_acts = tf.matmul(op.inputs[0], fc_a_w, name=fc_a_name) if bias_op: fc_a_bias = tf.Variable(initial_value=split_biases[0], name=fc_a_name+'_bias', dtype=tf.float32) fc_acts = fc_acts + fc_a_bias if len(split_weights) > 1: # Create fc_b fc_b_name ='_b' fc_b_weights = np.array(split_weights[1]).reshape(split_fc_b_w_shape).transpose(1, 0) fc_b_w = tf.Variable(initial_value=fc_b_weights, name=fc_b_name+'_w', dtype=tf.float32) logger.debug('%s weight shape: %s', fc_b_name, str(fc_b_weights.shape)) fc_acts = tf.matmul(fc_acts, fc_b_w, name=fc_b_name) if bias_op: fc_b_bias = tf.Variable(initial_value=split_biases[1], name=fc_b_name+'_bias', dtype=tf.float32) fc_acts = fc_acts + fc_b_bias ratio = self._compute_per_layer_compression_ratio([fc_a_w.shape, fc_b_w.shape], fc_acts.shape, w_shape, 'MatMul') consumers = [] rerouted_inputs = [bias_op.outputs[0]] if bias_op else [op.outputs[0]] for inp in rerouted_inputs: for consumer in inp.consumers(): consumers.append(consumer) _ = graph_editor.reroute_ts(fc_acts, rerouted_inputs, can_modify=consumers) return ratio def _split_layers(self, sess, rank_index, use_best_ranks): """ Split all the selected layers given a rank index :param sess: tf.compat.v1.Session :param rank_index: Rank index to use for finding the ranks :param use_best_ranks: Use the best rank index (for final compressed network) :return: None """ layer_stats = list() for i, op in enumerate(self._compressible_ops): # If op is not a selected layer, skip if not any(op is layer.layer_ref for layer in self._selected_layers): continue # Bias is taken care of as part of the Conv/FC op if op.type in ['Add', 'BiasAdd']: continue # Get the stored attributes for this op attr = self._svd.GetLayerAttributes( if not attr: raise RuntimeError("Layer attributes not available for layer" if use_best_ranks: svd_ranks = attr.bestRanks else: svd_ranks = self._svd.GetCandidateRanks(, rank_index) if svd_ranks: bias_op = None if i+1 < len(self._compressible_ops): bias_op = self._compressible_ops[i+1] bias_op = if bias_op.type in ['Add', 'BiasAdd'] else None if op.type in ['Conv2D']: ratio = self._split_conv_layer(sess, svd_ranks, attr,, bias_op) elif op.type in ['MatMul']: ratio = self._split_fc_layer(sess, svd_ranks,, bias_op) per_layer_stats = stats_u.SvdStatistics.PerSelectedLayer(, svd_ranks, ratio) layer_stats.append(per_layer_stats) return layer_stats def _create_compressed_network(self, sess, rank_index, use_best_ranks): """ Create a compressed network for a given rank index :param sess: tf.compat.v1.Session :param rank_index: Rank index to use for finding the ranks :param use_best_ranks: Use the best rank index (for final compressed network) :return: None """ # Split the network layers and update the connections per_layer_stats = self._split_layers(sess, rank_index, use_best_ranks) return per_layer_stats def _perform_rank_selection(self): """ Perform rank selection procedure :return: None """ # pylint: disable=too-many-locals stats_per_rank_index = list() self._svd.ComputeNetworkCost() self._num_ranks = self._svd.SetCandidateRanks(self._num_ranks) if not self._num_ranks: raise RuntimeError('No good candidate ranks found for compressing specified layers.') # Ranks are in order from least compression to highest best_index = -1 optimal_score = 0.0 for rank_index in range(self._num_ranks): g = tf.Graph() with g.as_default(): # Create a new network for each rank_index self._svd.PrintCandidateRanks(rank_index, False) # Load the default graph so we are operating on a fresh copy of the original graph sess, saver = self._load_graph(g, self._default_meta_graph, self._default_checkpoint) per_layer_stats = self._create_compressed_network(sess, rank_index, False) # Save the temp model output_file = os.path.join(self._output_dir, 'svd_rank_index_' + str(rank_index)) self._save_graph(sess, saver, output_file) # Reset the session and start a new graph for loading the compressed model self._reset_session(sess) g = tf.Graph() with g.as_default(): # In TF after making changes to the graph you must save and reload, then evaluate sess, saver = self._load_graph(g, output_file+'.meta', output_file) model_perf = self._run_graph(sess, self._generator, self._eval_names, self._eval_func, self._iterations)'%s performance: %s', output_file, str(model_perf)) self._model_performance_candidate_ranks.append(model_perf * 100) # Estimate relative compression score for this rank_index compression_score = self._compute_compression_ratio(sess, self._metric) objective_score = self._compute_objective_score(model_perf, compression_score) rank_data = stats_u.SvdStatistics.PerRankIndex(rank_index=rank_index, model_accuracy=model_perf, model_compression_ratio=compression_score, layer_stats_list=per_layer_stats) stats_per_rank_index.append(rank_data)'Compressed network with rank_index %i/%i: accuracy = %f percent ' 'with %f percent compression (%r option) and an objective score of %f', rank_index, self._num_ranks, model_perf * 100, compression_score * 100, self._metric, objective_score) if rank_index == 0: optimal_score = objective_score'Initializing objective score to %f at rank index %i', optimal_score, rank_index) if model_perf + self._error_margin/100 < self._baseline_perf:'Model performance %f falls below %f percent of baseline performance %f' ' Ending rank selection', model_perf, self._error_margin, self._baseline_perf) break if objective_score <= optimal_score: optimal_score = objective_score'Found a better value for the objective score %f at rank_index %i', optimal_score, rank_index) best_index = rank_index if best_index != -1: self._svd.StoreBestRanks(best_index) memory_compression_ratio = self._compute_compression_ratio(sess, CostMetric.memory) mac_compression_ratio = self._compute_compression_ratio(sess, CostMetric.mac) stats = stats_u.SvdStatistics(self._baseline_perf, model_perf, self._metric, best_index, mem_comp_ratio=memory_compression_ratio, mac_comp_ratio=mac_compression_ratio, rank_stats_list=stats_per_rank_index) # close the session and reset the default graph self._reset_session(sess) return stats # close the session and reset the default graph self._reset_session(sess) raise RuntimeError('No suitable ranks found to compress model within defined error bounds.') def manual_rank_svd(self): """ Set provided ranks in the PyMo library :return: None """ # Store total net cost self._svd.ComputeNetworkCost() # Ensure proper layer names are provided in no_eval mode if not self._layer_ranks: raise ValueError('Layer names MUST be specified in no_eval mode.') # Ensure layer_ranks is in list of tuples format if not all(isinstance(item, tuple) for item in self._layer_ranks): raise ValueError('layer_ranks should be in list of tuples format for both SVD and SSVD') # Check number of input ranks match with number of input layers if len(self._layers_to_compress) != self._num_layer_ranks: raise ValueError('Number of Input SVD ranks does not match number of layers.') for layer_name, rank in zip(self._layers_to_compress, self._layer_ranks): rank_list = list() rank_list.append(rank[1]) if self.svd_type == _SVD_TYPES['ssvd']: rank_list.append(rank[1]) self._svd.StoreBestRanks(layer_name, rank_list) stats = self._stats_for_manual_rank_svd() return stats @staticmethod def _save_graph(sess, saver, output_graph): """ Utility function to save a graph :param sess: tf.compat.v1.Session :param saver: TF save :param output_graph: Filename and path for saving the output :return: """'Saving graph: %s', output_graph), output_graph) _ = tf.compat.v1.summary.FileWriter(os.path.dirname(output_graph)+"/models", sess.graph) def _save_compressed_network(self): """ Create and save a compressed network (using the best ranks identified) :return: """'Saving final compressed network') g = tf.Graph() with g.as_default(): sess, saver = self._load_graph(g, self._default_meta_graph, self._default_checkpoint) per_layer_stats = self._create_compressed_network(sess, 0, True) # Save the final network self._save_graph(sess, saver, self._output_file) self._reset_session(sess) return per_layer_stats def _stats_for_manual_rank_svd(self): per_layer_stats = self._save_compressed_network() g = tf.Graph() with g.as_default(): # Load and evaluate the final network sess, _ = self._load_graph(g, self._output_file+'.meta', self._output_file) model_perf = self._run_graph(sess, self._generator, self._eval_names, self._eval_func, self._iterations)'%s performance: %s', self._output_file, str(model_perf)) # Estimate relative compression score for this rank_index self._svd.PrintCandidateRanks(0, True) # Estimate relative compression score for this rank_index compression_score = self._compute_compression_ratio(sess, self._metric)'Evaluating final model using layer(s): %s. ' 'Final accuracy = %f percent with %f percent compression (%r option).', self._eval_names, model_perf*100, compression_score*100, self._metric) memory_compression_ratio = self._compute_compression_ratio(sess, CostMetric.memory) mac_compression_ratio = self._compute_compression_ratio(sess, CostMetric.mac) rank_data = stats_u.SvdStatistics.PerRankIndex(rank_index=0, model_accuracy=model_perf, model_compression_ratio=compression_score, layer_stats_list=per_layer_stats) rank_data_list = list() rank_data_list.append(rank_data) stats = stats_u.SvdStatistics(self._baseline_perf, model_perf, self._metric, 0, mem_comp_ratio=memory_compression_ratio, mac_comp_ratio=mac_compression_ratio, rank_stats_list=rank_data_list) return stats
[docs] def compress_net(self, generator, eval_names=None, run_graph=graph_eval.evaluate_graph, eval_func=graph_eval.default_eval_func, error_margin=2, iterations=100): """ Compresses the network using SVD Runs rank selection on the network, and compresses it using the method and parameters passed during construction of the Svd object. :param generator: The generator which should be used for generating data for quantization :param eval_names: The list of names to use for calculating model performance :param run_graph: The function to use for running data through the graph and evaluating the network's performance. This function must return only a single number representing the avg performance of the model over the dataset batches. See the 'graph_eval' module's 'evaluate_graph' function for the prototype :param eval_func: The function to use for evaluating the network performance. This function should always return a single number that can be used for comparing different graph's performance. (The default is accuracy) :param error_margin: The acceptable degradation in network accuracy from the original. 1 for 1% drop, etc. Defaults to 2%. :param iterations: The number of iterations (data batches) to run through the network for analysis :return: An object containing compression statistics :raises: - ValueError: An invalid parameter was passed - RuntimeError: An error occurred analyzing or compressing the network. The associated error and other information will be returned with the error. """ self._generator = generator if not eval_names: eval_names = ['accuracy'] self._eval_names = eval_names self._run_graph = run_graph self._eval_func = eval_func if error_margin <= 0: raise ValueError('Invalid error_margin: '+str(error_margin)+'. Must pass error_margin > 0') self._error_margin = error_margin if iterations <= 0: raise ValueError('Invalid iterations: '+str(iterations)+'. Number of iterations must be > 0') self._iterations = iterations # Get baseline accuracy, then store the network stats g = tf.Graph() with g.as_default(): sess, _ = self._load_graph(g, self._default_meta_graph, self._default_checkpoint) self._baseline_perf = run_graph(sess, generator, eval_names, eval_func, iterations)'Baseline performance: %f', self._baseline_perf) self._store_net_stats(sess) self._reset_session(sess) if self._no_eval: # Set Manual rank stats = self.manual_rank_svd() else: # Perform rank selection stats = self._perform_rank_selection() self._save_compressed_network() return stats