Skip to content

Commit

Permalink
add resource utilization calculator
Browse files Browse the repository at this point in the history
  • Loading branch information
irenaby committed Jan 6, 2025
1 parent ce318c0 commit c2dcbe1
Show file tree
Hide file tree
Showing 22 changed files with 1,030 additions and 1,058 deletions.
6 changes: 2 additions & 4 deletions model_compression_toolkit/core/common/graph/base_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -545,9 +545,7 @@ def get_weights_configurable_nodes(self,

def is_configurable(n):
kernel_attr = fw_info.get_kernel_op_attributes(n.type)[0]
return (n.is_weights_quantization_enabled(kernel_attr) and
not n.is_all_weights_candidates_equal(kernel_attr) and
(not n.reuse or include_reused_nodes))
return n.is_configurable_weight(kernel_attr) and (not n.reuse or include_reused_nodes)

return [n for n in potential_conf_nodes if is_configurable(n)]

Expand Down Expand Up @@ -576,7 +574,7 @@ def get_activation_configurable_nodes(self) -> List[BaseNode]:
Returns:
A list of nodes that their activation can be configured (namely, has one or more activation qc candidate).
"""
return [n for n in list(self) if n.is_activation_quantization_enabled() and not n.is_all_activation_candidates_equal()]
return [n for n in list(self) if n.has_configurable_activation()]

def get_sorted_activation_configurable_nodes(self) -> List[BaseNode]:
"""
Expand Down
16 changes: 14 additions & 2 deletions model_compression_toolkit/core/common/graph/base_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,14 @@ def is_weights_quantization_enabled(self, attr_name: str) -> bool:

return False

def is_configurable_weight(self, attr_name: str) -> bool:
    """ Checks whether the quantization of the given weight attribute is configurable,
    i.e. its quantization is enabled and its candidates are not all identical. """
    if not self.is_weights_quantization_enabled(attr_name):
        return False
    return not self.is_all_weights_candidates_equal(attr_name)

def has_configurable_activation(self) -> bool:
    """ Checks whether the activation has a configurable quantization, i.e. activation
    quantization is enabled and not all quantization candidates are equal. """
    return self.is_activation_quantization_enabled() and not self.is_all_activation_candidates_equal()

def __repr__(self):
"""
Expand Down Expand Up @@ -420,11 +428,15 @@ def get_total_output_params(self) -> float:
Returns: Output size.
"""
output_shapes = self.output_shape if isinstance(self.output_shape, List) else [self.output_shape]
# multiple output shapes are not necessarily lists, e.g. tf nms uses custom named tuple.
if self.output_shape and isinstance(self.output_shape[0], (tuple, list)):
output_shapes = list(self.output_shape)
else:
output_shapes = self.output_shape if isinstance(self.output_shape, list) else [self.output_shape]

# remove batch size (first element) from output shape
output_shapes = [s[1:] for s in output_shapes]

# for scalar shape (None,) prod returns 1
return sum([np.prod([x for x in output_shape if x is not None]) for output_shape in output_shapes])

def find_min_candidates_indices(self) -> List[int]:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
from model_compression_toolkit.core.common import Graph
from model_compression_toolkit.core.common.hessian import HessianInfoService
from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import ResourceUtilization, RUTarget
from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.ru_functions_mapping import ru_functions_mapping
from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation
from model_compression_toolkit.core.common.mixed_precision.mixed_precision_search_manager import MixedPrecisionSearchManager
from model_compression_toolkit.core.common.mixed_precision.search_methods.linear_programming import \
Expand Down Expand Up @@ -105,16 +104,11 @@ def search_bit_width(graph_to_search_cfg: Graph,
disable_activation_for_metric=disable_activation_for_metric,
hessian_info_service=hessian_info_service)

# Each pair of (resource utilization method, resource utilization aggregation) should match to a specific
# provided target resource utilization
ru_functions = ru_functions_mapping

# Instantiate a manager object
search_manager = MixedPrecisionSearchManager(graph,
fw_info,
fw_impl,
se,
ru_functions,
target_resource_utilization,
original_graph=graph_to_search_cfg)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,23 +13,24 @@
# limitations under the License.
# ==============================================================================

from typing import Callable, Tuple
from typing import Dict, List
from typing import Callable, Dict, List

import numpy as np

from model_compression_toolkit.core.common import BaseNode
from model_compression_toolkit.logger import Logger
from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation
from model_compression_toolkit.core.common.framework_info import FrameworkInfo
from model_compression_toolkit.core.common.graph.base_graph import Graph
from model_compression_toolkit.core.common.graph.virtual_activation_weights_node import VirtualActivationWeightsNode, \
VirtualSplitWeightsNode, VirtualSplitActivationNode
from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import RUTarget, ResourceUtilization
from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.ru_functions_mapping import RuFunctions
from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.ru_aggregation_methods import MpRuAggregation
from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.ru_methods import MpRuMetric, calc_graph_cuts
from model_compression_toolkit.core.common.graph.memory_graph.compute_graph_max_cut import Cut
from model_compression_toolkit.core.common.framework_info import FrameworkInfo
from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import \
RUTarget, ResourceUtilization
from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization_calculator import \
ResourceUtilizationCalculator, TargetInclusionCriterion, BitwidthMode
from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.ru_methods import \
MixPrecisionRUHelper
from model_compression_toolkit.core.common.mixed_precision.sensitivity_evaluation import SensitivityEvaluation
from model_compression_toolkit.logger import Logger


class MixedPrecisionSearchManager:
Expand All @@ -42,7 +43,6 @@ def __init__(self,
fw_info: FrameworkInfo,
fw_impl: FrameworkImplementation,
sensitivity_evaluator: SensitivityEvaluation,
ru_functions: Dict[RUTarget, RuFunctions],
target_resource_utilization: ResourceUtilization,
original_graph: Graph = None):
"""
Expand All @@ -53,8 +53,6 @@ def __init__(self,
fw_impl: FrameworkImplementation object with specific framework methods implementation.
sensitivity_evaluator: A SensitivityEvaluation which provides a function that evaluates the sensitivity of
a bit-width configuration for the MP model.
ru_functions: A dictionary with pairs of (MpRuMethod, MpRuAggregationMethod) mapping a RUTarget to
a couple of resource utilization metric function and resource utilization aggregation function.
target_resource_utilization: Target Resource Utilization to bound our feasible solution space s.t the configuration does not violate it.
original_graph: In case we have a search over a virtual graph (if we have BOPS utilization target), then this argument
will contain the original graph (for config reconstruction purposes).
Expand All @@ -69,29 +67,17 @@ def __init__(self,
self.compute_metric_fn = self.get_sensitivity_metric()
self._cuts = None

ru_types = [ru_target for ru_target, ru_value in
target_resource_utilization.get_resource_utilization_dict().items() if ru_value < np.inf]
self.compute_ru_functions = {ru_target: ru_fn for ru_target, ru_fn in ru_functions.items() if ru_target in ru_types}
self.ru_metrics = target_resource_utilization.get_restricted_metrics()
self.ru_helper = MixPrecisionRUHelper(graph, fw_info, fw_impl)
self.target_resource_utilization = target_resource_utilization
self.min_ru_config = self.graph.get_min_candidates_config(fw_info)
self.max_ru_config = self.graph.get_max_candidates_config(fw_info)
self.min_ru = self.compute_min_ru()
self.min_ru = self.ru_helper.compute_utilization(self.ru_metrics, self.min_ru_config)
self.non_conf_ru_dict = self._non_configurable_nodes_ru()

self.config_reconstruction_helper = ConfigReconstructionHelper(virtual_graph=self.graph,
original_graph=self.original_graph)

@property
def cuts(self) -> List[Cut]:
"""
Calculates graph cuts. Written as property, so it will only be calculated once and
only if cuts are needed.
"""
if self._cuts is None:
self._cuts = calc_graph_cuts(self.original_graph)
return self._cuts

def get_search_space(self) -> Dict[int, List[int]]:
"""
The search space is a mapping from a node's index to a list of integers (possible bitwidths candidates indices
Expand Down Expand Up @@ -122,40 +108,6 @@ def get_sensitivity_metric(self) -> Callable:

return self.sensitivity_evaluator.compute_metric

def _calc_ru_fn(self, ru_target, ru_fn, mp_cfg) -> np.ndarray:
"""
Computes a resource utilization for a certain mixed precision configuration.
The method computes a resource utilization vector for specific target resource utilization.
Returns: resource utilization value.
"""
# ru_fn is a pair of resource utilization computation method and
# resource utilization aggregation method (in this method we only need the first one)
if ru_target is RUTarget.ACTIVATION:
return ru_fn.metric_fn(mp_cfg, self.graph, self.fw_info, self.fw_impl, self.cuts)
else:
return ru_fn.metric_fn(mp_cfg, self.graph, self.fw_info, self.fw_impl)

def compute_min_ru(self) -> Dict[RUTarget, np.ndarray]:
"""
Computes a resource utilization vector with the values matching to the minimal mp configuration
(i.e., each node is configured with the quantization candidate that would give the minimal size of the
node's resource utilization).
The method computes the minimal resource utilization vector for each target resource utilization.
Returns: A dictionary mapping each target resource utilization to its respective minimal
resource utilization values.
"""
min_ru = {}
for ru_target, ru_fn in self.compute_ru_functions.items():
# ru_fns is a pair of resource utilization computation method and
# resource utilization aggregation method (in this method we only need the first one)
min_ru[ru_target] = self._calc_ru_fn(ru_target, ru_fn, self.min_ru_config)

return min_ru

def compute_resource_utilization_matrix(self, target: RUTarget) -> np.ndarray:
"""
Computes and builds a resource utilization matrix, to be used for the mixed-precision search problem formalization.
Expand Down Expand Up @@ -184,7 +136,8 @@ def compute_resource_utilization_matrix(self, target: RUTarget) -> np.ndarray:
# always be 0 for all entries in the results vector.
candidate_rus = np.zeros(shape=self.min_ru[target].shape)
else:
candidate_rus = self.compute_candidate_relative_ru(c, candidate_idx, target)
candidate_rus = self.compute_node_ru_for_candidate(c, candidate_idx, target) - self.min_ru[target]

ru_matrix.append(np.asarray(candidate_rus))

# We need to transpose the calculated ru matrix to allow later multiplication with
Expand All @@ -195,40 +148,6 @@ def compute_resource_utilization_matrix(self, target: RUTarget) -> np.ndarray:
np_ru_matrix = np.array(ru_matrix)
return np.moveaxis(np_ru_matrix, source=0, destination=len(np_ru_matrix.shape) - 1)

def compute_candidate_relative_ru(self,
conf_node_idx: int,
candidate_idx: int,
target: RUTarget) -> np.ndarray:
"""
Computes a resource utilization vector for a given candidates of a given configurable node,
i.e., the matching resource utilization vector which is obtained by computing the given target's
resource utilization function on a minimal configuration in which the given
layer's candidates is changed to the new given one.
The result is normalized by subtracting the target's minimal resource utilization vector.
Args:
conf_node_idx: The index of a node in a sorted configurable nodes list.
candidate_idx: The index of a node's quantization configuration candidate.
target: The target for which the resource utilization is calculated (a RUTarget value).
Returns: Normalized node's resource utilization vector
"""
return self.compute_node_ru_for_candidate(conf_node_idx, candidate_idx, target) - \
self.get_min_target_resource_utilization(target)

def get_min_target_resource_utilization(self, target: RUTarget) -> np.ndarray:
"""
Returns the minimal resource utilization vector (pre-calculated on initialization) of a specific target.
Args:
target: The target for which the resource utilization is calculated (a RUTarget value).
Returns: Minimal resource utilization vector.
"""
return self.min_ru[target]

def compute_node_ru_for_candidate(self, conf_node_idx: int, candidate_idx: int, target: RUTarget) -> np.ndarray:
"""
Computes a resource utilization vector after replacing the given node's configuration candidate in the minimal
Expand All @@ -243,7 +162,8 @@ def compute_node_ru_for_candidate(self, conf_node_idx: int, candidate_idx: int,
"""
cfg = self.replace_config_in_index(self.min_ru_config, conf_node_idx, candidate_idx)
return self._calc_ru_fn(target, self.compute_ru_functions[target], cfg)
# TODO compute for all targets at once. Currently the way up to add_set_of_ru_constraints is per target.
return self.ru_helper.compute_utilization({target}, cfg)[target]

@staticmethod
def replace_config_in_index(mp_cfg: List[int], idx: int, value: int) -> List[int]:
Expand All @@ -270,21 +190,10 @@ def _non_configurable_nodes_ru(self) -> Dict[RUTarget, np.ndarray]:
Returns: A mapping between a RUTarget and its non-configurable nodes' resource utilization vector.
"""

non_conf_ru_dict = {}
for target, ru_fns in self.compute_ru_functions.items():
# Call for the ru method of the given target - empty quantization configuration list is passed since we
# compute for non-configurable nodes
if target == RUTarget.BOPS:
ru_vector = None
elif target == RUTarget.ACTIVATION:
ru_vector = ru_fns.metric_fn([], self.graph, self.fw_info, self.fw_impl, self.cuts)
else:
ru_vector = ru_fns.metric_fn([], self.graph, self.fw_info, self.fw_impl)

non_conf_ru_dict[target] = ru_vector

return non_conf_ru_dict
ru_metrics = self.ru_metrics - {RUTarget.BOPS}
ru = self.ru_helper.compute_utilization(ru_targets=ru_metrics, mp_cfg=None)
ru[RUTarget.BOPS] = None
return ru

def compute_resource_utilization_for_config(self, config: List[int]) -> ResourceUtilization:
"""
Expand All @@ -297,29 +206,14 @@ def compute_resource_utilization_for_config(self, config: List[int]) -> Resource
with the given config.
"""

ru_dict = {}
for ru_target, ru_fns in self.compute_ru_functions.items():
# Passing False to ru methods and aggregations to indicates that the computations
# are not for constraints setting
if ru_target == RUTarget.BOPS:
configurable_nodes_ru_vector = ru_fns.metric_fn(config, self.original_graph, self.fw_info, self.fw_impl, False)
elif ru_target == RUTarget.ACTIVATION:
configurable_nodes_ru_vector = ru_fns.metric_fn(config, self.graph, self.fw_info, self.fw_impl, self.cuts)
else:
configurable_nodes_ru_vector = ru_fns.metric_fn(config, self.original_graph, self.fw_info, self.fw_impl)
non_configurable_nodes_ru_vector = self.non_conf_ru_dict.get(ru_target)
if non_configurable_nodes_ru_vector is None or len(non_configurable_nodes_ru_vector) == 0:
ru_ru = self.compute_ru_functions[ru_target].aggregate_fn(configurable_nodes_ru_vector, False)
else:
ru_ru = self.compute_ru_functions[ru_target].aggregate_fn(
np.concatenate([configurable_nodes_ru_vector, non_configurable_nodes_ru_vector]), False)

ru_dict[ru_target] = ru_ru[0]

config_ru = ResourceUtilization()
config_ru.set_resource_utilization_by_target(ru_dict)
return config_ru
act_qcs, w_qcs = self.ru_helper.get_configurable_qcs(config)
# TODO on graph or on orig graph???
ru_calc = ResourceUtilizationCalculator(self.graph, self.fw_impl, self.fw_info)
ru = ru_calc.compute_resource_utilization(target_criterion=TargetInclusionCriterion.AnyQuantized,
bitwidth_mode=BitwidthMode.MpCustom,
act_qcs=act_qcs,
w_qcs=w_qcs)
return ru

def finalize_distance_metric(self, layer_to_metrics_mapping: Dict[int, Dict[int, float]]):
"""
Expand Down
Loading

0 comments on commit c2dcbe1

Please sign in to comment.