diff --git a/quant_vis/histograms/hooks/forward_hooks.py b/quant_vis/histograms/hooks/forward_hooks.py index c56fea0..0c848b4 100644 --- a/quant_vis/histograms/hooks/forward_hooks.py +++ b/quant_vis/histograms/hooks/forward_hooks.py @@ -39,6 +39,61 @@ def activation_forward_histogram_hook( def hook(module, input): # Ensure we are in eval mode, and ensure that this is not during a Shadow conversion check. + """ + Initializes and updates the activation histogram bins for a quantized + module, based on the values of the incoming activation tensor. + + Args: + module (torch.nn.Module): the PyTorch module being quantized; its + quantization parameters are used to determine the quantization + settings and histogram bins for the module's activations. + Relevant attributes and constants include: + + - `training`: If true, the module is in training mode, else it's + not. + - `type`: The type of the input `module`, which must not be a + Shadow module for the hook to run. + - `zero_point`: The zero point of the quantization range. + - `scale`: The scale of the quantization range. + - `bit_res`: The number of bits used for quantization. + - `HIST_XMIN`: The minimum value of the histogram. + - `HIST_XMAX`: The maximum value of the histogram. + - `HIST_QUANT_BIN_RATIO`: The number of histogram bins per + quantization bin. + - `local_input`: The input to the hook function, which is a tensor. + + The `hook` function performs the following steps to create a + histogram for the input `module`: + + 1. Calculating the limits of the histogram based on the `HIST_XMIN` + and `HIST_XMAX` values, as well as the quantization range and + buffer size. + 2. Creating a tensor histogram using `torch.histogram`. + 3. Using `torch.bucketize` to map the activations to the same + histogram bins. + 4. Storing the histogram in the `act_histogram` dictionary for + future use. 
+ input (tuple of `torch.Tensor`): tuple of inputs passed to the + module; its first element is the activation tensor that is + histogrammed by the hook function. + + - `input[0].detach().cpu()`: The first element of the input is + detached from the autograd graph and moved to the CPU. + - `name`: A string representing the name of the module for which + the histogram is being generated. + - `module`: An instance of the `nn.Module` class, which represents + the neural network module for which the histogram is being generated. + - `bit_res`: An integer representing the bit width of the + quantization bin, which determines the number of histogram bins. + - `qrange`: An integer representing the range of values that the + quantized output can take, which is dependent on the qparams and + how much "buffer" is wanted on either side of the quantization range. + - `HIST_XMIN`, `HIST_XMAX`, `HIST_QUANT_BIN_RATIO`: Constants + representing the parameters for creating histogram bins, which are + defined in the code. + + """ if not module.training and type(module) is not ns.Shadow: # Get number of quantization bins from the quantization bit width diff --git a/quant_vis/histograms/hooks/sa_back_hooks.py b/quant_vis/histograms/hooks/sa_back_hooks.py index 4d1ce88..8b8a69a 100644 --- a/quant_vis/histograms/hooks/sa_back_hooks.py +++ b/quant_vis/histograms/hooks/sa_back_hooks.py @@ -100,6 +100,49 @@ def backwards_SA_histogram_hook( """ def hook(module, inp_grad, out_grad): + """ + Updates the dataclass `act_backward_histograms.data[name]` with gradients + summed across forward histogram bins, computed using `torch.bincount()`. + The updated dataclass contains a field `binned_grads`. + + Args: + module (`torch.nn.Module`): the module whose backward gradients + are being binned; together with `inp_grad` and `out_grad` it + provides the inputs the hook needs to compute and store the + summed gradients. 
+ + - `module`: This is an instance of a `nn.Module` class, which + represents a neural network module. + - `inp_grad`: Gradients of the loss with respect to the module's + inputs during the backward pass. + - `out_grad`: Gradients of the loss with respect to the module's + outputs during the backward pass. + + The function then computes the sum of the gradients falling into + each histogram bin from the forward pass and stores the result for + the backward pass. + inp_grad (tuple of `torch.Tensor`): gradients with respect to the + module's inputs, as supplied to a full backward hook. + + - `inp_grad` contains one gradient tensor per module input, each + matching the shape of the corresponding input (NOTE(review): + assumes registration via `register_full_backward_hook` — confirm + against the registration site). + - The elements of `inp_grad` represent the gradients of the loss + with respect to the module's input variables. + out_grad (tuple of `torch.Tensor`): gradients with respect to the + module's outputs; these are the gradients that are summed per + forward-histogram bin and stored in the dataclass as the binned + gradients. + + - `out_grad` contains one gradient tensor per module output, each + matching the shape of the corresponding output. 
+ + """ if name not in act_forward_histograms.data: return diff --git a/quant_vis/histograms/plots/utils.py b/quant_vis/histograms/plots/utils.py index 85d8da4..d56ba44 100644 --- a/quant_vis/histograms/plots/utils.py +++ b/quant_vis/histograms/plots/utils.py @@ -150,6 +150,24 @@ def moving_average(input_tensor, window_size): ########### def create_double_level_plot_folder(file_path: str, lvl_1: str, lvl_2: str) -> str: + """ + Creates a folder structure with two levels of subfolders based on the + input arguments, ensuring the existence of each level using the `os` + module's `makedirs()` method. + + Args: + file_path (str): path to the directory in which the plot folders will + be created (NOTE(review): it is combined with the `/` operator, so + a `pathlib.Path` appears to be expected despite the `str` + annotation — confirm at the call sites). + lvl_1 (str): name of the first-level subfolder created directly inside + `file_path`. + lvl_2 (str): name of the second-level subfolder created inside the + `lvl_1` folder. + + Returns: + str: the path to the created double-level plot folder. + + """ weight_plot_folder = ( file_path / lvl_1 / lvl_2 ) diff --git a/tests/evaluate/evaluate.py b/tests/evaluate/evaluate.py index 8d3c886..d43151c 100644 --- a/tests/evaluate/evaluate.py +++ b/tests/evaluate/evaluate.py @@ -4,6 +4,39 @@ def evaluate(model, device_str="cuda"): # Download an example image from the pytorch website + """ + Downloads an image from a URL, preprocesses it using PyTorch's + `transforms`, and passes it to a trained PyTorch model for + classification. It also checks the availability of CUDA and moves the + input and model to the GPU if available. + + Args: + model (nn.Module): deep learning model that is being evaluated; + it is used to perform the actual evaluation by passing the input + batch through the model to generate output probabilities. + + - `device_str`: This is an optional string parameter that specifies + the device to use for the model computation. 
It can take values such + as "cpu" or "cuda". If not specified, it defaults to "cuda". + - `model`: This is a PyTorch model instance that takes in an input + image and outputs a prediction. The function calls + `model.to(device_str)` to move the model to the device specified + in `device_str` before running inference. + + The function then proceeds to preprocess the input image and move it + to the device specified in `device_str`. It then runs the model on the + input batch and computes the output. Finally, it prints out the top + categories predicted by the model for the given input image. + device_str (str): GPU or CPU device to which the input batch and the model + should be moved for faster execution, with "cuda" indicating the use + of NVIDIA GPU and "cpu" indicating the use of CPU. + + """ import urllib url, filename = (