From 0eb85ee40a74caa4dec66fe914e248f462e98108 Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Thu, 30 May 2024 17:16:27 +0000 Subject: [PATCH 1/3] Fix (bias_correction): Set `skip_if_no_bias=True` when accelerate is enabled. --- optimum/amd/brevitas/quantizer.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/optimum/amd/brevitas/quantizer.py b/optimum/amd/brevitas/quantizer.py index d06e143b..ba998686 100644 --- a/optimum/amd/brevitas/quantizer.py +++ b/optimum/amd/brevitas/quantizer.py @@ -244,6 +244,7 @@ def quantize( apply_bias_correction( model, calibration_dataset, + skip_if_no_bias=use_accelerate, # We can't add keys to the state dict if accelerate is being used ) logger.info("Bias Correction applied.") @@ -331,7 +332,7 @@ def apply_calibration(model: torch.nn.Module, dataset: List[Dict]) -> None: @torch.no_grad() -def apply_bias_correction(model: torch.nn.Module, dataset: List[Dict]) -> None: - with bias_correction_mode(model): +def apply_bias_correction(model: torch.nn.Module, dataset: List[Dict], skip_if_no_bias: bool = False) -> None: + with bias_correction_mode(model, skip_if_no_bias=skip_if_no_bias): for inps in tqdm(dataset): model(**inps) From f6978aa6d8660fbb6856ddb5a2e25b97381e6a45 Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Thu, 30 May 2024 17:44:21 +0000 Subject: [PATCH 2/3] Fix (bias_correction): Added zero biases to linear layers when accelerate is used. --- optimum/amd/brevitas/configuration.py | 3 +++ optimum/amd/brevitas/quantizer.py | 18 +++++++++++++++++- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/optimum/amd/brevitas/configuration.py b/optimum/amd/brevitas/configuration.py index b5f4e637..8cd2dba1 100644 --- a/optimum/amd/brevitas/configuration.py +++ b/optimum/amd/brevitas/configuration.py @@ -137,5 +137,8 @@ def __post_init__(self): self.activations_group_size = None self.activations_param_method = None + def add_bias_to_linear(self): + return self.apply_bias_correction and self.device == "auto" + def requires_fx_graph(self): return self.activations_equalization == "cross_layer" or self.apply_weight_equalization diff --git a/optimum/amd/brevitas/quantizer.py b/optimum/amd/brevitas/quantizer.py index ba998686..f901afca 100644 --- a/optimum/amd/brevitas/quantizer.py +++ b/optimum/amd/brevitas/quantizer.py @@ -192,6 +192,8 @@ def quantize( if use_accelerate: remove_hooks(model) device = None + if self.config.add_bias_to_linear(): + model = add_zero_bias_to_linear(model) else: device = next(model.parameters()).device @@ -244,7 +246,7 @@ def quantize( apply_bias_correction( model, calibration_dataset, - skip_if_no_bias=use_accelerate, # We can't add keys to the state dict if accelerate is being used + skip_if_no_bias=use_accelerate, # We can't add keys to the state dict if accelerate is being used ) logger.info("Bias Correction applied.") @@ -336,3 +338,17 @@ def apply_bias_correction(model: torch.nn.Module, dataset: List[Dict], skip_if_n with bias_correction_mode(model, skip_if_no_bias=skip_if_no_bias): for inps in tqdm(dataset): model(**inps) + + +@torch.no_grad() +def add_zero_bias_to_linear(model: torch.nn.Module) -> torch.nn.Module: + for name, module in model.named_modules(): + if type(module) == torch.nn.Linear: + if module.bias is None: + module.register_parameter( + "bias", + torch.nn.Parameter( + torch.zeros((module.weight.shape[0],), device=module.weight.device, dtype=module.weight.dtype) + ), + ) + return model From bb563b5eeb1e42ea55107dfe75f1f6c94c3e9a4a Mon Sep 17 00:00:00 2001 From: Nick Fraser Date: Thu, 30 May 2024 19:43:51 +0000 Subject: [PATCH 3/3] Fix (bias_correction): Typo fix. --- optimum/amd/brevitas/quantizer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/optimum/amd/brevitas/quantizer.py b/optimum/amd/brevitas/quantizer.py index f901afca..88e7ecb6 100644 --- a/optimum/amd/brevitas/quantizer.py +++ b/optimum/amd/brevitas/quantizer.py @@ -192,7 +192,7 @@ def quantize( if use_accelerate: remove_hooks(model) device = None - if self.config.add_bias_to_linear(): + if quantization_config.add_bias_to_linear(): model = add_zero_bias_to_linear(model) else: device = next(model.parameters()).device