Skip to content

Commit 477ee77

Browse files
add docstrings for docstring coverage
1 parent d2bb7ad commit 477ee77

3 files changed

Lines changed: 20 additions & 0 deletions

File tree

examples/llm_ptq/example_utils.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,7 @@ def build_quant_cfg(
209209
model_type,
210210
moe_calib_experts_ratio: float | None = None,
211211
) -> dict[str, Any]:
212+
"""Build quantization config with model-specific overrides for AWQ, SmoothQuant, and VLM."""
212213
quant_cfg = copy.deepcopy(quant_cfg)
213214
if "awq" in str(quant_cfg.get("algorithm")):
214215
from modelopt.torch.quantization.config import find_quant_cfg_entry_by_path
@@ -257,6 +258,11 @@ def build_quant_cfg(
257258
# (hidden_size -> num_v_heads, e.g. 1024 -> 16), quantizing them causes accuracy loss.
258259
quant_cfg["quant_cfg"].append({"quantizer_name": "*in_proj_b*", "enable": False})
259260
quant_cfg["quant_cfg"].append({"quantizer_name": "*in_proj_a*", "enable": False})
261+
# TRT-LLM's Qwen3.5 linear-attention packing only supports weight/weight_scale_inv;
262+
# disable activation quantization so input_scale is not exported for these layers.
263+
quant_cfg["quant_cfg"].append(
264+
{"quantizer_name": "*linear_attn*input_quantizer", "enable": False}
265+
)
260266

261267
return quant_cfg
262268

tests/_test_utils/torch/transformers_models.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,7 @@ def get_tiny_qwen3_moe(**config_kwargs) -> PreTrainedModel:
112112
def create_tiny_qwen3_moe_dir(
113113
tmp_path: Path | str, with_tokenizer: bool = False, **config_kwargs
114114
) -> Path:
115+
"""Save a tiny Qwen3 MoE model (and optional tokenizer) to a temp directory."""
115116
qwen3_moe_dir = Path(tmp_path) / "tiny_qwen3_moe"
116117
if with_tokenizer:
117118
tokenizer = AutoTokenizer.from_pretrained(
@@ -157,6 +158,7 @@ def get_tiny_qwen3_5(**config_kwargs) -> PreTrainedModel:
157158

158159
##### GPT-OSS #####
159160
def get_tiny_gpt_oss(**config_kwargs) -> PreTrainedModel:
161+
"""Create a tiny GPT-OSS MoE model for testing."""
160162
set_seed(SEED)
161163

162164
kwargs = {

tests/unit/torch/quantization/plugins/test_huggingface.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -236,6 +236,7 @@ def test_is_homogeneous_hf_model_gpt_oss():
236236

237237

238238
def test_hf_decoder_discoverer_registration_path():
239+
"""Verify HF decoder layer discoverer is registered and returns correct layers."""
239240
model = get_tiny_llama()
240241
assert any(
241242
is_supported is is_homogeneous_hf_model and discoverer is get_homogeneous_hf_decoder_layers
@@ -267,6 +268,10 @@ def test_qwen3_5_hybrid_attention_quantize(quant_config):
267268
# Disable narrow GatedDeltaNet projections (same as example_utils does for qwen3_5)
268269
quant_cfg["quant_cfg"].append({"quantizer_name": "*in_proj_b*", "enable": False})
269270
quant_cfg["quant_cfg"].append({"quantizer_name": "*in_proj_a*", "enable": False})
271+
# Disable activation quantization for linear attention (TRT-LLM packing limitation)
272+
quant_cfg["quant_cfg"].append(
273+
{"quantizer_name": "*linear_attn*input_quantizer", "enable": False}
274+
)
270275

271276
def calib_fn(model):
272277
"""Run calibration forward passes with dummy inputs."""
@@ -303,3 +308,10 @@ def calib_fn(model):
303308
assert not module.weight_quantizer.is_enabled, (
304309
f"in_proj_a should have quantization disabled: {name}"
305310
)
311+
312+
# Verify linear_attn input quantizers are disabled (no input_scale for TRT-LLM export)
313+
for name, module in model.named_modules():
314+
if "linear_attn" in name and hasattr(module, "input_quantizer"):
315+
assert not module.input_quantizer.is_enabled, (
316+
f"linear_attn input_quantizer should be disabled: {name}"
317+
)

0 commit comments

Comments (0)