Skip to content

Commit e3e98fe

Browse files
committed
initialize moe experts differently
Signed-off-by: Jennifer Chen <jennifchen@nvidia.com>
1 parent e589ac8 commit e3e98fe

File tree

1 file changed

+6
-0
lines changed

1 file changed

+6
-0
lines changed

tests/gpu_megatron/torch/quantization/plugins/test_megatron.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -765,6 +765,12 @@ def _test_layer_sync_moe_local_experts_amax(ep_size, moe_grouped_gemm, rank, siz
765765
num_moe_experts=8,
766766
transformer_impl="modelopt",
767767
)
768+
# Make weight initialization different across experts; otherwise the experts would end up with similar amax values
769+
for layer in model.decoder.layers:
770+
for i, expert in enumerate(layer.mlp.experts.local_experts):
771+
expert.linear_fc1.weight.data.fill_(0.1 + i * 0.05)
772+
expert.linear_fc2.weight.data.fill_(0.2 + i * 0.05)
773+
768774
quant_cfg = mtq.FP8_DEFAULT_CFG
769775
model = mtq.quantize(model, quant_cfg, get_forward(model))
770776

0 commit comments

Comments
 (0)