Skip to content

Commit 7db8372

Browse files
Emit mulx for X86Base.BigMul when BMI2 is supported
1 parent d781b3a commit 7db8372

4 files changed

Lines changed: 70 additions & 28 deletions

File tree

src/coreclr/jit/hwintrinsiccodegenxarch.cpp

Lines changed: 26 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2364,20 +2364,37 @@ void CodeGen::genX86BaseIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions)
23642364
emitAttr attr = emitTypeSize(targetType);
23652365
emitter* emit = GetEmitter();
23662366

2367-
if (rmOp->isUsedFromReg() && rmOp->GetRegNum() == REG_EAX)
2367+
// Unsigned multiplication can use mulx on BMI2-capable CPUs
2368+
if (ins == INS_mulEAX && compiler->compOpportunisticallyDependsOn(InstructionSet_AVX2))
23682369
{
2369-
std::swap(rmOp, regOp);
2370+
// op1: EDX (implicit source), op2: reg/mem (operand 3) => hiReg: (operand 1), lowReg: (operand 2)
2371+
assert((regOp->GetRegNum() == REG_EDX) || (rmOp->GetRegNum() != REG_EDX));
2372+
2373+
// mov the first operand into implicit source operand EDX/RDX
2374+
emit->emitIns_Mov(INS_mov, attr, REG_EDX, regOp->GetRegNum(), /* canSkip */ true);
2375+
2376+
// emit MULX instruction
2377+
regNumber lowReg = node->GetRegByIndex(0);
2378+
regNumber hiReg = node->GetRegByIndex(1);
2379+
inst_RV_RV_TT(INS_mulx, attr, hiReg, lowReg, rmOp, /* isRMW */ false, INS_OPTS_NONE);
23702380
}
2381+
else
2382+
{
2383+
if (rmOp->isUsedFromReg() && rmOp->GetRegNum() == REG_EAX)
2384+
{
2385+
std::swap(rmOp, regOp);
2386+
}
23712387

2372-
// op1: EAX, op2: reg/mem
2373-
emit->emitIns_Mov(INS_mov, attr, REG_EAX, regOp->GetRegNum(), /* canSkip */ true);
2388+
// op1: EAX, op2: reg/mem
2389+
emit->emitIns_Mov(INS_mov, attr, REG_EAX, regOp->GetRegNum(), /* canSkip */ true);
23742390

2375-
// emit the MUL/IMUL instruction
2376-
emit->emitInsBinary(ins, attr, node, rmOp);
2391+
// emit the MUL/IMUL instruction
2392+
emit->emitInsBinary(ins, attr, node, rmOp);
23772393

2378-
// verify target registers are as expected
2379-
assert(node->GetRegByIndex(0) == REG_EAX);
2380-
assert(node->GetRegByIndex(1) == REG_EDX);
2394+
// verify target registers are as expected
2395+
assert(node->GetRegByIndex(0) == REG_EAX);
2396+
assert(node->GetRegByIndex(1) == REG_EDX);
2397+
}
23812398

23822399
break;
23832400
}

src/coreclr/jit/lsraxarch.cpp

Lines changed: 29 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2489,17 +2489,37 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
24892489
assert(isRMW);
24902490
assert(!op1->isContained());
24912491

2492-
SingleTypeRegSet apxAwareRegCandidates =
2493-
ForceLowGprForApxIfNeeded(op1, RBM_NONE, canHWIntrinsicUseApxRegs);
2492+
if ((baseType == TYP_ULONG || baseType == TYP_UINT) &&
2493+
compiler->compOpportunisticallyDependsOn(InstructionSet_AVX2))
2494+
{
2495+
isRMW = false;
24942496

2495-
// mulEAX always use EAX, if one operand is contained, specify other op with fixed EAX register
2496-
// otherwise dont force any register, we might get the second parameter in EAX
2497-
srcCount = BuildOperandUses(op1, op2->isContained() ? SRBM_EAX : apxAwareRegCandidates);
2498-
srcCount += BuildOperandUses(op2, apxAwareRegCandidates);
2497+
SingleTypeRegSet apxAwareRegCandidates =
2498+
ForceLowGprForApxIfNeeded(op2, RBM_NONE, canHWIntrinsicUseApxRegs);
2499+
// mulx, place op1 in implicit EDX register since op2 might be contained
2500+
srcCount = BuildOperandUses(op1, SRBM_EDX);
2501+
srcCount += BuildOperandUses(op2, apxAwareRegCandidates);
2502+
2503+
// result in any register
2504+
SingleTypeRegSet apxAwareDestCandidates =
2505+
ForceLowGprForApxIfNeeded(intrinsicTree, RBM_NONE, canHWIntrinsicUseApxRegs);
2506+
BuildDef(intrinsicTree, apxAwareDestCandidates, 0);
2507+
BuildDef(intrinsicTree, apxAwareDestCandidates, 1);
2508+
}
2509+
else // Signed multiply or normal unsigned multiply in one operand form
2510+
{
2511+
SingleTypeRegSet apxAwareRegCandidates =
2512+
ForceLowGprForApxIfNeeded(op1, RBM_NONE, canHWIntrinsicUseApxRegs);
24992513

2500-
// result put in EAX and EDX
2501-
BuildDef(intrinsicTree, SRBM_EAX, 0);
2502-
BuildDef(intrinsicTree, SRBM_EDX, 1);
2514+
// mulEAX always uses EAX; if one operand is contained, specify the other op with a fixed EAX register,
2515+
// otherwise don't force any register, since we might get the second parameter in EAX
2516+
srcCount = BuildOperandUses(op1, op2->isContained() ? SRBM_EAX : apxAwareRegCandidates);
2517+
srcCount += BuildOperandUses(op2, apxAwareRegCandidates);
2518+
2519+
// result put in EAX and EDX
2520+
BuildDef(intrinsicTree, SRBM_EAX, 0);
2521+
BuildDef(intrinsicTree, SRBM_EDX, 1);
2522+
}
25032523

25042524
buildUses = false;
25052525
break;

src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/X86Base.PlatformNotSupported.cs

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -67,9 +67,11 @@ internal X64() { }
6767
/// <summary>
6868
/// <para>unsigned _umul128(unsigned __int64 Multiplier, unsigned __int64 Multiplicand, unsigned __int64 * HighProduct)</para>
6969
/// <para> MUL reg/m64</para>
70+
/// <para> MULX reg reg reg/m64 (if BMI2 is supported)</para>
7071
/// </summary>
7172
/// <remarks>
72-
/// <para>Its functionality is exposed in the public <see cref="Math" /> class.</para>
73+
/// <para>Its functionality is exposed by the public <see cref="Math.BigMul(ulong, ulong, out ulong)" />.</para>
74+
/// <para>Can emit either mul or mulx depending on hardware</para>
7375
/// </remarks>
7476
internal static (ulong Lower, ulong Upper) BigMul(ulong left, ulong right) { throw new PlatformNotSupportedException(); }
7577

@@ -128,21 +130,19 @@ internal X64() { }
128130

129131
/// <summary>
130132
/// <para> MUL reg/m32</para>
133+
/// <para> MULX reg reg reg/m32 (if BMI2 is supported)</para>
131134
/// </summary>
132-
/// <remarks>
133-
/// <para>Its functionality is exposed in the public <see cref="Math" /> class.</para>
134-
/// </remarks>
135135
internal static (uint Lower, uint Upper) BigMul(uint left, uint right) { throw new PlatformNotSupportedException(); }
136136

137137
/// <summary>
138138
/// <para> IMUL reg/m32</para>
139139
/// </summary>
140-
/// <remarks>
141-
/// <para>Its functionality is exposed in the public <see cref="Math" /> class.</para>
142-
/// </remarks>
143140
internal static (int Lower, int Upper) BigMul(int left, int right) { throw new PlatformNotSupportedException(); }
144141

145-
/// <summary> MUL reg/m</summary>
142+
/// <summary>
143+
/// <para> MUL reg/m</para>
144+
/// <para> MULX reg reg reg/m (if BMI2 is supported)</para>
145+
/// </summary>
146146
internal static (nuint Lower, nuint Upper) BigMul(nuint left, nuint right) { throw new PlatformNotSupportedException(); }
147147

148148
/// <summary> IMUL reg/m</summary>

src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/X86Base.cs

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -71,10 +71,11 @@ internal X64() { }
7171
/// <summary>
7272
/// <para>unsigned _umul128(unsigned __int64 Multiplier, unsigned __int64 Multiplicand, unsigned __int64 * HighProduct)</para>
7373
/// <para> MUL reg/m64</para>
74+
/// <para> MULX reg reg reg/m64 (if BMI2 is supported)</para>
7475
/// </summary>
7576
/// <remarks>
7677
/// <para>Its functionality is exposed by the public <see cref="Math.BigMul(ulong, ulong, out ulong)" />.</para>
77-
/// <para>In the future it might emit mulx on compatible hardware</para>
78+
/// <para>Can emit either mul or mulx depending on hardware</para>
7879
/// </remarks>
7980
internal static (ulong Lower, ulong Upper) BigMul(ulong left, ulong right) => BigMul(left, right);
8081

@@ -146,6 +147,7 @@ public static unsafe (int Eax, int Ebx, int Ecx, int Edx) CpuId(int functionId,
146147
#if !MONO
147148
/// <summary>
148149
/// <para> MUL reg/m32</para>
150+
/// <para> MULX reg reg reg/m32 (if BMI2 is supported)</para>
149151
/// </summary>
150152
internal static (uint Lower, uint Upper) BigMul(uint left, uint right) => BigMul(left, right);
151153

@@ -154,7 +156,10 @@ public static unsafe (int Eax, int Ebx, int Ecx, int Edx) CpuId(int functionId,
154156
/// </summary>
155157
internal static (int Lower, int Upper) BigMul(int left, int right) => BigMul(left, right);
156158

157-
/// <summary> MUL reg/m</summary>
159+
/// <summary>
160+
/// <para> MUL reg/m</para>
161+
/// <para> MULX reg reg reg/m (if BMI2 is supported)</para>
162+
/// </summary>
158163
/// <remarks>Intended for UIntPtr.BigMul https://github.com/dotnet/runtime/issues/114731 </remarks>
159164
internal static (nuint Lower, nuint Upper) BigMul(nuint left, nuint right) => BigMul(left, right);
160165

0 commit comments

Comments
 (0)