Skip to content

Commit b1a51d9

Browse files
committed
Updating xarch to utilize EVEX compares and blending where profitable
1 parent f5fee8f commit b1a51d9

File tree

11 files changed

+1204
-478
lines changed

11 files changed

+1204
-478
lines changed

src/coreclr/jit/gentree.cpp

Lines changed: 221 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -18122,9 +18122,9 @@ bool GenTree::canBeContained() const
1812218122
{
1812318123
return false;
1812418124
}
18125-
else if (OperIsHWIntrinsic() && !isContainableHWIntrinsic())
18125+
else if (OperIsHWIntrinsic())
1812618126
{
18127-
return isEmbeddedMaskingCompatibleHWIntrinsic();
18127+
return isContainableHWIntrinsic();
1812818128
}
1812918129

1813018130
return true;
@@ -20305,10 +20305,11 @@ bool GenTree::isCommutativeHWIntrinsic() const
2030520305

2030620306
bool GenTree::isContainableHWIntrinsic() const
2030720307
{
20308-
assert(OperIs(GT_HWINTRINSIC));
20308+
const GenTreeHWIntrinsic* node = AsHWIntrinsic();
20309+
NamedIntrinsic intrinsic = node->GetHWIntrinsicId();
2030920310

2031020311
#ifdef TARGET_XARCH
20311-
switch (AsHWIntrinsic()->GetHWIntrinsicId())
20312+
switch (node->GetHWIntrinsicId())
2031220313
{
2031320314
case NI_X86Base_LoadAlignedVector128:
2031420315
case NI_X86Base_LoadScalarVector128:
@@ -20324,7 +20325,7 @@ bool GenTree::isContainableHWIntrinsic() const
2032420325
case NI_AVX512_ConvertToVector256Int32:
2032520326
case NI_AVX512_ConvertToVector256UInt32:
2032620327
{
20327-
if (varTypeIsFloating(AsHWIntrinsic()->GetSimdBaseType()))
20328+
if (varTypeIsFloating(node->GetSimdBaseType()))
2032820329
{
2032920330
return false;
2033020331
}
@@ -20399,24 +20400,24 @@ bool GenTree::isContainableHWIntrinsic() const
2039920400

2040020401
default:
2040120402
{
20402-
return false;
20403+
return isEmbeddedMaskingCompatible();
2040320404
}
2040420405
}
2040520406
#elif defined(TARGET_ARM64)
20406-
return (AsHWIntrinsic()->GetHWIntrinsicId() == NI_Sve_ConditionalSelect);
20407+
return (intrinsic == NI_Sve_ConditionalSelect);
2040720408
#else
2040820409
return false;
2040920410
#endif // TARGET_XARCH
2041020411
}
2041120412

20412-
bool GenTree::isRMWHWIntrinsic(Compiler* comp)
20413+
bool GenTree::isRMWHWIntrinsic(Compiler* comp) const
2041320414
{
2041420415
assert(OperIs(GT_HWINTRINSIC));
2041520416
assert(comp != nullptr);
2041620417

2041720418
#if defined(TARGET_XARCH)
20418-
GenTreeHWIntrinsic* hwintrinsic = AsHWIntrinsic();
20419-
NamedIntrinsic intrinsicId = hwintrinsic->GetHWIntrinsicId();
20419+
const GenTreeHWIntrinsic* hwintrinsic = AsHWIntrinsic();
20420+
NamedIntrinsic intrinsicId = hwintrinsic->GetHWIntrinsicId();
2042020421

2042120422
if (!comp->canUseVexEncoding())
2042220423
{
@@ -20609,43 +20610,242 @@ bool GenTree::isEmbeddedBroadcastCompatibleHWIntrinsic(Compiler* comp) const
2060920610
#endif // TARGET_XARCH
2061020611

2061120612
//------------------------------------------------------------------------
20612-
// isEmbeddedMaskingCompatibleHWIntrinsic : Checks if the intrinsic is compatible
20613+
// isEmbeddedMaskingCompatible : Checks if the node is a hwintrinsic compatible
2061320614
// with the EVEX embedded masking form for its intended lowering instruction.
2061420615
//
2061520616
// Return Value:
20616-
// true if the intrinsic node lowering instruction has a EVEX embedded masking support
20617+
// true if the node's lowering instruction has EVEX embedded masking support
2061720618
//
20618-
bool GenTree::isEmbeddedMaskingCompatibleHWIntrinsic() const
20619+
bool GenTree::isEmbeddedMaskingCompatible() const
2061920620
{
2062020621
if (OperIsHWIntrinsic())
2062120622
{
20622-
NamedIntrinsic intrinsicId = AsHWIntrinsic()->GetHWIntrinsicId();
20623+
const GenTreeHWIntrinsic* node = AsHWIntrinsic();
20624+
NamedIntrinsic intrinsic = node->GetHWIntrinsicId();
20625+
2062320626
#if defined(TARGET_XARCH)
20624-
var_types simdBaseType = AsHWIntrinsic()->GetSimdBaseType();
20627+
var_types simdBaseType = node->GetSimdBaseType();
2062520628

2062620629
if (simdBaseType == TYP_UNKNOWN)
2062720630
{
2062820631
// Various scalar intrinsics don't support masking
2062920632
return false;
2063020633
}
2063120634

20632-
if (AsHWIntrinsic()->OperIsMemoryLoadOrStore())
20635+
HWIntrinsicCategory category = HWIntrinsicInfo::lookupCategory(intrinsic);
20636+
20637+
if (!HWIntrinsicInfo::genIsTableDrivenHWIntrinsic(intrinsic, category))
20638+
{
20639+
// TODO-AVX512-CQ: Codegen is currently limited to only handling embedded
20640+
// masking for table driven intrinsics. This can be relaxed once that is fixed.
20641+
return false;
20642+
}
20643+
20644+
if (node->OperIsMemoryLoadOrStore())
2063320645
{
2063420646
// Direct loads and stores cannot be embedded masking compatible
2063520647
// as they may suppress faults that should otherwise be raised
2063620648
return false;
2063720649
}
2063820650

20639-
instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, simdBaseType, nullptr);
20651+
instruction ins = HWIntrinsicInfo::lookupIns(intrinsic, simdBaseType, nullptr);
2064020652
return CodeGenInterface::instIsEmbeddedMaskingCompatible(ins);
2064120653
#elif defined(TARGET_ARM64)
20642-
return HWIntrinsicInfo::IsEmbeddedMaskedOperation(intrinsicId) ||
20643-
HWIntrinsicInfo::IsOptionalEmbeddedMaskedOperation(intrinsicId);
20654+
return HWIntrinsicInfo::IsEmbeddedMaskedOperation(intrinsic) ||
20655+
HWIntrinsicInfo::IsOptionalEmbeddedMaskedOperation(intrinsic);
2064420656
#endif
2064520657
}
2064620658
return false;
2064720659
}
2064820660

20661+
#if defined(TARGET_XARCH)
20662+
//------------------------------------------------------------------------
20663+
// isEmbeddedMaskingCompatible : Checks if the node is a hwintrinsic compatible
20664+
// with the EVEX embedded masking form for its intended lowering instruction.
20665+
//
20666+
// Arguments:
20667+
// comp - The compiler
20668+
// tgtMaskSize - The mask size to check compatibility against
20669+
// tgtSimdBaseJitType - The target simd base jit type to use if supported
20670+
//
20671+
// Return Value:
20672+
// true if the node lowering instruction has a EVEX embedded masking support
20673+
//
20674+
bool GenTree::isEmbeddedMaskingCompatible(Compiler* comp, unsigned tgtMaskSize, CorInfoType& tgtSimdBaseJitType) const
20675+
{
20676+
if (!isEmbeddedMaskingCompatible())
20677+
{
20678+
return false;
20679+
}
20680+
20681+
if (comp->opts.MinOpts())
20682+
{
20683+
return false;
20684+
}
20685+
20686+
if (comp->canUseEmbeddedMasking())
20687+
{
20688+
return false;
20689+
}
20690+
20691+
if (isRMWHWIntrinsic(comp))
20692+
{
20693+
// TODO-AVX512-CQ: Ensure we can support embedded operations on RMW intrinsics
20694+
return false;
20695+
}
20696+
20697+
const GenTreeHWIntrinsic* node = AsHWIntrinsic();
20698+
NamedIntrinsic intrinsic = node->GetHWIntrinsicId();
20699+
CorInfoType simdBaseJitType = node->GetSimdBaseJitType();
20700+
var_types simdBaseType = node->GetSimdBaseType();
20701+
var_types simdType = node->TypeGet();
20702+
20703+
instruction ins = HWIntrinsicInfo::lookupIns(intrinsic, simdBaseType, comp);
20704+
unsigned maskBaseSize = CodeGenInterface::instKMaskBaseSize(ins);
20705+
unsigned tgtMaskBaseSize = tgtMaskSize / (genTypeSize(simdType) / 16);
20706+
20707+
tgtSimdBaseJitType = CORINFO_TYPE_UNDEF;
20708+
20709+
if (maskBaseSize != tgtMaskBaseSize)
20710+
{
20711+
// Some intrinsics are effectively bitwise operations and so we
20712+
// can freely update them to match the size of the actual mask
20713+
20714+
bool supportsMaskBaseSize4Or8 = false;
20715+
20716+
switch (ins)
20717+
{
20718+
case INS_andpd:
20719+
case INS_andps:
20720+
case INS_andnpd:
20721+
case INS_andnps:
20722+
case INS_orpd:
20723+
case INS_orps:
20724+
case INS_pandd:
20725+
case INS_pandnd:
20726+
case INS_pord:
20727+
case INS_pxord:
20728+
case INS_vpandq:
20729+
case INS_vpandnq:
20730+
case INS_vporq:
20731+
case INS_vpxorq:
20732+
case INS_vshuff32x4:
20733+
case INS_vshuff64x2:
20734+
case INS_vshufi32x4:
20735+
case INS_vshufi64x2:
20736+
case INS_xorpd:
20737+
case INS_xorps:
20738+
{
20739+
// These intrinsics support embedded broadcast and have masking support for 4 or 8
20740+
assert((maskBaseSize == 4) || (maskBaseSize == 8));
20741+
20742+
if (!comp->codeGen->IsEmbeddedBroadcastEnabled(ins, node->Op(2)))
20743+
{
20744+
// We cannot change the base type if we've already contained a broadcast
20745+
supportsMaskBaseSize4Or8 = true;
20746+
}
20747+
break;
20748+
}
20749+
20750+
case INS_vpternlogd:
20751+
case INS_vpternlogq:
20752+
{
20753+
// These intrinsics support embedded broadcast and have masking support for 4 or 8
20754+
assert((maskBaseSize == 4) || (maskBaseSize == 8));
20755+
20756+
if (!comp->codeGen->IsEmbeddedBroadcastEnabled(ins, node->Op(3)))
20757+
{
20758+
// We cannot change the base type if we've already contained a broadcast
20759+
supportsMaskBaseSize4Or8 = true;
20760+
}
20761+
break;
20762+
}
20763+
20764+
case INS_vbroadcastf32x4:
20765+
case INS_vbroadcastf32x8:
20766+
case INS_vbroadcastf64x2:
20767+
case INS_vbroadcastf64x4:
20768+
case INS_vbroadcasti32x4:
20769+
case INS_vbroadcasti32x8:
20770+
case INS_vbroadcasti64x2:
20771+
case INS_vbroadcasti64x4:
20772+
case INS_vextractf32x4:
20773+
case INS_vextractf32x8:
20774+
case INS_vextractf64x2:
20775+
case INS_vextractf64x4:
20776+
case INS_vextracti32x4:
20777+
case INS_vextracti32x8:
20778+
case INS_vextracti64x2:
20779+
case INS_vextracti64x4:
20780+
case INS_vinsertf32x4:
20781+
case INS_vinsertf32x8:
20782+
case INS_vinsertf64x2:
20783+
case INS_vinsertf64x4:
20784+
case INS_vinserti32x4:
20785+
case INS_vinserti32x8:
20786+
case INS_vinserti64x2:
20787+
case INS_vinserti64x4:
20788+
{
20789+
// These intrinsics don't support embedded broadcast and have masking support for 4 or 8
20790+
assert((maskBaseSize == 4) || (maskBaseSize == 8));
20791+
supportsMaskBaseSize4Or8 = true;
20792+
break;
20793+
}
20794+
20795+
default:
20796+
{
20797+
break;
20798+
}
20799+
}
20800+
20801+
if (supportsMaskBaseSize4Or8)
20802+
{
20803+
if (tgtMaskBaseSize == 8)
20804+
{
20805+
if (varTypeIsFloating(simdBaseType))
20806+
{
20807+
tgtSimdBaseJitType = CORINFO_TYPE_DOUBLE;
20808+
}
20809+
else if (varTypeIsSigned(simdBaseType))
20810+
{
20811+
tgtSimdBaseJitType = CORINFO_TYPE_LONG;
20812+
}
20813+
else
20814+
{
20815+
tgtSimdBaseJitType = CORINFO_TYPE_ULONG;
20816+
}
20817+
}
20818+
else if (tgtMaskBaseSize == 4)
20819+
{
20820+
if (varTypeIsFloating(simdBaseType))
20821+
{
20822+
tgtSimdBaseJitType = CORINFO_TYPE_FLOAT;
20823+
}
20824+
else if (varTypeIsSigned(simdBaseType))
20825+
{
20826+
tgtSimdBaseJitType = CORINFO_TYPE_INT;
20827+
}
20828+
else
20829+
{
20830+
tgtSimdBaseJitType = CORINFO_TYPE_UINT;
20831+
}
20832+
}
20833+
}
20834+
}
20835+
20836+
if (tgtSimdBaseJitType != CORINFO_TYPE_UNDEF)
20837+
{
20838+
ins = HWIntrinsicInfo::lookupIns(intrinsic, simdBaseType, comp);
20839+
maskBaseSize = CodeGenInterface::instKMaskBaseSize(ins);
20840+
}
20841+
20842+
unsigned maskSize = maskBaseSize * (genTypeSize(simdType) / 16);
20843+
assert(maskSize != 0);
20844+
20845+
return maskSize != tgtMaskSize;
20846+
}
20847+
#endif // TARGET_XARCH
20848+
2064920849
GenTreeHWIntrinsic* Compiler::gtNewSimdHWIntrinsicNode(var_types type,
2065020850
NamedIntrinsic hwIntrinsicID,
2065120851
CorInfoType simdBaseJitType,
@@ -30843,47 +31043,9 @@ var_types GenTreeHWIntrinsic::GetLookupTypeForCmpOp(
3084331043
var_types lookupType = type;
3084431044

3084531045
#if defined(TARGET_XARCH)
30846-
if (reverseCond)
31046+
if ((simdSize == 64) || comp->canUseEvexEncoding())
3084731047
{
30848-
oper = ReverseRelop(oper);
30849-
}
30850-
30851-
switch (oper)
30852-
{
30853-
case GT_EQ:
30854-
{
30855-
if (simdSize == 64)
30856-
{
30857-
lookupType = TYP_MASK;
30858-
}
30859-
break;
30860-
}
30861-
30862-
case GT_GE:
30863-
case GT_LE:
30864-
case GT_NE:
30865-
{
30866-
if ((simdSize == 64) || (varTypeIsIntegral(simdBaseType) && comp->canUseEvexEncoding()))
30867-
{
30868-
lookupType = TYP_MASK;
30869-
}
30870-
break;
30871-
}
30872-
30873-
case GT_GT:
30874-
case GT_LT:
30875-
{
30876-
if ((simdSize == 64) || (varTypeIsUnsigned(simdBaseType) && comp->canUseEvexEncoding()))
30877-
{
30878-
lookupType = TYP_MASK;
30879-
}
30880-
break;
30881-
}
30882-
30883-
default:
30884-
{
30885-
unreached();
30886-
}
31048+
lookupType = TYP_MASK;
3088731049
}
3088831050
#endif // TARGET_XARCH
3088931051

0 commit comments

Comments
 (0)