@@ -18122,9 +18122,9 @@ bool GenTree::canBeContained() const
1812218122 {
1812318123 return false;
1812418124 }
18125- else if (OperIsHWIntrinsic() && !isContainableHWIntrinsic() )
18125+ else if (OperIsHWIntrinsic())
1812618126 {
18127- return isEmbeddedMaskingCompatibleHWIntrinsic ();
18127+ return isContainableHWIntrinsic ();
1812818128 }
1812918129
1813018130 return true;
@@ -20305,10 +20305,11 @@ bool GenTree::isCommutativeHWIntrinsic() const
2030520305
2030620306bool GenTree::isContainableHWIntrinsic() const
2030720307{
20308- assert(OperIs(GT_HWINTRINSIC));
20308+ const GenTreeHWIntrinsic* node = AsHWIntrinsic();
20309+ NamedIntrinsic intrinsic = node->GetHWIntrinsicId();
2030920310
2031020311#ifdef TARGET_XARCH
20311- switch (AsHWIntrinsic() ->GetHWIntrinsicId())
20312+ switch (node ->GetHWIntrinsicId())
2031220313 {
2031320314 case NI_X86Base_LoadAlignedVector128:
2031420315 case NI_X86Base_LoadScalarVector128:
@@ -20324,7 +20325,7 @@ bool GenTree::isContainableHWIntrinsic() const
2032420325 case NI_AVX512_ConvertToVector256Int32:
2032520326 case NI_AVX512_ConvertToVector256UInt32:
2032620327 {
20327- if (varTypeIsFloating(AsHWIntrinsic() ->GetSimdBaseType()))
20328+ if (varTypeIsFloating(node ->GetSimdBaseType()))
2032820329 {
2032920330 return false;
2033020331 }
@@ -20399,24 +20400,24 @@ bool GenTree::isContainableHWIntrinsic() const
2039920400
2040020401 default:
2040120402 {
20402- return false ;
20403+ return isEmbeddedMaskingCompatible() ;
2040320404 }
2040420405 }
2040520406#elif defined(TARGET_ARM64)
20406- return (AsHWIntrinsic()->GetHWIntrinsicId() == NI_Sve_ConditionalSelect);
20407+ return (intrinsic == NI_Sve_ConditionalSelect);
2040720408#else
2040820409 return false;
2040920410#endif // TARGET_XARCH
2041020411}
2041120412
20412- bool GenTree::isRMWHWIntrinsic(Compiler* comp)
20413+ bool GenTree::isRMWHWIntrinsic(Compiler* comp) const
2041320414{
2041420415 assert(OperIs(GT_HWINTRINSIC));
2041520416 assert(comp != nullptr);
2041620417
2041720418#if defined(TARGET_XARCH)
20418- GenTreeHWIntrinsic* hwintrinsic = AsHWIntrinsic();
20419- NamedIntrinsic intrinsicId = hwintrinsic->GetHWIntrinsicId();
20419+ const GenTreeHWIntrinsic* hwintrinsic = AsHWIntrinsic();
20420+ NamedIntrinsic intrinsicId = hwintrinsic->GetHWIntrinsicId();
2042020421
2042120422 if (!comp->canUseVexEncoding())
2042220423 {
@@ -20609,43 +20610,242 @@ bool GenTree::isEmbeddedBroadcastCompatibleHWIntrinsic(Compiler* comp) const
2060920610#endif // TARGET_XARCH
2061020611
2061120612//------------------------------------------------------------------------
20612- // isEmbeddedMaskingCompatibleHWIntrinsic : Checks if the intrinsic is compatible
20613+ // isEmbeddedMaskingCompatible : Checks if the node is a hwintrinsic compatible
2061320614// with the EVEX embedded masking form for its intended lowering instruction.
2061420615//
2061520616// Return Value:
20616- // true if the intrinsic node lowering instruction has a EVEX embedded masking support
20617+ // true if the node lowering instruction has a EVEX embedded masking support
2061720618//
20618- bool GenTree::isEmbeddedMaskingCompatibleHWIntrinsic () const
20619+ bool GenTree::isEmbeddedMaskingCompatible () const
2061920620{
2062020621 if (OperIsHWIntrinsic())
2062120622 {
20622- NamedIntrinsic intrinsicId = AsHWIntrinsic()->GetHWIntrinsicId();
20623+ const GenTreeHWIntrinsic* node = AsHWIntrinsic();
20624+ NamedIntrinsic intrinsic = node->GetHWIntrinsicId();
20625+
2062320626#if defined(TARGET_XARCH)
20624- var_types simdBaseType = AsHWIntrinsic() ->GetSimdBaseType();
20627+ var_types simdBaseType = node ->GetSimdBaseType();
2062520628
2062620629 if (simdBaseType == TYP_UNKNOWN)
2062720630 {
2062820631 // Various scalar intrinsics don't support masking
2062920632 return false;
2063020633 }
2063120634
20632- if (AsHWIntrinsic()->OperIsMemoryLoadOrStore())
20635+ HWIntrinsicCategory category = HWIntrinsicInfo::lookupCategory(intrinsic);
20636+
20637+ if (!HWIntrinsicInfo::genIsTableDrivenHWIntrinsic(intrinsic, category))
20638+ {
20639+ // TODO-AVX512-CQ: Codegen is currently limited to only handling embedded
20640+ // masking for table driven intrinsics. This can be relaxed once that is fixed.
20641+ return false;
20642+ }
20643+
20644+ if (node->OperIsMemoryLoadOrStore())
2063320645 {
2063420646 // Direct loads and stores cannot be embedded masking compatible
2063520647 // as they may suppress faults that should otherwise be raised
2063620648 return false;
2063720649 }
2063820650
20639- instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId , simdBaseType, nullptr);
20651+ instruction ins = HWIntrinsicInfo::lookupIns(intrinsic , simdBaseType, nullptr);
2064020652 return CodeGenInterface::instIsEmbeddedMaskingCompatible(ins);
2064120653#elif defined(TARGET_ARM64)
20642- return HWIntrinsicInfo::IsEmbeddedMaskedOperation(intrinsicId ) ||
20643- HWIntrinsicInfo::IsOptionalEmbeddedMaskedOperation(intrinsicId );
20654+ return HWIntrinsicInfo::IsEmbeddedMaskedOperation(intrinsic ) ||
20655+ HWIntrinsicInfo::IsOptionalEmbeddedMaskedOperation(intrinsic );
2064420656#endif
2064520657 }
2064620658 return false;
2064720659}
2064820660
20661+ #if defined(TARGET_XARCH)
20662+ //------------------------------------------------------------------------
20663+ // isEmbeddedMaskingCompatible : Checks if the node is a hwintrinsic compatible
20664+ // with the EVEX embedded masking form for its intended lowering instruction.
20665+ //
20666+ // Arguments:
20667+ // comp - The compiler
20668+ // tgtMaskSize - The mask size to check compatibility against
20669+ // tgtSimdBaseJitType - The target simd base jit type to use if supported
20670+ //
20671+ // Return Value:
20672+ // true if the node lowering instruction has a EVEX embedded masking support
20673+ //
20674+ bool GenTree::isEmbeddedMaskingCompatible(Compiler* comp, unsigned tgtMaskSize, CorInfoType& tgtSimdBaseJitType) const
20675+ {
20676+ if (!isEmbeddedMaskingCompatible())
20677+ {
20678+ return false;
20679+ }
20680+
20681+ if (comp->opts.MinOpts())
20682+ {
20683+ return false;
20684+ }
20685+
20686+ if (comp->canUseEmbeddedMasking())
20687+ {
20688+ return false;
20689+ }
20690+
20691+ if (isRMWHWIntrinsic(comp))
20692+ {
20693+ // TODO-AVX512-CQ: Ensure we can support embedded operations on RMW intrinsics
20694+ return false;
20695+ }
20696+
20697+ const GenTreeHWIntrinsic* node = AsHWIntrinsic();
20698+ NamedIntrinsic intrinsic = node->GetHWIntrinsicId();
20699+ CorInfoType simdBaseJitType = node->GetSimdBaseJitType();
20700+ var_types simdBaseType = node->GetSimdBaseType();
20701+ var_types simdType = node->TypeGet();
20702+
20703+ instruction ins = HWIntrinsicInfo::lookupIns(intrinsic, simdBaseType, comp);
20704+ unsigned maskBaseSize = CodeGenInterface::instKMaskBaseSize(ins);
20705+ unsigned tgtMaskBaseSize = tgtMaskSize / (genTypeSize(simdType) / 16);
20706+
20707+ tgtSimdBaseJitType = CORINFO_TYPE_UNDEF;
20708+
20709+ if (maskBaseSize != tgtMaskBaseSize)
20710+ {
20711+ // Some intrinsics are effectively bitwise operations and so we
20712+ // can freely update them to match the size of the actual mask
20713+
20714+ bool supportsMaskBaseSize4Or8 = false;
20715+
20716+ switch (ins)
20717+ {
20718+ case INS_andpd:
20719+ case INS_andps:
20720+ case INS_andnpd:
20721+ case INS_andnps:
20722+ case INS_orpd:
20723+ case INS_orps:
20724+ case INS_pandd:
20725+ case INS_pandnd:
20726+ case INS_pord:
20727+ case INS_pxord:
20728+ case INS_vpandq:
20729+ case INS_vpandnq:
20730+ case INS_vporq:
20731+ case INS_vpxorq:
20732+ case INS_vshuff32x4:
20733+ case INS_vshuff64x2:
20734+ case INS_vshufi32x4:
20735+ case INS_vshufi64x2:
20736+ case INS_xorpd:
20737+ case INS_xorps:
20738+ {
20739+ // These intrinsics support embedded broadcast and have masking support for 4 or 8
20740+ assert((maskBaseSize == 4) || (maskBaseSize == 8));
20741+
20742+ if (!comp->codeGen->IsEmbeddedBroadcastEnabled(ins, node->Op(2)))
20743+ {
20744+ // We cannot change the base type if we've already contained a broadcast
20745+ supportsMaskBaseSize4Or8 = true;
20746+ }
20747+ break;
20748+ }
20749+
20750+ case INS_vpternlogd:
20751+ case INS_vpternlogq:
20752+ {
20753+ // These intrinsics support embedded broadcast and have masking support for 4 or 8
20754+ assert((maskBaseSize == 4) || (maskBaseSize == 8));
20755+
20756+ if (!comp->codeGen->IsEmbeddedBroadcastEnabled(ins, node->Op(3)))
20757+ {
20758+ // We cannot change the base type if we've already contained a broadcast
20759+ supportsMaskBaseSize4Or8 = true;
20760+ }
20761+ break;
20762+ }
20763+
20764+ case INS_vbroadcastf32x4:
20765+ case INS_vbroadcastf32x8:
20766+ case INS_vbroadcastf64x2:
20767+ case INS_vbroadcastf64x4:
20768+ case INS_vbroadcasti32x4:
20769+ case INS_vbroadcasti32x8:
20770+ case INS_vbroadcasti64x2:
20771+ case INS_vbroadcasti64x4:
20772+ case INS_vextractf32x4:
20773+ case INS_vextractf32x8:
20774+ case INS_vextractf64x2:
20775+ case INS_vextractf64x4:
20776+ case INS_vextracti32x4:
20777+ case INS_vextracti32x8:
20778+ case INS_vextracti64x2:
20779+ case INS_vextracti64x4:
20780+ case INS_vinsertf32x4:
20781+ case INS_vinsertf32x8:
20782+ case INS_vinsertf64x2:
20783+ case INS_vinsertf64x4:
20784+ case INS_vinserti32x4:
20785+ case INS_vinserti32x8:
20786+ case INS_vinserti64x2:
20787+ case INS_vinserti64x4:
20788+ {
20789+ // These intrinsics don't support embedded broadcast and have masking support for 4 or 8
20790+ assert((maskBaseSize == 4) || (maskBaseSize == 8));
20791+ supportsMaskBaseSize4Or8 = true;
20792+ break;
20793+ }
20794+
20795+ default:
20796+ {
20797+ break;
20798+ }
20799+ }
20800+
20801+ if (supportsMaskBaseSize4Or8)
20802+ {
20803+ if (tgtMaskBaseSize == 8)
20804+ {
20805+ if (varTypeIsFloating(simdBaseType))
20806+ {
20807+ tgtSimdBaseJitType = CORINFO_TYPE_DOUBLE;
20808+ }
20809+ else if (varTypeIsSigned(simdBaseType))
20810+ {
20811+ tgtSimdBaseJitType = CORINFO_TYPE_LONG;
20812+ }
20813+ else
20814+ {
20815+ tgtSimdBaseJitType = CORINFO_TYPE_ULONG;
20816+ }
20817+ }
20818+ else if (tgtMaskBaseSize == 4)
20819+ {
20820+ if (varTypeIsFloating(simdBaseType))
20821+ {
20822+ tgtSimdBaseJitType = CORINFO_TYPE_FLOAT;
20823+ }
20824+ else if (varTypeIsSigned(simdBaseType))
20825+ {
20826+ tgtSimdBaseJitType = CORINFO_TYPE_INT;
20827+ }
20828+ else
20829+ {
20830+ tgtSimdBaseJitType = CORINFO_TYPE_UINT;
20831+ }
20832+ }
20833+ }
20834+ }
20835+
20836+ if (tgtSimdBaseJitType != CORINFO_TYPE_UNDEF)
20837+ {
20838+ ins = HWIntrinsicInfo::lookupIns(intrinsic, simdBaseType, comp);
20839+ maskBaseSize = CodeGenInterface::instKMaskBaseSize(ins);
20840+ }
20841+
20842+ unsigned maskSize = maskBaseSize * (genTypeSize(simdType) / 16);
20843+ assert(maskSize != 0);
20844+
20845+ return maskSize != tgtMaskSize;
20846+ }
20847+ #endif // TARGET_XARCH
20848+
2064920849GenTreeHWIntrinsic* Compiler::gtNewSimdHWIntrinsicNode(var_types type,
2065020850 NamedIntrinsic hwIntrinsicID,
2065120851 CorInfoType simdBaseJitType,
@@ -30843,47 +31043,9 @@ var_types GenTreeHWIntrinsic::GetLookupTypeForCmpOp(
3084331043 var_types lookupType = type;
3084431044
3084531045#if defined(TARGET_XARCH)
30846- if (reverseCond )
31046+ if ((simdSize == 64) || comp->canUseEvexEncoding() )
3084731047 {
30848- oper = ReverseRelop(oper);
30849- }
30850-
30851- switch (oper)
30852- {
30853- case GT_EQ:
30854- {
30855- if (simdSize == 64)
30856- {
30857- lookupType = TYP_MASK;
30858- }
30859- break;
30860- }
30861-
30862- case GT_GE:
30863- case GT_LE:
30864- case GT_NE:
30865- {
30866- if ((simdSize == 64) || (varTypeIsIntegral(simdBaseType) && comp->canUseEvexEncoding()))
30867- {
30868- lookupType = TYP_MASK;
30869- }
30870- break;
30871- }
30872-
30873- case GT_GT:
30874- case GT_LT:
30875- {
30876- if ((simdSize == 64) || (varTypeIsUnsigned(simdBaseType) && comp->canUseEvexEncoding()))
30877- {
30878- lookupType = TYP_MASK;
30879- }
30880- break;
30881- }
30882-
30883- default:
30884- {
30885- unreached();
30886- }
31048+ lookupType = TYP_MASK;
3088731049 }
3088831050#endif // TARGET_XARCH
3088931051
0 commit comments