[X86][tablegen] Fix the broadcast tables #79675
Merged
Merged
Conversation
Member
|
@llvm/pr-subscribers-llvm-support @llvm/pr-subscribers-backend-x86
Author: Shengchen Kan (KanRobert)
Changes
Patch is 36.81 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/79675.diff
3 Files Affected:
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index fe7d90fbcdf7070..bb5e22c71427939 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -12422,7 +12422,7 @@ multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode,
: avx512_3Op_rm_imm8<Op, OpStr, OpNode, sched, VTI, VTI> {
let ExeDomain = VTI.ExeDomain in
defm rmbi : AVX512_maskable<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
- (ins VTI.RC:$src1, VTI.ScalarMemOp:$src2, u8imm:$src3),
+ (ins VTI.RC:$src1, BcstVTI.ScalarMemOp:$src2, u8imm:$src3),
OpStr, "$src3, ${src2}"#BcstVTI.BroadcastStr#", $src1",
"$src1, ${src2}"#BcstVTI.BroadcastStr#", $src3",
(OpNode (VTI.VT VTI.RC:$src1),
diff --git a/llvm/test/TableGen/x86-fold-tables.inc b/llvm/test/TableGen/x86-fold-tables.inc
index 029beeff0564438..dc400548ef2f0a7 100644
--- a/llvm/test/TableGen/x86-fold-tables.inc
+++ b/llvm/test/TableGen/x86-fold-tables.inc
@@ -6626,15 +6626,15 @@ static const X86FoldTableEntry Table4[] = {
};
static const X86FoldTableEntry BroadcastTable1[] = {
- {X86::VCVTDQ2PDZ128rr, X86::VCVTDQ2PDZ128rmb, TB_BCAST_SD},
- {X86::VCVTDQ2PDZ256rr, X86::VCVTDQ2PDZ256rmb, TB_BCAST_SD},
- {X86::VCVTDQ2PDZrr, X86::VCVTDQ2PDZrmb, TB_BCAST_SD},
- {X86::VCVTDQ2PHZ128rr, X86::VCVTDQ2PHZ128rmb, TB_BCAST_SH},
- {X86::VCVTDQ2PHZ256rr, X86::VCVTDQ2PHZ256rmb, TB_BCAST_SH},
- {X86::VCVTDQ2PHZrr, X86::VCVTDQ2PHZrmb, TB_BCAST_SH},
- {X86::VCVTDQ2PSZ128rr, X86::VCVTDQ2PSZ128rmb, TB_BCAST_SS},
- {X86::VCVTDQ2PSZ256rr, X86::VCVTDQ2PSZ256rmb, TB_BCAST_SS},
- {X86::VCVTDQ2PSZrr, X86::VCVTDQ2PSZrmb, TB_BCAST_SS},
+ {X86::VCVTDQ2PDZ128rr, X86::VCVTDQ2PDZ128rmb, TB_BCAST_D},
+ {X86::VCVTDQ2PDZ256rr, X86::VCVTDQ2PDZ256rmb, TB_BCAST_D},
+ {X86::VCVTDQ2PDZrr, X86::VCVTDQ2PDZrmb, TB_BCAST_D},
+ {X86::VCVTDQ2PHZ128rr, X86::VCVTDQ2PHZ128rmb, TB_BCAST_D},
+ {X86::VCVTDQ2PHZ256rr, X86::VCVTDQ2PHZ256rmb, TB_BCAST_D},
+ {X86::VCVTDQ2PHZrr, X86::VCVTDQ2PHZrmb, TB_BCAST_D},
+ {X86::VCVTDQ2PSZ128rr, X86::VCVTDQ2PSZ128rmb, TB_BCAST_D},
+ {X86::VCVTDQ2PSZ256rr, X86::VCVTDQ2PSZ256rmb, TB_BCAST_D},
+ {X86::VCVTDQ2PSZrr, X86::VCVTDQ2PSZrmb, TB_BCAST_D},
{X86::VCVTNEPS2BF16Z128rr, X86::VCVTNEPS2BF16Z128rmb, TB_BCAST_SS},
{X86::VCVTNEPS2BF16Z256rr, X86::VCVTNEPS2BF16Z256rmb, TB_BCAST_SS},
{X86::VCVTNEPS2BF16Zrr, X86::VCVTNEPS2BF16Zrmb, TB_BCAST_SS},
@@ -6644,9 +6644,9 @@ static const X86FoldTableEntry BroadcastTable1[] = {
{X86::VCVTPD2PHZ128rr, X86::VCVTPD2PHZ128rmb, TB_BCAST_SD},
{X86::VCVTPD2PHZ256rr, X86::VCVTPD2PHZ256rmb, TB_BCAST_SD},
{X86::VCVTPD2PHZrr, X86::VCVTPD2PHZrmb, TB_BCAST_SD},
- {X86::VCVTPD2PSZ128rr, X86::VCVTPD2PSZ128rmb, TB_BCAST_SS},
- {X86::VCVTPD2PSZ256rr, X86::VCVTPD2PSZ256rmb, TB_BCAST_SS},
- {X86::VCVTPD2PSZrr, X86::VCVTPD2PSZrmb, TB_BCAST_SS},
+ {X86::VCVTPD2PSZ128rr, X86::VCVTPD2PSZ128rmb, TB_BCAST_SD},
+ {X86::VCVTPD2PSZ256rr, X86::VCVTPD2PSZ256rmb, TB_BCAST_SD},
+ {X86::VCVTPD2PSZrr, X86::VCVTPD2PSZrmb, TB_BCAST_SD},
{X86::VCVTPD2QQZ128rr, X86::VCVTPD2QQZ128rmb, TB_BCAST_SD},
{X86::VCVTPD2QQZ256rr, X86::VCVTPD2QQZ256rmb, TB_BCAST_SD},
{X86::VCVTPD2QQZrr, X86::VCVTPD2QQZrmb, TB_BCAST_SD},
@@ -6698,15 +6698,15 @@ static const X86FoldTableEntry BroadcastTable1[] = {
{X86::VCVTPS2UQQZ128rr, X86::VCVTPS2UQQZ128rmb, TB_BCAST_SS},
{X86::VCVTPS2UQQZ256rr, X86::VCVTPS2UQQZ256rmb, TB_BCAST_SS},
{X86::VCVTPS2UQQZrr, X86::VCVTPS2UQQZrmb, TB_BCAST_SS},
- {X86::VCVTQQ2PDZ128rr, X86::VCVTQQ2PDZ128rmb, TB_BCAST_SD},
- {X86::VCVTQQ2PDZ256rr, X86::VCVTQQ2PDZ256rmb, TB_BCAST_SD},
- {X86::VCVTQQ2PDZrr, X86::VCVTQQ2PDZrmb, TB_BCAST_SD},
- {X86::VCVTQQ2PHZ128rr, X86::VCVTQQ2PHZ128rmb, TB_BCAST_SH},
- {X86::VCVTQQ2PHZ256rr, X86::VCVTQQ2PHZ256rmb, TB_BCAST_SH},
- {X86::VCVTQQ2PHZrr, X86::VCVTQQ2PHZrmb, TB_BCAST_SH},
- {X86::VCVTQQ2PSZ128rr, X86::VCVTQQ2PSZ128rmb, TB_BCAST_SS},
- {X86::VCVTQQ2PSZ256rr, X86::VCVTQQ2PSZ256rmb, TB_BCAST_SS},
- {X86::VCVTQQ2PSZrr, X86::VCVTQQ2PSZrmb, TB_BCAST_SS},
+ {X86::VCVTQQ2PDZ128rr, X86::VCVTQQ2PDZ128rmb, TB_BCAST_Q},
+ {X86::VCVTQQ2PDZ256rr, X86::VCVTQQ2PDZ256rmb, TB_BCAST_Q},
+ {X86::VCVTQQ2PDZrr, X86::VCVTQQ2PDZrmb, TB_BCAST_Q},
+ {X86::VCVTQQ2PHZ128rr, X86::VCVTQQ2PHZ128rmb, TB_BCAST_Q},
+ {X86::VCVTQQ2PHZ256rr, X86::VCVTQQ2PHZ256rmb, TB_BCAST_Q},
+ {X86::VCVTQQ2PHZrr, X86::VCVTQQ2PHZrmb, TB_BCAST_Q},
+ {X86::VCVTQQ2PSZ128rr, X86::VCVTQQ2PSZ128rmb, TB_BCAST_Q},
+ {X86::VCVTQQ2PSZ256rr, X86::VCVTQQ2PSZ256rmb, TB_BCAST_Q},
+ {X86::VCVTQQ2PSZrr, X86::VCVTQQ2PSZrmb, TB_BCAST_Q},
{X86::VCVTTPD2DQZ128rr, X86::VCVTTPD2DQZ128rmb, TB_BCAST_SD},
{X86::VCVTTPD2DQZ256rr, X86::VCVTTPD2DQZ256rmb, TB_BCAST_SD},
{X86::VCVTTPD2DQZrr, X86::VCVTTPD2DQZrmb, TB_BCAST_SD},
@@ -6749,24 +6749,24 @@ static const X86FoldTableEntry BroadcastTable1[] = {
{X86::VCVTTPS2UQQZ128rr, X86::VCVTTPS2UQQZ128rmb, TB_BCAST_SS},
{X86::VCVTTPS2UQQZ256rr, X86::VCVTTPS2UQQZ256rmb, TB_BCAST_SS},
{X86::VCVTTPS2UQQZrr, X86::VCVTTPS2UQQZrmb, TB_BCAST_SS},
- {X86::VCVTUDQ2PDZ128rr, X86::VCVTUDQ2PDZ128rmb, TB_BCAST_SD},
- {X86::VCVTUDQ2PDZ256rr, X86::VCVTUDQ2PDZ256rmb, TB_BCAST_SD},
- {X86::VCVTUDQ2PDZrr, X86::VCVTUDQ2PDZrmb, TB_BCAST_SD},
- {X86::VCVTUDQ2PHZ128rr, X86::VCVTUDQ2PHZ128rmb, TB_BCAST_SH},
- {X86::VCVTUDQ2PHZ256rr, X86::VCVTUDQ2PHZ256rmb, TB_BCAST_SH},
- {X86::VCVTUDQ2PHZrr, X86::VCVTUDQ2PHZrmb, TB_BCAST_SH},
- {X86::VCVTUDQ2PSZ128rr, X86::VCVTUDQ2PSZ128rmb, TB_BCAST_SS},
- {X86::VCVTUDQ2PSZ256rr, X86::VCVTUDQ2PSZ256rmb, TB_BCAST_SS},
- {X86::VCVTUDQ2PSZrr, X86::VCVTUDQ2PSZrmb, TB_BCAST_SS},
- {X86::VCVTUQQ2PDZ128rr, X86::VCVTUQQ2PDZ128rmb, TB_BCAST_SD},
- {X86::VCVTUQQ2PDZ256rr, X86::VCVTUQQ2PDZ256rmb, TB_BCAST_SD},
- {X86::VCVTUQQ2PDZrr, X86::VCVTUQQ2PDZrmb, TB_BCAST_SD},
- {X86::VCVTUQQ2PHZ128rr, X86::VCVTUQQ2PHZ128rmb, TB_BCAST_SH},
- {X86::VCVTUQQ2PHZ256rr, X86::VCVTUQQ2PHZ256rmb, TB_BCAST_SH},
- {X86::VCVTUQQ2PHZrr, X86::VCVTUQQ2PHZrmb, TB_BCAST_SH},
- {X86::VCVTUQQ2PSZ128rr, X86::VCVTUQQ2PSZ128rmb, TB_BCAST_SS},
- {X86::VCVTUQQ2PSZ256rr, X86::VCVTUQQ2PSZ256rmb, TB_BCAST_SS},
- {X86::VCVTUQQ2PSZrr, X86::VCVTUQQ2PSZrmb, TB_BCAST_SS},
+ {X86::VCVTUDQ2PDZ128rr, X86::VCVTUDQ2PDZ128rmb, TB_BCAST_D},
+ {X86::VCVTUDQ2PDZ256rr, X86::VCVTUDQ2PDZ256rmb, TB_BCAST_D},
+ {X86::VCVTUDQ2PDZrr, X86::VCVTUDQ2PDZrmb, TB_BCAST_D},
+ {X86::VCVTUDQ2PHZ128rr, X86::VCVTUDQ2PHZ128rmb, TB_BCAST_D},
+ {X86::VCVTUDQ2PHZ256rr, X86::VCVTUDQ2PHZ256rmb, TB_BCAST_D},
+ {X86::VCVTUDQ2PHZrr, X86::VCVTUDQ2PHZrmb, TB_BCAST_D},
+ {X86::VCVTUDQ2PSZ128rr, X86::VCVTUDQ2PSZ128rmb, TB_BCAST_D},
+ {X86::VCVTUDQ2PSZ256rr, X86::VCVTUDQ2PSZ256rmb, TB_BCAST_D},
+ {X86::VCVTUDQ2PSZrr, X86::VCVTUDQ2PSZrmb, TB_BCAST_D},
+ {X86::VCVTUQQ2PDZ128rr, X86::VCVTUQQ2PDZ128rmb, TB_BCAST_Q},
+ {X86::VCVTUQQ2PDZ256rr, X86::VCVTUQQ2PDZ256rmb, TB_BCAST_Q},
+ {X86::VCVTUQQ2PDZrr, X86::VCVTUQQ2PDZrmb, TB_BCAST_Q},
+ {X86::VCVTUQQ2PHZ128rr, X86::VCVTUQQ2PHZ128rmb, TB_BCAST_Q},
+ {X86::VCVTUQQ2PHZ256rr, X86::VCVTUQQ2PHZ256rmb, TB_BCAST_Q},
+ {X86::VCVTUQQ2PHZrr, X86::VCVTUQQ2PHZrmb, TB_BCAST_Q},
+ {X86::VCVTUQQ2PSZ128rr, X86::VCVTUQQ2PSZ128rmb, TB_BCAST_Q},
+ {X86::VCVTUQQ2PSZ256rr, X86::VCVTUQQ2PSZ256rmb, TB_BCAST_Q},
+ {X86::VCVTUQQ2PSZrr, X86::VCVTUQQ2PSZrmb, TB_BCAST_Q},
{X86::VCVTUW2PHZ128rr, X86::VCVTUW2PHZ128rmb, TB_BCAST_SH},
{X86::VCVTUW2PHZ256rr, X86::VCVTUW2PHZ256rmb, TB_BCAST_SH},
{X86::VCVTUW2PHZrr, X86::VCVTUW2PHZrmb, TB_BCAST_SH},
@@ -6963,15 +6963,15 @@ static const X86FoldTableEntry BroadcastTable2[] = {
{X86::VCMPPSZ128rri, X86::VCMPPSZ128rmbi, TB_BCAST_SS},
{X86::VCMPPSZ256rri, X86::VCMPPSZ256rmbi, TB_BCAST_SS},
{X86::VCMPPSZrri, X86::VCMPPSZrmbi, TB_BCAST_SS},
- {X86::VCVTDQ2PDZ128rrkz, X86::VCVTDQ2PDZ128rmbkz, TB_BCAST_SD},
- {X86::VCVTDQ2PDZ256rrkz, X86::VCVTDQ2PDZ256rmbkz, TB_BCAST_SD},
- {X86::VCVTDQ2PDZrrkz, X86::VCVTDQ2PDZrmbkz, TB_BCAST_SD},
- {X86::VCVTDQ2PHZ128rrkz, X86::VCVTDQ2PHZ128rmbkz, TB_BCAST_SH},
- {X86::VCVTDQ2PHZ256rrkz, X86::VCVTDQ2PHZ256rmbkz, TB_BCAST_SH},
- {X86::VCVTDQ2PHZrrkz, X86::VCVTDQ2PHZrmbkz, TB_BCAST_SH},
- {X86::VCVTDQ2PSZ128rrkz, X86::VCVTDQ2PSZ128rmbkz, TB_BCAST_SS},
- {X86::VCVTDQ2PSZ256rrkz, X86::VCVTDQ2PSZ256rmbkz, TB_BCAST_SS},
- {X86::VCVTDQ2PSZrrkz, X86::VCVTDQ2PSZrmbkz, TB_BCAST_SS},
+ {X86::VCVTDQ2PDZ128rrkz, X86::VCVTDQ2PDZ128rmbkz, TB_BCAST_D},
+ {X86::VCVTDQ2PDZ256rrkz, X86::VCVTDQ2PDZ256rmbkz, TB_BCAST_D},
+ {X86::VCVTDQ2PDZrrkz, X86::VCVTDQ2PDZrmbkz, TB_BCAST_D},
+ {X86::VCVTDQ2PHZ128rrkz, X86::VCVTDQ2PHZ128rmbkz, TB_BCAST_D},
+ {X86::VCVTDQ2PHZ256rrkz, X86::VCVTDQ2PHZ256rmbkz, TB_BCAST_D},
+ {X86::VCVTDQ2PHZrrkz, X86::VCVTDQ2PHZrmbkz, TB_BCAST_D},
+ {X86::VCVTDQ2PSZ128rrkz, X86::VCVTDQ2PSZ128rmbkz, TB_BCAST_D},
+ {X86::VCVTDQ2PSZ256rrkz, X86::VCVTDQ2PSZ256rmbkz, TB_BCAST_D},
+ {X86::VCVTDQ2PSZrrkz, X86::VCVTDQ2PSZrmbkz, TB_BCAST_D},
{X86::VCVTNE2PS2BF16Z128rr, X86::VCVTNE2PS2BF16Z128rmb, TB_BCAST_SS},
{X86::VCVTNE2PS2BF16Z256rr, X86::VCVTNE2PS2BF16Z256rmb, TB_BCAST_SS},
{X86::VCVTNE2PS2BF16Zrr, X86::VCVTNE2PS2BF16Zrmb, TB_BCAST_SS},
@@ -6984,9 +6984,9 @@ static const X86FoldTableEntry BroadcastTable2[] = {
{X86::VCVTPD2PHZ128rrkz, X86::VCVTPD2PHZ128rmbkz, TB_BCAST_SD},
{X86::VCVTPD2PHZ256rrkz, X86::VCVTPD2PHZ256rmbkz, TB_BCAST_SD},
{X86::VCVTPD2PHZrrkz, X86::VCVTPD2PHZrmbkz, TB_BCAST_SD},
- {X86::VCVTPD2PSZ128rrkz, X86::VCVTPD2PSZ128rmbkz, TB_BCAST_SS},
- {X86::VCVTPD2PSZ256rrkz, X86::VCVTPD2PSZ256rmbkz, TB_BCAST_SS},
- {X86::VCVTPD2PSZrrkz, X86::VCVTPD2PSZrmbkz, TB_BCAST_SS},
+ {X86::VCVTPD2PSZ128rrkz, X86::VCVTPD2PSZ128rmbkz, TB_BCAST_SD},
+ {X86::VCVTPD2PSZ256rrkz, X86::VCVTPD2PSZ256rmbkz, TB_BCAST_SD},
+ {X86::VCVTPD2PSZrrkz, X86::VCVTPD2PSZrmbkz, TB_BCAST_SD},
{X86::VCVTPD2QQZ128rrkz, X86::VCVTPD2QQZ128rmbkz, TB_BCAST_SD},
{X86::VCVTPD2QQZ256rrkz, X86::VCVTPD2QQZ256rmbkz, TB_BCAST_SD},
{X86::VCVTPD2QQZrrkz, X86::VCVTPD2QQZrmbkz, TB_BCAST_SD},
@@ -7038,15 +7038,15 @@ static const X86FoldTableEntry BroadcastTable2[] = {
{X86::VCVTPS2UQQZ128rrkz, X86::VCVTPS2UQQZ128rmbkz, TB_BCAST_SS},
{X86::VCVTPS2UQQZ256rrkz, X86::VCVTPS2UQQZ256rmbkz, TB_BCAST_SS},
{X86::VCVTPS2UQQZrrkz, X86::VCVTPS2UQQZrmbkz, TB_BCAST_SS},
- {X86::VCVTQQ2PDZ128rrkz, X86::VCVTQQ2PDZ128rmbkz, TB_BCAST_SD},
- {X86::VCVTQQ2PDZ256rrkz, X86::VCVTQQ2PDZ256rmbkz, TB_BCAST_SD},
- {X86::VCVTQQ2PDZrrkz, X86::VCVTQQ2PDZrmbkz, TB_BCAST_SD},
- {X86::VCVTQQ2PHZ128rrkz, X86::VCVTQQ2PHZ128rmbkz, TB_BCAST_SH},
- {X86::VCVTQQ2PHZ256rrkz, X86::VCVTQQ2PHZ256rmbkz, TB_BCAST_SH},
- {X86::VCVTQQ2PHZrrkz, X86::VCVTQQ2PHZrmbkz, TB_BCAST_SH},
- {X86::VCVTQQ2PSZ128rrkz, X86::VCVTQQ2PSZ128rmbkz, TB_BCAST_SS},
- {X86::VCVTQQ2PSZ256rrkz, X86::VCVTQQ2PSZ256rmbkz, TB_BCAST_SS},
- {X86::VCVTQQ2PSZrrkz, X86::VCVTQQ2PSZrmbkz, TB_BCAST_SS},
+ {X86::VCVTQQ2PDZ128rrkz, X86::VCVTQQ2PDZ128rmbkz, TB_BCAST_Q},
+ {X86::VCVTQQ2PDZ256rrkz, X86::VCVTQQ2PDZ256rmbkz, TB_BCAST_Q},
+ {X86::VCVTQQ2PDZrrkz, X86::VCVTQQ2PDZrmbkz, TB_BCAST_Q},
+ {X86::VCVTQQ2PHZ128rrkz, X86::VCVTQQ2PHZ128rmbkz, TB_BCAST_Q},
+ {X86::VCVTQQ2PHZ256rrkz, X86::VCVTQQ2PHZ256rmbkz, TB_BCAST_Q},
+ {X86::VCVTQQ2PHZrrkz, X86::VCVTQQ2PHZrmbkz, TB_BCAST_Q},
+ {X86::VCVTQQ2PSZ128rrkz, X86::VCVTQQ2PSZ128rmbkz, TB_BCAST_Q},
+ {X86::VCVTQQ2PSZ256rrkz, X86::VCVTQQ2PSZ256rmbkz, TB_BCAST_Q},
+ {X86::VCVTQQ2PSZrrkz, X86::VCVTQQ2PSZrmbkz, TB_BCAST_Q},
{X86::VCVTTPD2DQZ128rrkz, X86::VCVTTPD2DQZ128rmbkz, TB_BCAST_SD},
{X86::VCVTTPD2DQZ256rrkz, X86::VCVTTPD2DQZ256rmbkz, TB_BCAST_SD},
{X86::VCVTTPD2DQZrrkz, X86::VCVTTPD2DQZrmbkz, TB_BCAST_SD},
@@ -7089,24 +7089,24 @@ static const X86FoldTableEntry BroadcastTable2[] = {
{X86::VCVTTPS2UQQZ128rrkz, X86::VCVTTPS2UQQZ128rmbkz, TB_BCAST_SS},
{X86::VCVTTPS2UQQZ256rrkz, X86::VCVTTPS2UQQZ256rmbkz, TB_BCAST_SS},
{X86::VCVTTPS2UQQZrrkz, X86::VCVTTPS2UQQZrmbkz, TB_BCAST_SS},
- {X86::VCVTUDQ2PDZ128rrkz, X86::VCVTUDQ2PDZ128rmbkz, TB_BCAST_SD},
- {X86::VCVTUDQ2PDZ256rrkz, X86::VCVTUDQ2PDZ256rmbkz, TB_BCAST_SD},
- {X86::VCVTUDQ2PDZrrkz, X86::VCVTUDQ2PDZrmbkz, TB_BCAST_SD},
- {X86::VCVTUDQ2PHZ128rrkz, X86::VCVTUDQ2PHZ128rmbkz, TB_BCAST_SH},
- {X86::VCVTUDQ2PHZ256rrkz, X86::VCVTUDQ2PHZ256rmbkz, TB_BCAST_SH},
- {X86::VCVTUDQ2PHZrrkz, X86::VCVTUDQ2PHZrmbkz, TB_BCAST_SH},
- {X86::VCVTUDQ2PSZ128rrkz, X86::VCVTUDQ2PSZ128rmbkz, TB_BCAST_SS},
- {X86::VCVTUDQ2PSZ256rrkz, X86::VCVTUDQ2PSZ256rmbkz, TB_BCAST_SS},
- {X86::VCVTUDQ2PSZrrkz, X86::VCVTUDQ2PSZrmbkz, TB_BCAST_SS},
- {X86::VCVTUQQ2PDZ128rrkz, X86::VCVTUQQ2PDZ128rmbkz, TB_BCAST_SD},
- {X86::VCVTUQQ2PDZ256rrkz, X86::VCVTUQQ2PDZ256rmbkz, TB_BCAST_SD},
- {X86::VCVTUQQ2PDZrrkz, X86::VCVTUQQ2PDZrmbkz, TB_BCAST_SD},
- {X86::VCVTUQQ2PHZ128rrkz, X86::VCVTUQQ2PHZ128rmbkz, TB_BCAST_SH},
- {X86::VCVTUQQ2PHZ256rrkz, X86::VCVTUQQ2PHZ256rmbkz, TB_BCAST_SH},
- {X86::VCVTUQQ2PHZrrkz, X86::VCVTUQQ2PHZrmbkz, TB_BCAST_SH},
- {X86::VCVTUQQ2PSZ128rrkz, X86::VCVTUQQ2PSZ128rmbkz, TB_BCAST_SS},
- {X86::VCVTUQQ2PSZ256rrkz, X86::VCVTUQQ2PSZ256rmbkz, TB_BCAST_SS},
- {X86::VCVTUQQ2PSZrrkz, X86::VCVTUQQ2PSZrmbkz, TB_BCAST_SS},
+ {X86::VCVTUDQ2PDZ128rrkz, X86::VCVTUDQ2PDZ128rmbkz, TB_BCAST_D},
+ {X86::VCVTUDQ2PDZ256rrkz, X86::VCVTUDQ2PDZ256rmbkz, TB_BCAST_D},
+ {X86::VCVTUDQ2PDZrrkz, X86::VCVTUDQ2PDZrmbkz, TB_BCAST_D},
+ {X86::VCVTUDQ2PHZ128rrkz, X86::VCVTUDQ2PHZ128rmbkz, TB_BCAST_D},
+ {X86::VCVTUDQ2PHZ256rrkz, X86::VCVTUDQ2PHZ256rmbkz, TB_BCAST_D},
+ {X86::VCVTUDQ2PHZrrkz, X86::VCVTUDQ2PHZrmbkz, TB_BCAST_D},
+ {X86::VCVTUDQ2PSZ128rrkz, X86::VCVTUDQ2PSZ128rmbkz, TB_BCAST_D},
+ {X86::VCVTUDQ2PSZ256rrkz, X86::VCVTUDQ2PSZ256rmbkz, TB_BCAST_D},
+ {X86::VCVTUDQ2PSZrrkz, X86::VCVTUDQ2PSZrmbkz, TB_BCAST_D},
+ {X86::VCVTUQQ2PDZ128rrkz, X86::VCVTUQQ2PDZ128rmbkz, TB_BCAST_Q},
+ {X86::VCVTUQQ2PDZ256rrkz, X86::VCVTUQQ2PDZ256rmbkz, TB_BCAST_Q},
+ {X86::VCVTUQQ2PDZrrkz, X86::VCVTUQQ2PDZrmbkz, TB_BCAST_Q},
+ {X86::VCVTUQQ2PHZ128rrkz, X86::VCVTUQQ2PHZ128rmbkz, TB_BCAST_Q},
+ {X86::VCVTUQQ2PHZ256rrkz, X86::VCVTUQQ2PHZ256rmbkz, TB_BCAST_Q},
+ {X86::VCVTUQQ2PHZrrkz, X86::VCVTUQQ2PHZrmbkz, TB_BCAST_Q},
+ {X86::VCVTUQQ2PSZ128rrkz, X86::VCVTUQQ2PSZ128rmbkz, TB_BCAST_Q},
+ {X86::VCVTUQQ2PSZ256rrkz, X86::VCVTUQQ2PSZ256rmbkz, TB_BCAST_Q},
+ {X86::VCVTUQQ2PSZrrkz, X86::VCVTUQQ2PSZrmbkz, TB_BCAST_Q},
{X86::VCVTUW2PHZ128rrkz, X86::VCVTUW2PHZ128rmbkz, TB_BCAST_SH},
{X86::VCVTUW2PHZ256rrkz, X86::VCVTUW2PHZ256rmbkz, TB_BCAST_SH},
{X86::VCVTUW2PHZrrkz, X86::VCVTUW2PHZrmbkz, TB_BCAST_SH},
@@ -7166,9 +7166,9 @@ static const X86FoldTableEntry BroadcastTable2[] = {
{X86::VMAXCPDZ128rr, X86::VMAXCPDZ128rmb, TB_BCAST_SD},
{X86::VMAXCPDZ256rr, X86::VMAXCPDZ256rmb, TB_BCAST_SD},
{X86::VMAXCPDZrr, X86::VMAXCPDZrmb, TB_BCAST_SD},
- {X86::VMAXCPHZ128rr, X86::VMAXCPHZ128rmb, TB_BCAST_SS},
- {X86::VMAXCPHZ256rr, X86::VMAXCPHZ256rmb, TB_BCAST_SS},
- {X86::VMAXCPHZrr, X86::VMAXCPHZrmb, TB_BCAST_SS},
+ {X86::VMAXCPHZ128rr, X86::VMAXCPHZ128rmb, TB_BCAST_SH},
+ {X86::VMAXCPHZ256rr, X86::VMAXCPHZ256rmb, TB_BCAST_SH},
+ {X86::VMAXCPHZrr, X86::VMAXCPHZrmb, TB_BCAST_SH},
{X86::VMAXCPSZ128rr, X86::VMAXCPSZ128rmb, TB_BCAST_SS},
{X86::VMAXCPSZ256rr, X86::VMAXCPSZ256rmb, TB_BCAST_SS},
{X86::VMAXCPSZrr, X86::VMAXCPSZrmb, TB_BCAST_SS},
@@ -7184,9 +7184,9 @@ static const X86FoldTableEntry BroadcastTable2[] = {
{X86::VMINCPDZ128rr, X86::VMINCPDZ128rmb, TB_BCAST_SD},
{X86::VMINCPDZ256rr, X86::VMINCPDZ256rmb, TB_BCAST_SD},
{X86::VMINCPDZrr, X86::VMINCPDZrmb, TB_BCAST_SD},
- {X86::VMINCPHZ128rr, X86::VMINCPHZ128rmb, TB_BCAST_SS},
- {X86::VMINCPHZ256rr, X86::VMINCPHZ256rmb, TB_BCAST_SS},
- {X86::VMINCPHZrr, X86::VMINCPHZrmb, TB_BCAST_SS},
+ {X86::VMINCPHZ128rr, X86::VMINCPHZ128rmb, TB_BCAST_SH},
+ {X86::VMINCPHZ256rr, X86::VMINCPHZ256rmb, TB_BCAST_SH},
+ {X86::VMINCPHZrr, X86::VMINCPHZrmb, TB_BCAST_SH},
{X86::VMINCPSZ128rr, X86::VMINCPSZ128rmb, TB_BCAST_SS},
{X86::VMINCPSZ256rr, X86::VMINCPSZ256rmb, TB_BCAST_SS},
{X86::VMINCPSZrr, X86::VMINCPSZrmb, TB_BCAST_SS},
@@ -7460,15 +7460,15 @@ static const X86FoldTableEntry BroadcastTable2[] = {
{X86::VPTESTNMQZ128rr, X86::VPTESTNMQZ128rmb, TB_BCAST_Q},
{X86::VPTESTNMQZ256rr, X86::VPTESTNMQZ256rmb, TB_BCAST_Q},
{X86::VPTESTNMQZrr, X86::VPTESTNMQZrmb, TB_BCAST_Q},
- {X86::VPUNPCKHDQZ128rr, X86::VPUNPCKHDQZ128rmb, TB_BCAST_Q},
- {X86::VPUNPCKHDQZ256rr, X86::VPUNPCKHDQZ256rmb, TB_BCAST_Q},
- {X86::VPUNPCKHDQZrr, X86::VPUNPCKHDQZrmb, TB_BCAST_Q},
+ {X86::VPUNPCKHDQZ128rr, X86::VPUNPCKHDQZ128rmb, TB_BCAST_D},
+ {X86::VPUNPCKHDQZ256rr, X86::VPUNPCKHDQZ256rmb, TB_BCAST_D},
+ {X86::VPUNPCKHDQZrr, X86::VPUNPCKHDQZrmb, TB_BCAST_D},
{X86::VPUNPCKHQDQZ128rr, X86::VPUNPCKHQDQZ128rmb, TB_BCAST_Q},
{X86::VPUNPCKHQDQZ256rr, X86::VPUNPCKHQDQZ256rmb, TB_BCAST_Q},
{X86::VPUNPCKHQDQZrr, X86::VPUNPCKHQDQZrmb, TB_BCAST_Q},
- {X86::VPUNPCKLDQZ128rr, X86::VPUNPCKLDQZ128rmb, TB_BCAST_Q},
- {X86::VPUNPCKLDQZ256rr, X86::VPUNPCKLDQZ256rmb, TB_BCAST_Q},
- {X86::VPUNPCKLDQZrr, X86::VPUNPCKLDQZrmb, TB_BCAST_Q},
+ {X86::VPUNPCKLDQZ128rr, X86::VPUNPCKLDQZ128rmb, TB_BCAST_D},
+ {X86::VPUNPCKLDQZ256rr, X86::VPUNPCKLDQZ256rmb, TB_BCAST_D},
+ {X86::VPUNPCKLDQZrr, X86::VPUNPCKLDQZrmb, TB_BCAST_D},
{X86::VPUNPCKLQDQZ128rr, X86::VPUNPCKLQDQZ128rmb, TB_BCAST_Q},
{X86::VPUNPCKLQDQZ256rr, X86::VPUNPCKLQDQZ256rmb, TB_BCAST_Q},
{X86::VPUNPCKLQDQZrr, X86::VPUNPCKLQDQZrmb, TB_BCAST_Q},
@@ -7628,15 +7628,15 @@ static const X86FoldTableEntry BroadcastTable3[] = {
{X86::VCMPPSZ128rrik, X86::VCMPPSZ128rmbik, TB_BCAST_SS},
{X86::VCMPPSZ256rrik, X86::VCMPPSZ256rmbik, TB_BCAST_SS},
{X86::VCMPPSZrrik, X86::VCMPPSZrmbik, TB_BCAST_SS},
- {X86::VCVTDQ2PDZ128rrk, X86::VCVTDQ2PDZ128rmbk, TB_BCAST_SD},
- {X86::VCVTDQ2PDZ256rrk, X86::VCVTDQ2PDZ256rmbk, TB_BCAST_SD},
- {X86::VCVTDQ2PDZrrk, X86::VCVTDQ2PDZrmbk, TB_BCAST_SD},
- {X86::VCVTDQ2PHZ128rrk, X86::VCVTDQ2PHZ128rmbk, TB_BCAST_SH},
- {X86::VCVTDQ2PHZ256rrk, X86::VCVTDQ2PHZ256rmbk, TB_BCAST_SH},
- {X86::VCVTDQ2PHZrrk, X86::VCVTDQ2PHZrmbk, TB_BCAST_SH},
- {X86::VCVTDQ2PSZ128rrk, X86::VCVTDQ2PSZ128rmbk, TB_BCAST_SS},
- {X86::VCVTDQ2PSZ256rrk, X86::VCVTDQ2PSZ256rmbk, TB_BCAST_SS},
- {X86::VCVTDQ2PSZrrk, X86::VCVTDQ2PSZrmbk, TB_BCAST_SS},
+ {X86::VCVTDQ2PDZ128rrk, X86::VCVTDQ2PDZ128rmbk, TB_BCAST_D},
+ {X86::VCVTDQ2PDZ256rrk, X86::VCVTDQ2PDZ256rmbk, TB_BCAST_D},
+ {X86::VCVTDQ2PDZrrk, X86::VCVTDQ2PDZrmbk, TB_BCAST_D},
+ {X86::VCVTDQ2PHZ128rrk, X86::VCVTDQ2PHZ128rmbk, TB_BCAST_D},
+ {X86::VCVTDQ2PHZ256rrk, X86::VCVTDQ2PHZ256rmbk, TB_BCAST_D},
+ {X86::VCVTDQ2PHZrrk, X86::VCVTDQ2PHZrmbk, TB_BCAST_D},
+ {X86::VCVTDQ2PSZ128rrk, X86::VCVTDQ2PSZ128rmbk, TB_BCAST_D},
+ {X86::VCVTDQ2PSZ256rrk, X86::VCVTDQ2PSZ256rmbk, TB_BCAST_D},
+ {X86::VCVTDQ2PSZrrk, X86::VCVTDQ2PSZrmbk, TB_BCAST_D},
{X86::VCVTNE2PS2BF16Z128rrkz, X86::VCVTNE2PS2BF16Z128rmbkz, TB_BCAST_SS},
{X86::VCVTNE2PS2BF16Z256rrkz, X86::VCVTNE2PS2BF16Z256rmbkz, TB_BCAST_SS},
{X86::VCVTNE2PS2BF16Zrrkz, X86::VCVTNE2PS2BF16Zrmbkz, TB_BCAST_SS},
@@ -7649,9 +7649,9 @@ static const X86FoldTableEntry BroadcastTable3[] = {
{X86::VCVTPD2PHZ128rrk, X86::VCVTPD2PHZ128rmbk, TB_BCAST_SD},
{X86::VCVTPD2PHZ256rrk, X86::VCVTPD2PHZ256rmbk, TB_BCAST_SD},
{X86::VCVTPD2PHZrrk, X86::VCVTPD2PHZrmbk, TB_BCAST_SD},
- {X86::VCVTPD2PSZ128rrk, X86::VCVTPD2PSZ128rmbk, TB_BCAST_SS},
- {X86::VCVTPD2PSZ256rrk, X86::VCVTPD2PSZ256rmbk, TB_BCAST_SS},
- {X86::VCVTPD2PSZrrk, X86::VCVTPD2PSZrmbk, TB_BCAST_SS},
+ {X86::VCVTPD2PSZ128rrk, X86::VCVTPD2PSZ128rmbk, TB_BCAST_SD},
+ {X86::VCVTPD2PSZ256rrk, X86::VCVTPD2PSZ256rmbk, TB_BCAST_SD},
+ {X86::VCVTPD2PSZrrk, X86::VCVTPD2PSZrmbk, TB_BCAST_SD},
{X86::VCVTPD2QQZ128rrk, X86::VCVTPD2QQZ128rmbk, TB_BCAST_SD},
{X86::VCVTPD2QQZ256rrk, X86::VCVTPD2QQZ256rmbk, TB_BCAST_SD},
{X86::VCVTPD2QQZrrk, X86::VCVTPD2QQZrmbk, TB_BCAST_SD},
@@ -7703,15 +7703,15 @@ static const X86FoldTableEntry BroadcastTable3[] = {
{X86::VCVTPS2UQQZ128rrk, X86::VCVTPS2UQQZ128rmbk, TB_BCAST_SS},
{X86::VCVTPS2UQQZ256rrk, X86::VCVTPS2UQQZ256rmbk, TB_BCAST_SS},
{X86::VCVTPS2UQQZrrk, X86::VCVTPS2UQQZrmbk, TB_BCAST_SS},
- {X86::VCVTQQ2PDZ128rrk, X86::VCVTQQ2PDZ128rmbk, TB_BCAST_SD},
- {X86::VCVTQQ2PDZ256rrk, X86::VCVTQQ2PDZ256rmbk, TB_BCAST_SD},
- {X86::VCVTQQ2PDZrrk, X86::VCVTQQ2PDZrmbk, TB_BCAST_SD},
- {X86::VCVTQQ2PHZ128rrk, X86::VCVTQQ2PHZ128rmbk, TB_BCAST_SH},
- {X86::VCVTQQ2PHZ256rrk, X86::VCVTQQ2PHZ256rmbk, TB_BCAST_SH},
- {X86::...
[truncated]
|
phoebewang
reviewed
Jan 27, 2024
StringSwitch<X86FoldTableEntry::BcastType>(In->getArg(I)->getAsString())
    .Case("i32mem", X86FoldTableEntry::BCAST_D)
    .Case("i64mem", X86FoldTableEntry::BCAST_Q)
    .Case("i16mem", X86FoldTableEntry::BCAST_SH)
Contributor
There was a problem hiding this comment.
Why not map to BCAST_W?
Contributor
Author
There was a problem hiding this comment.
Sounds good. Done.
Contributor
There was a problem hiding this comment.
Shouldn't we update this here too? https://github.com/llvm/llvm-project/blob/main/llvm/lib/Target/X86/X86InstrFoldTables.cpp#L300
Contributor
Author
There was a problem hiding this comment.
Good catch! Done.
Contributor
Author
|
Thanks @phoebewang |
Contributor
|
/pull-request #79675 |
Contributor
|
Maybe backport this to the LLVM 18 release. |
Contributor
|
/cherry-pick 7c3ee7c |
llvmbot
pushed a commit
to llvmbot/llvm-project
that referenced
this pull request
Jan 28, 2024
(cherry picked from commit 7c3ee7c)
Member
|
/pull-request #79721 |
llvmbot
pushed a commit
to llvmbot/llvm-project
that referenced
this pull request
Feb 7, 2024
(cherry picked from commit 7c3ee7c)
tstellar
pushed a commit
to tstellar/llvm-project
that referenced
this pull request
Feb 14, 2024
(cherry picked from commit 7c3ee7c)
tstellar
pushed a commit
to tstellar/llvm-project
that referenced
this pull request
Feb 14, 2024
(cherry picked from commit 7c3ee7c)
tstellar
pushed a commit
to tstellar/llvm-project
that referenced
this pull request
Feb 14, 2024
(cherry picked from commit 7c3ee7c)
tstellar
pushed a commit
to tstellar/llvm-project
that referenced
this pull request
Feb 14, 2024
(cherry picked from commit 7c3ee7c)
Closed
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit. This suggestion is invalid because no changes were made to the code. Suggestions cannot be applied while the pull request is closed. Suggestions cannot be applied while viewing a subset of changes. Only one suggestion per line can be applied in a batch. Add this suggestion to a batch that can be applied as a single commit. Applying suggestions on deleted lines is not supported. You must change the existing code in this line in order to create a valid suggestion. Outdated suggestions cannot be applied. This suggestion has been applied or marked resolved. Suggestions cannot be applied from pending reviews. Suggestions cannot be applied on multi-line comments. Suggestions cannot be applied while the pull request is queued to merge. Suggestion cannot be applied right now. Please check back later.
No description provided.