-
Notifications
You must be signed in to change notification settings - Fork 5.4k
Tensor primitives divide int32 #111505
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Tensor primitives divide int32 #111505
Changes from 26 commits
dfa44fa
37afdd2
0cc3b79
b8965ae
d5ebf81
ef596d9
1da230f
c8b913e
6ac02b3
2bc816b
77842e4
16a5178
1ca404f
d9b18a0
34329ad
65d2f8c
839baa9
c6e18e6
078aff6
f994b09
6767384
e61e97f
08e0d7c
66580e1
3feb91b
1807a4c
4929106
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1871,6 +1871,107 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions) | |
| break; | ||
| } | ||
|
|
||
| case NI_Vector128_op_Division: | ||
| case NI_Vector256_op_Division: | ||
| { | ||
| // We can emulate SIMD integer division by converting the 32-bit integer -> 64-bit double, | ||
| // perform a 64-bit double divide, then convert back to a 32-bit integer. This is generating | ||
| // something similar to the following managed code: | ||
| // if (Vector128.EqualsAny(op2, Vector128<int>.Zero)) | ||
| // { | ||
| // throw new DivideByZeroException(); | ||
| // } | ||
| // | ||
| // Vector128<int> overflowMask = | ||
| // Vector128.Equals(op1, Vector128.Create(int.MaxValue) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. this should be it's
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Thanks, fixed. |
||
| // & Vector128.Equals(op2, Vector128.Create(-1)); | ||
| // if (!Vector128.EqualsAll(overflowMask, Vector128<int>.Zero)) | ||
| // { | ||
| // throw new OverflowException(); | ||
| // } | ||
| // | ||
| // Vector256<double> op1_f64 = | ||
| // Vector256.ConvertToDouble(Vector256.WidenLower(Vector128.ToVector256Unsafe(op1)))); | ||
| // Vector256<double> op2_f64 = | ||
| // Vector256.ConvertToDouble(Vector256.WidenLower(Vector128.ToVector256Unsafe(op2)))); | ||
| // Vector256<double> div_f64 = op1_f64 / op2_f64; | ||
| // Vector256<long> div_i64 = Vector256.ConvertToInt64(div_f64); | ||
| // Vector128<int> div_i32 = Vector256.Narrow(div_i64.GetLower(), div_i64.GetUpper()); | ||
| // return div_i32; | ||
| regNumber op2Reg = op2->GetRegNum(); | ||
| regNumber tmpReg1 = internalRegisters.Extract(node, RBM_ALLFLOAT); | ||
| regNumber tmpReg2 = internalRegisters.Extract(node, RBM_ALLFLOAT); | ||
| emitAttr typeSize = emitTypeSize(node->TypeGet()); | ||
| noway_assert(typeSize == EA_16BYTE || typeSize == EA_32BYTE); | ||
| emitAttr divTypeSize = typeSize == EA_16BYTE ? EA_32BYTE : EA_64BYTE; | ||
|
|
||
| CORINFO_FIELD_HANDLE maxValueFld; | ||
| CORINFO_FIELD_HANDLE negOneFld; | ||
|
|
||
| if (typeSize == EA_16BYTE) | ||
| { | ||
| simd16_t maxValueIntVec; | ||
| maxValueIntVec.i32[0] = INT_MAX; | ||
| maxValueIntVec.i32[1] = INT_MAX; | ||
| maxValueIntVec.i32[2] = INT_MAX; | ||
| maxValueIntVec.i32[3] = INT_MAX; | ||
|
|
||
| simd16_t negOneIntVec; | ||
| negOneIntVec.i32[0] = -1; | ||
| negOneIntVec.i32[1] = -1; | ||
| negOneIntVec.i32[2] = -1; | ||
| negOneIntVec.i32[3] = -1; | ||
|
|
||
| maxValueFld = emit->emitSimd16Const(maxValueIntVec); | ||
| negOneFld = emit->emitSimd16Const(negOneIntVec); | ||
| } | ||
| else | ||
| { | ||
| noway_assert(typeSize == EA_32BYTE); | ||
| simd32_t maxValueIntVec; | ||
| maxValueIntVec.i32[0] = INT_MAX; | ||
| maxValueIntVec.i32[1] = INT_MAX; | ||
| maxValueIntVec.i32[2] = INT_MAX; | ||
| maxValueIntVec.i32[3] = INT_MAX; | ||
| maxValueIntVec.i32[4] = INT_MAX; | ||
| maxValueIntVec.i32[5] = INT_MAX; | ||
| maxValueIntVec.i32[6] = INT_MAX; | ||
| maxValueIntVec.i32[7] = INT_MAX; | ||
|
|
||
| simd32_t negOneIntVec; | ||
| negOneIntVec.i32[0] = -1; | ||
| negOneIntVec.i32[1] = -1; | ||
| negOneIntVec.i32[2] = -1; | ||
| negOneIntVec.i32[3] = -1; | ||
| negOneIntVec.i32[4] = -1; | ||
| negOneIntVec.i32[5] = -1; | ||
| negOneIntVec.i32[6] = -1; | ||
| negOneIntVec.i32[7] = -1; | ||
|
|
||
| maxValueFld = emit->emitSimd32Const(maxValueIntVec); | ||
| negOneFld = emit->emitSimd32Const(negOneIntVec); | ||
| } | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This could be simplified to a Then doing
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It might, alternatively, be cheaper to just check if the result of
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. isn't
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, that as well. |
||
|
|
||
| // div-by-zero check | ||
| emit->emitIns_SIMD_R_R_R(INS_xorpd, typeSize, tmpReg1, tmpReg1, tmpReg1, instOptions); | ||
| emit->emitIns_SIMD_R_R_R(INS_pcmpeqd, typeSize, tmpReg1, tmpReg1, op2Reg, instOptions); | ||
| emit->emitIns_R_R(INS_ptest, typeSize, tmpReg1, tmpReg1, instOptions); | ||
| genJumpToThrowHlpBlk(EJ_jne, SCK_DIV_BY_ZERO); | ||
|
|
||
| // overflow check | ||
| emit->emitIns_SIMD_R_R_C(INS_pcmpeqd, typeSize, tmpReg1, op1Reg, maxValueFld, 0, instOptions); | ||
| emit->emitIns_SIMD_R_R_C(INS_pcmpeqd, typeSize, tmpReg2, op2Reg, negOneFld, 0, instOptions); | ||
| emit->emitIns_SIMD_R_R_R(INS_pand, typeSize, tmpReg1, tmpReg1, tmpReg2, instOptions); | ||
| emit->emitIns_R_R(INS_ptest, typeSize, tmpReg1, tmpReg1, instOptions); | ||
| genJumpToThrowHlpBlk(EJ_jne, SCK_OVERFLOW); | ||
|
|
||
| emit->emitIns_R_R(INS_cvtdq2pd, divTypeSize, tmpReg1, op1Reg, instOptions); | ||
| emit->emitIns_R_R(INS_cvtdq2pd, divTypeSize, tmpReg2, op2Reg, instOptions); | ||
| emit->emitIns_SIMD_R_R_R(INS_divpd, divTypeSize, targetReg, tmpReg1, tmpReg2, instOptions); | ||
| emit->emitIns_R_R(INS_cvttpd2dq, divTypeSize, targetReg, targetReg, instOptions); | ||
| break; | ||
| } | ||
|
|
||
| default: | ||
| { | ||
| unreached(); | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This should probably assert that we're only encountering it for
`varTypeIsInteger(AsHWIntrinsic()->GetSimdBaseType())`, just to avoid issues from potential future refactorings. I'd expect we want to go ahead and include
`Vector512_op_Division` here as well, even if it can't be encountered quite yet. There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks, I've added the
`assert` and included `Vector512_op_Division`.