Skip to content
Merged
Changes from 4 commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
dfa44fa
Add vectorized path for Int32 type in TensorPrimitives.Divide
Jan 16, 2025
37afdd2
Add ISA guards and Debug.Assert
Jan 16, 2025
0cc3b79
Simplify vectorizable check, simplify preprocessor guard
Jan 16, 2025
b8965ae
Use XPlat intrinsics, use x86 intrinsics just for conversion
Jan 21, 2025
d5ebf81
Get Vector128<int> operator/ working
Jan 31, 2025
ef596d9
Working for Vector256<int> operator/
Jan 31, 2025
1da230f
Works for operator/ where op2 is scalar int
Jan 31, 2025
c8b913e
Consolidate to one path
Jan 31, 2025
6ac02b3
Move logic from importer to gentree
Feb 7, 2025
2bc816b
Add support to vectorize Vector512<int> operator /
Feb 7, 2025
77842e4
Working GenTreeSIMDDivByZeroCheck node
Feb 11, 2025
16a5178
Make GenTreeSIMDDivByZeroCheck a unary op
Feb 11, 2025
1ca404f
Remove empty lower functions, remove leftover gtGetOp2
Feb 11, 2025
d9b18a0
JIT formatting
Feb 11, 2025
34329ad
Only use low SIMD registers for ptest
Feb 11, 2025
65d2f8c
Merge branch 'main' into tensor-primitives-divide-int32
alexcovington Feb 12, 2025
839baa9
Merge branch 'main' into tensor-primitives-divide-int32
alexcovington Feb 17, 2025
c6e18e6
Use gtNewSimdCmpOpAnyNode for div-by-zero check
Feb 18, 2025
078aff6
Add spill for op1
Feb 18, 2025
f994b09
Jit formatting
Feb 18, 2025
6767384
Rework to use NI_Vector*_op_Division
Feb 20, 2025
e61e97f
Remove GT_SIMD_DIV_BY_ZERO_CHECK and related class/functions
Feb 20, 2025
08e0d7c
Re-use tmp reg for converts
Feb 21, 2025
66580e1
Comments, jit formatting
Feb 21, 2025
3feb91b
Remove leftover SIMDDivByZero
Feb 21, 2025
1807a4c
Don't flag with GTF_OVERFLOW since it conflicts with GTF_HW_EM_OP
Feb 21, 2025
4929106
Use correct INT_MIN / -1, simplify vec const generation, add assert t…
Feb 24, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Diagnostics;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;

namespace System.Numerics.Tensors
{
Expand Down Expand Up @@ -70,11 +72,103 @@ public static void Divide<T>(T x, ReadOnlySpan<T> y, Span<T> destination)
/// <summary>
/// Binary operator computing <c>x / y</c>, element-wise for the vector overloads.
/// </summary>
/// <remarks>
/// <see cref="float"/> and <see cref="double"/> use the vector division operator directly.
/// <see cref="int"/> is divided by widening each lane to <see cref="double"/> (every Int32 value is
/// exactly representable as a double), dividing, and truncating the quotient back to Int32.
/// Because that round-trip cannot fault on its own, the integer path explicitly raises the same
/// exceptions the scalar <c>x / y</c> raises: <see cref="DivideByZeroException"/> for a zero divisor
/// and <see cref="OverflowException"/> for <c>int.MinValue / -1</c>.
/// </remarks>
internal readonly struct DivideOperator<T> : IBinaryOperator<T> where T : IDivisionOperators<T, T, T>
{
    /// <summary>
    /// Gets whether the vector overloads may be used for <typeparamref name="T"/>.
    /// Int32 is only vectorized when <see cref="Vector256"/> is hardware accelerated.
    /// </summary>
    public static bool Vectorizable =>
        typeof(T) == typeof(float) ||
        typeof(T) == typeof(double) ||
        (Vector256.IsHardwareAccelerated && typeof(T) == typeof(int));

    /// <summary>Divides a single <paramref name="x"/> by <paramref name="y"/>.</summary>
    public static T Invoke(T x, T y) => x / y;

    /// <summary>Divides each lane of <paramref name="x"/> by the matching lane of <paramref name="y"/>.</summary>
    /// <exception cref="DivideByZeroException"><typeparamref name="T"/> is Int32 and any lane of <paramref name="y"/> is zero.</exception>
    /// <exception cref="OverflowException"><typeparamref name="T"/> is Int32 and any lane pair is <c>int.MinValue / -1</c>.</exception>
    public static Vector128<T> Invoke(Vector128<T> x, Vector128<T> y)
    {
        if (typeof(T) != typeof(int))
        {
            return x / y;
        }

        Vector128<int> numerator = x.AsInt32();
        Vector128<int> denominator = y.AsInt32();

        if (Vector128.EqualsAny(denominator, Vector128<int>.Zero))
        {
            throw new DivideByZeroException();
        }

        // int.MinValue / -1 is not representable as Int32. The double round-trip below would
        // silently produce int.MinValue, whereas scalar division throws, so mirror the scalar behavior.
        Vector128<int> overflowLanes =
            Vector128.Equals(numerator, Vector128.Create(int.MinValue)) &
            Vector128.Equals(denominator, Vector128.Create(-1));
        if (overflowLanes != Vector128<int>.Zero)
        {
            throw new OverflowException();
        }

        if (Avx.IsSupported)
        {
            // Direct Int32 <-> double conversions; the narrowing conversion truncates.
            Vector256<double> quotient =
                Avx.ConvertToVector256Double(numerator) / Avx.ConvertToVector256Double(denominator);

            return Avx.ConvertToVector128Int32WithTruncation(quotient).As<int, T>();
        }
        else
        {
            // Cross-platform fallback: widen Int32 -> Int64 -> double, divide, then come back
            // through Int64 and narrow to Int32.
            Vector256<double> quotient =
                Vector256.ConvertToDouble(Vector256.WidenLower(numerator.ToVector256Unsafe())) /
                Vector256.ConvertToDouble(Vector256.WidenLower(denominator.ToVector256Unsafe()));

            Vector256<long> quotientInt64 = Vector256.ConvertToInt64(quotient);
            return Vector128.Narrow(quotientInt64.GetLower(), quotientInt64.GetUpper()).As<int, T>();
        }
    }

    /// <summary>Divides each lane of <paramref name="x"/> by the matching lane of <paramref name="y"/>.</summary>
    /// <exception cref="DivideByZeroException"><typeparamref name="T"/> is Int32 and any lane of <paramref name="y"/> is zero.</exception>
    /// <exception cref="OverflowException"><typeparamref name="T"/> is Int32 and any lane pair is <c>int.MinValue / -1</c>.</exception>
    public static Vector256<T> Invoke(Vector256<T> x, Vector256<T> y)
    {
        if (typeof(T) != typeof(int))
        {
            return x / y;
        }

        if (!Vector512.IsHardwareAccelerated)
        {
            // Without 512-bit hardware support, process the two 128-bit halves separately;
            // each half performs its own divide-by-zero and overflow checks.
            return Invoke(x.GetLower(), y.GetLower()).ToVector256Unsafe().WithUpper(Invoke(x.GetUpper(), y.GetUpper()));
        }

        Vector256<int> numerator = x.AsInt32();
        Vector256<int> denominator = y.AsInt32();

        if (Vector256.EqualsAny(denominator, Vector256<int>.Zero))
        {
            throw new DivideByZeroException();
        }

        // int.MinValue / -1 is not representable as Int32. The double round-trip below would
        // silently produce int.MinValue, whereas scalar division throws, so mirror the scalar behavior.
        Vector256<int> overflowLanes =
            Vector256.Equals(numerator, Vector256.Create(int.MinValue)) &
            Vector256.Equals(denominator, Vector256.Create(-1));
        if (overflowLanes != Vector256<int>.Zero)
        {
            throw new OverflowException();
        }

        if (Avx512F.IsSupported)
        {
            // Direct Int32 <-> double conversions; the narrowing conversion truncates.
            Vector512<double> quotient =
                Avx512F.ConvertToVector512Double(numerator) / Avx512F.ConvertToVector512Double(denominator);

            return Avx512F.ConvertToVector256Int32WithTruncation(quotient).As<int, T>();
        }
        else
        {
            // Cross-platform fallback: widen Int32 -> Int64 -> double, divide, then come back
            // through Int64 and narrow to Int32.
            Vector512<double> quotient =
                Vector512.ConvertToDouble(Vector512.WidenLower(numerator.ToVector512Unsafe())) /
                Vector512.ConvertToDouble(Vector512.WidenLower(denominator.ToVector512Unsafe()));

            Vector512<long> quotientInt64 = Vector512.ConvertToInt64(quotient);
            return Vector256.Narrow(quotientInt64.GetLower(), quotientInt64.GetUpper()).As<int, T>();
        }
    }

    /// <summary>Divides each lane of <paramref name="x"/> by the matching lane of <paramref name="y"/>.</summary>
    /// <exception cref="DivideByZeroException"><typeparamref name="T"/> is Int32 and any lane of <paramref name="y"/> is zero.</exception>
    /// <exception cref="OverflowException"><typeparamref name="T"/> is Int32 and any lane pair is <c>int.MinValue / -1</c>.</exception>
    public static Vector512<T> Invoke(Vector512<T> x, Vector512<T> y)
    {
        if (typeof(T) == typeof(int))
        {
            // Int32 is handled as two 256-bit halves; the 256-bit overload does the checks and the division.
            return Invoke(x.GetLower(), y.GetLower()).ToVector512Unsafe().WithUpper(Invoke(x.GetUpper(), y.GetUpper()));
        }

        return x / y;
    }
}
}
}