-
Notifications
You must be signed in to change notification settings - Fork 5.4k
Get index of first non ascii byte #39506
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 10 commits
5d33b49
9d4182c
d22d29c
3dca000
49d87f0
6d77419
8f259e6
9dd727c
b8014d9
0152f82
0370ec9
c3008eb
6c12885
576af36
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -42,6 +42,28 @@ private static bool AllCharsInUInt64AreAscii(ulong value) | |
| return (value & ~0x007F007F_007F007Ful) == 0; | ||
| } | ||
|
|
||
| [MethodImpl(MethodImplOptions.AggressiveInlining)] | ||
| private static int GetIndexOfFirstNonAsciiByteInLane(Vector128<byte> value, Vector128<byte> bitmask) | ||
|
pgovind marked this conversation as resolved.
Outdated
|
||
| { | ||
| if (!AdvSimd.Arm64.IsSupported || !BitConverter.IsLittleEndian) | ||
| { | ||
| throw new PlatformNotSupportedException(); | ||
| } | ||
|
|
||
| // extractedBits[i] = (value[i] >> 7) & (1 << (12 * (i % 2))); | ||
| Vector128<byte> mostSignificantBitIsSet = AdvSimd.ShiftRightArithmetic(value.AsSByte(), 7).AsByte(); | ||
| Vector128<byte> extractedBits = AdvSimd.And(mostSignificantBitIsSet, bitmask); | ||
|
|
||
| // collapse mask to lower bits | ||
| extractedBits = AdvSimd.Arm64.AddPairwise(extractedBits, extractedBits); | ||
| ulong mask = extractedBits.AsUInt64().ToScalar(); | ||
|
|
||
| // calculate the index | ||
| int index = BitOperations.TrailingZeroCount(mask) >> 2; | ||
| Debug.Assert((mask != 0) ? index < 16 : index >= 16); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I found a way to find the index of the first non-ASCII byte in // value: Vector128<byte>
// Vn.B[15] : [ b15 * * * * * * * ]
// Vn.B[14] : [ b14 * * * * * * * ]
Vector128<ushort> value2 = value.AsUInt16();
// Vn.H[7] : [ b15 * * * * * * * b14 * * * * * * * ]
// Vn.H[6] : [ b13 * * * * * * * b12 * * * * * * * ]
Vector128<ushort> shiftToRightAndInsert = AdvSimd.ShiftRightAndInsert(value2, value2, 9);
// Vn.H[7] : [ b15 * * * * * * * b14 b15 * * * * * * ]
// Vn.H[6] : [ b13 * * * * * * * b12 b13 * * * * * * ]
Vector64<ushort> shiftToRightAndInsert2 = AdvSimd.ExtractNarrowingLower(shiftToRightAndInsert).AsUInt16();
// Vn.B[7] : [ b14 b15 * * * * * * ]
// Vn.B[6] : [ b12 b13 * * * * * * ]
// Vn.H[3] : [ b14 b15 * * * * * * b12 b13 * * * * * * ]
// Vn.H[2] : [ b10 b11 * * * * * * b08 b09 * * * * * * ]
Vector64<ushort> shiftToRightAndInsert3 = AdvSimd.ShiftRightAndInsert(shiftToRightAndInsert2, shiftToRightAndInsert2, 10);
// Vn.H[3] : [ b14 b15 * * * * * * b12 b13 b14 b15 * * * * ]
// Vn.H[2] : [ b10 b11 * * * * * * b08 b09 b10 b11 * * * * ]
Vector64<ushort> shiftToRightAndInsert4 = AdvSimd.ExtractNarrowingLower(shiftToRightAndInsert3.ToVector128Unsafe()).AsUInt16();
// Vn.B[3] : [ b12 b13 b14 b15 * * * * ]
// Vn.B[2] : [ b08 b09 b10 b11 * * * * ]
// Vn.H[1] : [ b12 b13 b14 b15 * * * * b08 b09 b10 b11 * * * * ]
// Vn.H[0] : [ b04 b05 b06 b07 * * * * b00 b01 b02 b03 * * * * ]
Vector64<uint> shiftToRightAndInsert5 = AdvSimd.ShiftRightAndInsert(shiftToRightAndInsert4, shiftToRightAndInsert4, 12).AsUInt32();
// Vn.H[1] : [ b12 b13 b14 b15 * * * * b08 b09 b10 b11 b12 b13 b14 b15 ]
// Vn.H[0] : [ b04 b05 b06 b07 * * * * b00 b01 b02 b03 b04 b05 b06 b07 ]
// Vn.S[0] : [ b12 b13 b14 b15 * * * * b08 b09 b10 b11 b12 b13 b14 b15 b04 b05 b06 b07 * * * * b00 b01 b02 b03 b04 b05 b06 b07 ]
Vector64<ushort> shiftToLeftAndInsert = AdvSimd.ShiftLeftAndInsert(shiftToRightAndInsert5, shiftToRightAndInsert5, 24).AsUInt16();
// Vn.S[0] : [ b00 b01 b02 b03 b04 b05 b06 b07 b08 b09 b10 b11 b12 b13 b14 b15 b04 b05 b06 b07 * * * * b00 b01 b02 b03 b04 b05 b06 b07 ]
// Vn.H[1] : [ b00 b01 b02 b03 b04 b05 b06 b07 b08 b09 b10 b11 b12 b13 b14 b15 ]
ushort index = AdvSimd.LeadingZeroCount(shiftToLeftAndInsert).GetElement(1); Here is a link to the proof-of-concept code: https://gist.github.com/echesakovMSFT/b27ed28024091472db6d3ca007f34a6d#file-program-cs The code that the JIT generates for the part computing the index is as follows: 4EB01E11 mov v17.16b, v16.16b
6F174611 sri v17.8h, v16.8h, #9
0E212A30 xtn v16.8b, v17.8h
0EB01E11 mov v17.8b, v16.8b
2F164611 sri v17.4h, v16.4h, #10
0EB11E30 mov v16.8b, v17.8b
0E212A10 xtn v16.8b, v16.8h
0EB01E11 mov v17.8b, v16.8b
2F144611 sri v17.4h, v16.4h, #12
0EB11E30 mov v16.8b, v17.8b
2F385630 sli v16.2s, v17.2s, #24
2E604A10 clz v16.4h, v16.4h
0E063E00 umov w0, v16.h[1]The five @TamarChristinaArm Do you see any issue with this approach? Aside from only supporting little-endian.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This code also allows computing a mask of the non-ASCII bytes by replacing ushort index = AdvSimd.LeadingZeroCount(shiftToLeftAndInsert).GetElement(1); with ushort mask = shiftToLeftAndInsert.GetElement(1); although the mask will be reversed in this case.
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'd actually like to get this PR in soon, considering our deadline is Friday. I'm thinking I'll file a follow-up issue to investigate this improvement and the improvements suggested in #39507. How does that sound?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
@pgovind Sure, to clarify, I was not suggesting incorporating the algorithm right now. I understand that this would be an unreasonable ask. But I can explain the motivation and how I discovered this approach. I was trying to find a way to avoid doing this in your code: ulong mask = extractedBits.AsUInt64().ToScalar();
// calculate the index
int index = BitOperations.TrailingZeroCount(mask) >> 2;since a call to It turns out that I ended up with a completely different approach.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @echesakovMSFT In and of itself the The example you have, while it avoids having to load the mask, ends up with 7 instructions that are 2 cycles each and the so I think if you amortize the cost of the mask that sequence will end up being slower. That said, you can improve this sequence further. The issue here isn't the use of a mask, it's the use of a mask that can't be created in a register. Instead take a look at the sequence I pasted #39507 (comment) which also avoids the But looking at the overall algorithm, the majority of the cases don't require the actual index yet (by which I mean, you only check if it contains it or not...). The helper function should really end at the Also, what is the expected number of loop iterations? If you expect the majority of the characters to be ASCII then the implementation is not really taking advantage of that. You can test both vectors together without needing a mask. Essentially what you want is a slightly modified version of strlen https://github.com/ARM-software/optimized-routines/blob/224cb5f67b71757b99fe1e10b5a437c17a1d733c/string/aarch64/strlen.S#L144 now when You can easily do this by testing the lowpart of after this you implement the index finding part Now you still need the Which should allow the JIT to lower the This avoids needing the mask (does need some minor tweaking for big-endian) but should be significantly smaller than what is currently being generated and should be a couple of orders faster.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @TamarChristinaArm Thank you very much for your detailed explanation! Going forward we should definitely capture your analysis here and in the other PR.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. np, Also FYI for maximum throughput instead of Which has the same latency as
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. and you do need an
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Actually, we haven't had a chance to finish #33972 in .NET 5 - so, in this case, using
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Hmm well you can declare the use of |
||
| return index; | ||
| } | ||
|
|
||
| /// <summary> | ||
| /// Given a DWORD which represents two packed chars in machine-endian order, | ||
| /// <see langword="true"/> iff the first char (in machine-endian order) is ASCII. | ||
|
|
@@ -67,8 +89,8 @@ public static unsafe nuint GetIndexOfFirstNonAsciiByte(byte* pBuffer, nuint buff | |
| // pmovmskb which we know are optimized, and (b) we can avoid downclocking the processor while | ||
| // this method is running. | ||
|
|
||
| return (Sse2.IsSupported) | ||
| ? GetIndexOfFirstNonAsciiByte_Sse2(pBuffer, bufferLength) | ||
| return (Sse2.IsSupported || AdvSimd.Arm64.IsSupported && BitConverter.IsLittleEndian) | ||
| ? GetIndexOfFirstNonAsciiByte_Intrinsified(pBuffer, bufferLength) | ||
| : GetIndexOfFirstNonAsciiByte_Default(pBuffer, bufferLength); | ||
| } | ||
|
|
||
|
|
@@ -215,17 +237,41 @@ private static unsafe nuint GetIndexOfFirstNonAsciiByte_Default(byte* pBuffer, n | |
| goto Finish; | ||
| } | ||
|
|
||
| private static unsafe nuint GetIndexOfFirstNonAsciiByte_Sse2(byte* pBuffer, nuint bufferLength) | ||
| [MethodImpl(MethodImplOptions.AggressiveInlining)] | ||
| private static bool ContainsNonAsciiByte_Sse2(uint sseMask) | ||
| { | ||
| if (!Sse2.IsSupported) | ||
|
pgovind marked this conversation as resolved.
Outdated
|
||
| { | ||
| throw new PlatformNotSupportedException(); | ||
| } | ||
| return sseMask != 0; | ||
| } | ||
|
|
||
| [MethodImpl(MethodImplOptions.AggressiveInlining)] | ||
| private static bool ContainsNonAsciiByte_AdvSimd(uint advSimdIndex) | ||
| { | ||
| if (!AdvSimd.IsSupported) | ||
|
pgovind marked this conversation as resolved.
Outdated
|
||
| { | ||
| throw new PlatformNotSupportedException(); | ||
| } | ||
| return advSimdIndex < 16; | ||
| } | ||
|
|
||
| private static unsafe nuint GetIndexOfFirstNonAsciiByte_Intrinsified(byte* pBuffer, nuint bufferLength) | ||
| { | ||
| // JIT turns the below into constants | ||
|
|
||
| uint SizeOfVector128 = (uint)Unsafe.SizeOf<Vector128<byte>>(); | ||
| nuint MaskOfAllBitsInVector128 = (nuint)(SizeOfVector128 - 1); | ||
|
|
||
| Debug.Assert(Sse2.IsSupported, "Should've been checked by caller."); | ||
| Debug.Assert(BitConverter.IsLittleEndian, "SSE2 assumes little-endian."); | ||
| Debug.Assert(Sse2.IsSupported || AdvSimd.Arm64.IsSupported, "Sse2 or AdvSimd64 required."); | ||
| Debug.Assert(BitConverter.IsLittleEndian, "This SSE2/Arm64 implementation assumes little-endian."); | ||
|
|
||
| Vector128<byte> bitmask = BitConverter.IsLittleEndian ? | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm assuming the bitmask is intentionally defined as a local?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You throw
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Yup. Declaring it static won't work because of initialization issues in either .NET Standard or .NET Framework (I don't remember which; I just remember @carlossanlop's conclusion).
Yup, the idea is that this part will just work if we start supporting BigEndian environments |
||
| Vector128.Create((ushort)0x1001).AsByte() : | ||
| Vector128.Create((ushort)0x0110).AsByte(); | ||
|
|
||
| uint currentMask, secondMask; | ||
| uint currentSseMaskOrAdvSimdIndex, secondSseMaskOrAdvSimdIndex; | ||
| byte* pOriginalBuffer = pBuffer; | ||
|
|
||
| // This method is written such that control generally flows top-to-bottom, avoiding | ||
|
|
@@ -240,11 +286,25 @@ private static unsafe nuint GetIndexOfFirstNonAsciiByte_Sse2(byte* pBuffer, nuin | |
|
|
||
| // Read the first vector unaligned. | ||
|
|
||
| currentMask = (uint)Sse2.MoveMask(Sse2.LoadVector128(pBuffer)); // unaligned load | ||
|
|
||
| if (currentMask != 0) | ||
| if (Sse2.IsSupported) | ||
| { | ||
| goto FoundNonAsciiDataInCurrentMask; | ||
| currentSseMaskOrAdvSimdIndex = (uint)Sse2.MoveMask(Sse2.LoadVector128(pBuffer)); // unaligned load | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
I remember adding a comment about this variable. I think you should use different variables to represent this value for
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I agree, but I think this is a reasonable compromise. More info here where I replied to your question :) #39506 (comment)
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah, somehow I missed it. I have responded to your comment in the same thread.
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done, fixed now. |
||
| if (ContainsNonAsciiByte_Sse2(currentSseMaskOrAdvSimdIndex)) | ||
| { | ||
| goto FoundNonAsciiDataInCurrentMask; | ||
| } | ||
| } | ||
| else if (AdvSimd.Arm64.IsSupported) | ||
| { | ||
| currentSseMaskOrAdvSimdIndex = (uint)GetIndexOfFirstNonAsciiByteInLane(AdvSimd.LoadVector128(pBuffer), bitmask); // unaligned load | ||
| if (ContainsNonAsciiByte_AdvSimd(currentSseMaskOrAdvSimdIndex)) | ||
| { | ||
| goto FoundNonAsciiDataInCurrentMask; | ||
| } | ||
| } | ||
|
pgovind marked this conversation as resolved.
Outdated
|
||
| else | ||
| { | ||
| throw new PlatformNotSupportedException(); | ||
| } | ||
|
|
||
| // If we have less than 32 bytes to process, just go straight to the final unaligned | ||
|
|
@@ -281,15 +341,33 @@ private static unsafe nuint GetIndexOfFirstNonAsciiByte_Sse2(byte* pBuffer, nuin | |
|
|
||
| do | ||
| { | ||
| Vector128<byte> firstVector = Sse2.LoadAlignedVector128(pBuffer); | ||
| Vector128<byte> secondVector = Sse2.LoadAlignedVector128(pBuffer + SizeOfVector128); | ||
| if (Sse2.IsSupported) | ||
| { | ||
| Vector128<byte> firstVector = Sse2.LoadAlignedVector128(pBuffer); | ||
| Vector128<byte> secondVector = Sse2.LoadAlignedVector128(pBuffer + SizeOfVector128); | ||
|
|
||
| currentMask = (uint)Sse2.MoveMask(firstVector); | ||
| secondMask = (uint)Sse2.MoveMask(secondVector); | ||
| currentSseMaskOrAdvSimdIndex = (uint)Sse2.MoveMask(firstVector); | ||
| secondSseMaskOrAdvSimdIndex = (uint)Sse2.MoveMask(secondVector); | ||
| if (ContainsNonAsciiByte_Sse2(currentSseMaskOrAdvSimdIndex | secondSseMaskOrAdvSimdIndex)) | ||
| { | ||
| goto FoundNonAsciiDataInInnerLoop; | ||
| } | ||
| } | ||
| else if (AdvSimd.Arm64.IsSupported) | ||
| { | ||
| Vector128<byte> firstVector = AdvSimd.LoadVector128(pBuffer); | ||
| Vector128<byte> secondVector = AdvSimd.LoadVector128(pBuffer + SizeOfVector128); | ||
|
|
||
| if ((currentMask | secondMask) != 0) | ||
| currentSseMaskOrAdvSimdIndex = (uint)GetIndexOfFirstNonAsciiByteInLane(firstVector, bitmask); | ||
| secondSseMaskOrAdvSimdIndex = (uint)GetIndexOfFirstNonAsciiByteInLane(secondVector, bitmask); | ||
| if (ContainsNonAsciiByte_AdvSimd(currentSseMaskOrAdvSimdIndex) || ContainsNonAsciiByte_AdvSimd(secondSseMaskOrAdvSimdIndex)) | ||
| { | ||
| goto FoundNonAsciiDataInInnerLoop; | ||
| } | ||
| } | ||
| else | ||
| { | ||
| goto FoundNonAsciiDataInInnerLoop; | ||
| throw new PlatformNotSupportedException(); | ||
| } | ||
|
|
||
| pBuffer += 2 * SizeOfVector128; | ||
|
|
@@ -313,10 +391,25 @@ private static unsafe nuint GetIndexOfFirstNonAsciiByte_Sse2(byte* pBuffer, nuin | |
| // At least one full vector's worth of data remains, so we can safely read it. | ||
| // Remember, at this point pBuffer is still aligned. | ||
|
|
||
| currentMask = (uint)Sse2.MoveMask(Sse2.LoadAlignedVector128(pBuffer)); | ||
| if (currentMask != 0) | ||
| if (Sse2.IsSupported) | ||
| { | ||
| goto FoundNonAsciiDataInCurrentMask; | ||
| currentSseMaskOrAdvSimdIndex = (uint)Sse2.MoveMask(Sse2.LoadAlignedVector128(pBuffer)); | ||
| if (ContainsNonAsciiByte_Sse2(currentSseMaskOrAdvSimdIndex)) | ||
| { | ||
| goto FoundNonAsciiDataInCurrentMask; | ||
| } | ||
| } | ||
| else if (AdvSimd.Arm64.IsSupported) | ||
| { | ||
| currentSseMaskOrAdvSimdIndex = (uint)GetIndexOfFirstNonAsciiByteInLane(AdvSimd.LoadVector128(pBuffer), bitmask); | ||
| if (ContainsNonAsciiByte_AdvSimd(currentSseMaskOrAdvSimdIndex)) | ||
| { | ||
| goto FoundNonAsciiDataInCurrentMask; | ||
| } | ||
| } | ||
| else | ||
| { | ||
| throw new PlatformNotSupportedException(); | ||
| } | ||
|
|
||
| IncrementCurrentOffsetBeforeFinalUnalignedVectorRead: | ||
|
|
@@ -332,17 +425,33 @@ private static unsafe nuint GetIndexOfFirstNonAsciiByte_Sse2(byte* pBuffer, nuin | |
|
|
||
| pBuffer += (bufferLength & MaskOfAllBitsInVector128) - SizeOfVector128; | ||
|
|
||
| currentMask = (uint)Sse2.MoveMask(Sse2.LoadVector128(pBuffer)); // unaligned load | ||
| if (currentMask != 0) | ||
| if (Sse2.IsSupported) | ||
| { | ||
| goto FoundNonAsciiDataInCurrentMask; | ||
| currentSseMaskOrAdvSimdIndex = (uint)Sse2.MoveMask(Sse2.LoadVector128(pBuffer)); // unaligned load | ||
|
pgovind marked this conversation as resolved.
Outdated
|
||
| if (ContainsNonAsciiByte_Sse2(currentSseMaskOrAdvSimdIndex)) | ||
| { | ||
| goto FoundNonAsciiDataInCurrentMask; | ||
| } | ||
|
|
||
| } | ||
| else if (AdvSimd.Arm64.IsSupported) | ||
| { | ||
| currentSseMaskOrAdvSimdIndex = (uint)GetIndexOfFirstNonAsciiByteInLane(AdvSimd.LoadVector128(pBuffer), bitmask); // unaligned load | ||
| if (ContainsNonAsciiByte_AdvSimd(currentSseMaskOrAdvSimdIndex)) | ||
| { | ||
| goto FoundNonAsciiDataInCurrentMask; | ||
| } | ||
|
|
||
| } | ||
| else | ||
| { | ||
| throw new PlatformNotSupportedException(); | ||
| } | ||
|
|
||
| pBuffer += SizeOfVector128; | ||
| } | ||
|
|
||
| Finish: | ||
|
|
||
| return (nuint)pBuffer - (nuint)pOriginalBuffer; // and we're done! | ||
|
|
||
| FoundNonAsciiDataInInnerLoop: | ||
|
|
@@ -351,20 +460,46 @@ private static unsafe nuint GetIndexOfFirstNonAsciiByte_Sse2(byte* pBuffer, nuin | |
| // instead be the second mask. If so, skip the entire first mask and drain ASCII bytes | ||
| // from the second mask. | ||
|
|
||
| if (currentMask == 0) | ||
| if (Sse2.IsSupported) | ||
| { | ||
| pBuffer += SizeOfVector128; | ||
| currentMask = secondMask; | ||
| if (!ContainsNonAsciiByte_Sse2(currentSseMaskOrAdvSimdIndex)) | ||
|
pgovind marked this conversation as resolved.
Outdated
|
||
| { | ||
| pBuffer += SizeOfVector128; | ||
| currentSseMaskOrAdvSimdIndex = secondSseMaskOrAdvSimdIndex; | ||
| } | ||
| } | ||
| else if (AdvSimd.IsSupported) | ||
| { | ||
| if (!ContainsNonAsciiByte_AdvSimd(currentSseMaskOrAdvSimdIndex)) | ||
|
pgovind marked this conversation as resolved.
Outdated
|
||
| { | ||
| pBuffer += SizeOfVector128; | ||
| currentSseMaskOrAdvSimdIndex = secondSseMaskOrAdvSimdIndex; | ||
| } | ||
| } | ||
| else | ||
| { | ||
| throw new PlatformNotSupportedException(); | ||
| } | ||
|
|
||
| FoundNonAsciiDataInCurrentMask: | ||
|
pgovind marked this conversation as resolved.
Outdated
|
||
|
|
||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same for this label, it seems like duplicate computations are being made.
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hmmm, not really. Both the SSE and AdvSimd paths can come here, so we need to special case this label for both paths |
||
| // The mask contains - from the LSB - a 0 for each ASCII byte we saw, and a 1 for each non-ASCII byte. | ||
|
pgovind marked this conversation as resolved.
Outdated
|
||
| // Tzcnt is the correct operation to count the number of zero bits quickly. If this instruction isn't | ||
| // available, we'll fall back to a normal loop. | ||
|
|
||
| Debug.Assert(currentMask != 0, "Shouldn't be here unless we see non-ASCII data."); | ||
| pBuffer += (uint)BitOperations.TrailingZeroCount(currentMask); | ||
| if (Sse2.IsSupported) | ||
| { | ||
| Debug.Assert(ContainsNonAsciiByte_Sse2(currentSseMaskOrAdvSimdIndex), "Shouldn't be here unless we see non-ASCII data."); | ||
| pBuffer += (uint)BitOperations.TrailingZeroCount(currentSseMaskOrAdvSimdIndex); | ||
| } | ||
| else if (AdvSimd.Arm64.IsSupported) | ||
| { | ||
| Debug.Assert(ContainsNonAsciiByte_AdvSimd(currentSseMaskOrAdvSimdIndex), "Shouldn't be here unless we see non-ASCII data."); | ||
| pBuffer += currentSseMaskOrAdvSimdIndex; | ||
| } | ||
| else | ||
| { | ||
| throw new PlatformNotSupportedException(); | ||
| } | ||
|
|
||
| goto Finish; | ||
|
|
||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.