44using System . Diagnostics ;
55using System . Runtime . CompilerServices ;
66using System . Runtime . Intrinsics ;
7+ using System . Runtime . Intrinsics . Arm ;
78using System . Runtime . Intrinsics . X86 ;
89using System . Numerics ;
910
@@ -78,26 +79,38 @@ static Utf16Utility()
7879 long tempUtf8CodeUnitCountAdjustment = 0 ;
7980 int tempScalarCountAdjustment = 0 ;
8081
81- if ( Sse2 . IsSupported )
82+ if ( ( AdvSimd . Arm64 . IsSupported && BitConverter . IsLittleEndian ) || Sse2 . IsSupported )
8283 {
8384 if ( inputLength >= Vector128 < ushort > . Count )
8485 {
8586 Vector128 < ushort > vector0080 = Vector128 . Create ( ( ushort ) 0x80 ) ;
8687 Vector128 < ushort > vectorA800 = Vector128 . Create ( ( ushort ) 0xA800 ) ;
8788 Vector128 < short > vector8800 = Vector128 . Create ( unchecked ( ( short ) 0x8800 ) ) ;
8889 Vector128 < ushort > vectorZero = Vector128 < ushort > . Zero ;
89-
9090 do
9191 {
92- Vector128 < ushort > utf16Data = Sse2 . LoadVector128 ( ( ushort * ) pInputBuffer ) ; // unaligned
93- uint mask ;
92+ Vector128 < ushort > utf16Data ;
93+ if ( AdvSimd . Arm64 . IsSupported )
94+ {
95+ utf16Data = AdvSimd . LoadVector128 ( ( ushort * ) pInputBuffer ) ; // unaligned
96+ }
97+ else
98+ {
99+ utf16Data = Sse2 . LoadVector128 ( ( ushort * ) pInputBuffer ) ; // unaligned
100+ }
94101
95102 Vector128 < ushort > charIsNonAscii ;
96- if ( Sse41 . IsSupported )
103+
104+ if ( AdvSimd . Arm64 . IsSupported )
105+ {
106+ // Sets the 0x0080 bit of each element in 'charIsNonAscii' if the corresponding
107+ // input was 0x0080 <= [value]. (i.e., [value] is non-ASCII.)
108+ charIsNonAscii = AdvSimd . Min ( utf16Data , vector0080 ) ;
109+ }
110+ else if ( Sse41 . IsSupported )
97111 {
98112 // Sets the 0x0080 bit of each element in 'charIsNonAscii' if the corresponding
99113 // input was 0x0080 <= [value]. (i.e., [value] is non-ASCII.)
100-
101114 charIsNonAscii = Sse41 . Min ( utf16Data , vector0080 ) ;
102115 }
103116 else
@@ -111,16 +124,34 @@ static Utf16Utility()
111124
112125#if DEBUG
113126 // Quick check to ensure we didn't accidentally set the 0x8000 bit of any element.
114- uint debugMask = ( uint ) Sse2 . MoveMask ( charIsNonAscii . AsByte ( ) ) ;
127+ uint debugMask ;
128+ if ( AdvSimd . Arm64 . IsSupported )
129+ {
130+ debugMask = GetNonAsciiBytes ( charIsNonAscii . AsByte ( ) ) ;
131+ }
132+ else
133+ {
134+ debugMask = ( uint ) Sse2 . MoveMask ( charIsNonAscii . AsByte ( ) ) ;
135+ }
115136 Debug . Assert ( ( debugMask & 0b_1010_1010_1010_1010 ) == 0 , "Shouldn't have set the 0x8000 bit of any element in 'charIsNonAscii'." ) ;
116137#endif // DEBUG
117138
118139 // Sets the 0x8080 bits of each element in 'charIsNonAscii' if the corresponding
119140 // input was 0x0800 <= [value]. This also handles the missing range a few lines above.
120141
121- Vector128 < ushort > charIsThreeByteUtf8Encoded = Sse2 . Subtract ( vectorZero , Sse2 . ShiftRightLogical ( utf16Data , 11 ) ) ;
142+ Vector128 < ushort > charIsThreeByteUtf8Encoded ;
143+ uint mask ;
122144
123- mask = ( uint ) Sse2 . MoveMask ( Sse2 . Or ( charIsNonAscii , charIsThreeByteUtf8Encoded ) . AsByte ( ) ) ;
145+ if ( AdvSimd . IsSupported )
146+ {
147+ charIsThreeByteUtf8Encoded = AdvSimd . Subtract ( vectorZero , AdvSimd . ShiftRightLogical ( utf16Data , 11 ) ) ;
148+ mask = GetNonAsciiBytes ( AdvSimd . Or ( charIsNonAscii , charIsThreeByteUtf8Encoded ) . AsByte ( ) ) ;
149+ }
150+ else
151+ {
152+ charIsThreeByteUtf8Encoded = Sse2 . Subtract ( vectorZero , Sse2 . ShiftRightLogical ( utf16Data , 11 ) ) ;
153+ mask = ( uint ) Sse2 . MoveMask ( Sse2 . Or ( charIsNonAscii , charIsThreeByteUtf8Encoded ) . AsByte ( ) ) ;
154+ }
124155
125156 // Each even bit of mask will be 1 only if the char was >= 0x0080,
126157 // and each odd bit of mask will be 1 only if the char was >= 0x0800.
@@ -151,9 +182,16 @@ static Utf16Utility()
151182 // Surrogates need to be special-cased for two reasons: (a) we need
152183 // to account for the fact that we over-counted in the addition above;
153184 // and (b) they require separate validation.
154-
155- utf16Data = Sse2 . Add ( utf16Data , vectorA800 ) ;
156- mask = ( uint ) Sse2 . MoveMask ( Sse2 . CompareLessThan ( utf16Data . AsInt16 ( ) , vector8800 ) . AsByte ( ) ) ;
185+ if ( AdvSimd . Arm64 . IsSupported )
186+ {
187+ utf16Data = AdvSimd . Add ( utf16Data , vectorA800 ) ;
188+ mask = GetNonAsciiBytes ( AdvSimd . CompareLessThan ( utf16Data . AsInt16 ( ) , vector8800 ) . AsByte ( ) ) ;
189+ }
190+ else
191+ {
192+ utf16Data = Sse2 . Add ( utf16Data , vectorA800 ) ;
193+ mask = ( uint ) Sse2 . MoveMask ( Sse2 . CompareLessThan ( utf16Data . AsInt16 ( ) , vector8800 ) . AsByte ( ) ) ;
194+ }
157195
158196 if ( mask != 0 )
159197 {
@@ -178,7 +216,15 @@ static Utf16Utility()
178216 // Since 'mask' already has 00 in these positions (since the corresponding char
179217 // wasn't a surrogate), "mask AND mask2 == 00" holds for these positions.
180218
181- uint mask2 = ( uint ) Sse2 . MoveMask ( Sse2 . ShiftRightLogical ( utf16Data , 3 ) . AsByte ( ) ) ;
219+ uint mask2 ;
220+ if ( AdvSimd . Arm64 . IsSupported )
221+ {
222+ mask2 = GetNonAsciiBytes ( AdvSimd . ShiftRightLogical ( utf16Data , 3 ) . AsByte ( ) ) ;
223+ }
224+ else
225+ {
226+ mask2 = ( uint ) Sse2 . MoveMask ( Sse2 . ShiftRightLogical ( utf16Data , 3 ) . AsByte ( ) ) ;
227+ }
182228
183229 // 'lowSurrogatesMask' has its bits occur in pairs:
184230 // - 01 if the corresponding char was a low surrogate char,
@@ -433,5 +479,24 @@ static Utf16Utility()
433479 scalarCountAdjustment = tempScalarCountAdjustment ;
434480 return pInputBuffer ;
435481 }
482+
// Per-byte bit selectors that GetNonAsciiBytes ANDs against a vector of 0x00/0xFF bytes.
// The two literals are byte-reversals of each other and the choice is keyed on
// BitConverter.IsLittleEndian; presumably the intent is that byte i of each 8-byte half
// holds (1 << i) in memory order on either endianness.
// NOTE(review): relies on Vector128.Create(scalar) broadcasting the 64-bit value to both halves - confirm.
483+ private static readonly Vector128 < byte > s_bitMask128 = BitConverter . IsLittleEndian ?
484+ Vector128 . Create ( 0x80402010_08040201 ) . AsByte ( ) :
485+ Vector128 . Create ( 0x01020408_10204080 ) . AsByte ( ) ;
486+
// Collapses a 128-bit byte vector into an integer mask built from the most significant bit
// of each of its 16 bytes. The call sites in this file use it on the AdvSimd path in the
// same places where the SSE2 path uses Sse2.MoveMask.
//
// value:   the vector whose per-byte top bits are gathered.
// returns: the first 16-bit element of the reduced vector, widened to uint; the low byte is
//          the sum of the selected bits for bytes 0..7 and the high byte the sum for
//          bytes 8..15 (assuming little-endian element layout - the visible caller checks
//          BitConverter.IsLittleEndian before taking the AdvSimd path).
//
// NOTE(review): this helper asserts AdvSimd.Arm64.IsSupported, but one call site guards
// with AdvSimd.IsSupported instead - confirm the guards are meant to be identical.
487+ [ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
488+ private static uint GetNonAsciiBytes ( Vector128 < byte > value )
489+ {
// AddPairwise below comes from AdvSimd.Arm64, so this must only be reached when that is supported.
490+ Debug . Assert ( AdvSimd . Arm64 . IsSupported ) ;
491+
// Arithmetic shift by 7 on signed bytes replicates the sign bit: each byte becomes 0xFF if
// its top bit was set, otherwise 0x00.
492+ Vector128 < byte > mostSignificantBitIsSet = AdvSimd . ShiftRightArithmetic ( value . AsSByte ( ) , 7 ) . AsByte ( ) ;
// Keep only the single selector bit from s_bitMask128 in each flagged byte, so the bytes
// can be summed without carries colliding.
493+ Vector128 < byte > extractedBits = AdvSimd . And ( mostSignificantBitIsSet , s_bitMask128 ) ;
494+
// Three pairwise adds of the vector with itself reduce 16 bytes -> 8 -> 4 -> 2 partial sums.
495+ // self-pairwise add until all flags have moved to the first two bytes of the vector
496+ extractedBits = AdvSimd . Arm64 . AddPairwise ( extractedBits , extractedBits ) ;
497+ extractedBits = AdvSimd . Arm64 . AddPairwise ( extractedBits , extractedBits ) ;
498+ extractedBits = AdvSimd . Arm64 . AddPairwise ( extractedBits , extractedBits ) ;
// Read the first two bytes as one ushort; the implicit conversion widens it to uint.
499+ return extractedBits . AsUInt16 ( ) . ToScalar ( ) ;
500+ }
436501 }
437502}
0 commit comments