/// <summary>
/// Widens the bytes of <paramref name="source"/> to 32-bit floats, multiplies each one by
/// <c>1 / byte.MaxValue</c> and stores the results into <paramref name="dest"/>.
/// Uses the AVX2 path when available; otherwise the 128-bit path (SSE4.1 widening loads,
/// or SSE2 unpacking when SSE4.1 is not supported).
/// </summary>
/// <param name="source">The bytes to convert.</param>
/// <param name="dest">The span receiving the scaled floats.</param>
/// <remarks>
/// Only whole blocks of <c>Vector256&lt;byte&gt;.Count</c> (AVX2) or <c>Vector128&lt;byte&gt;.Count</c>
/// (SSE) elements are processed; the block count is derived from <c>dest.Length</c> by integer division.
/// NOTE(review): VerifySpanInput presumably validates lengths/divisibility - confirm against its definition.
/// </remarks>
internal static unsafe void ByteToNormalizedFloat(
    ReadOnlySpan<byte> source,
    Span<float> dest)
{
    // The widening loads below take a raw byte pointer, so the source is pinned for the whole conversion.
    fixed (byte* pinnedSource = source)
    {
        if (Avx2.IsSupported)
        {
            VerifySpanInput(source, dest, Vector256<byte>.Count);

            int blockCount = dest.Length / Vector256<byte>.Count;

            ref Vector256<float> destVectors =
                ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(dest));

            Vector256<float> factor = Vector256.Create(1 / (float)byte.MaxValue);

            for (int block = 0; block < blockCount; block++)
            {
                // Start of the current block of Vector256<byte>.Count source bytes.
                byte* src = pinnedSource + (Vector256<byte>.Count * block);

                // Each load widens Vector256<int>.Count consecutive bytes to 32-bit integers.
                Vector256<int> w0 = Avx2.ConvertToVector256Int32(src);
                Vector256<int> w1 = Avx2.ConvertToVector256Int32(src + Vector256<int>.Count);
                Vector256<int> w2 = Avx2.ConvertToVector256Int32(src + (Vector256<int>.Count * 2));
                Vector256<int> w3 = Avx2.ConvertToVector256Int32(src + (Vector256<int>.Count * 3));

                Vector256<float> r0 = Avx.Multiply(factor, Avx.ConvertToVector256Single(w0));
                Vector256<float> r1 = Avx.Multiply(factor, Avx.ConvertToVector256Single(w1));
                Vector256<float> r2 = Avx.Multiply(factor, Avx.ConvertToVector256Single(w2));
                Vector256<float> r3 = Avx.Multiply(factor, Avx.ConvertToVector256Single(w3));

                // One source block yields four consecutive float vectors in the destination.
                int slot = block * 4;
                Unsafe.Add(ref destVectors, slot) = r0;
                Unsafe.Add(ref destVectors, slot + 1) = r1;
                Unsafe.Add(ref destVectors, slot + 2) = r2;
                Unsafe.Add(ref destVectors, slot + 3) = r3;
            }
        }
        else
        {
            // Sse
            VerifySpanInput(source, dest, Vector128<byte>.Count);

            int blockCount = dest.Length / Vector128<byte>.Count;

            ref Vector128<float> destVectors =
                ref Unsafe.As<float, Vector128<float>>(ref MemoryMarshal.GetReference(dest));

            Vector128<float> factor = Vector128.Create(1 / (float)byte.MaxValue);
            Vector128<byte> zeroBytes = Vector128<byte>.Zero;
            Vector128<short> zeroShorts = zeroBytes.AsInt16();

            for (int block = 0; block < blockCount; block++)
            {
                // Start of the current block of Vector128<byte>.Count source bytes.
                byte* src = pinnedSource + (Vector128<byte>.Count * block);

                Vector128<int> w0, w1, w2, w3;
                if (Sse41.IsSupported)
                {
                    // Each load widens Vector128<int>.Count consecutive bytes to 32-bit integers.
                    w0 = Sse41.ConvertToVector128Int32(src);
                    w1 = Sse41.ConvertToVector128Int32(src + Vector128<int>.Count);
                    w2 = Sse41.ConvertToVector128Int32(src + (Vector128<int>.Count * 2));
                    w3 = Sse41.ConvertToVector128Int32(src + (Vector128<int>.Count * 3));
                }
                else
                {
                    // SSE2 fallback: interleave with zero twice (byte -> short -> int) to zero-extend.
                    Vector128<byte> packed = Sse2.LoadVector128(src);
                    Vector128<short> lowShorts = Sse2.UnpackLow(packed, zeroBytes).AsInt16();
                    Vector128<short> highShorts = Sse2.UnpackHigh(packed, zeroBytes).AsInt16();

                    w0 = Sse2.UnpackLow(lowShorts, zeroShorts).AsInt32();
                    w1 = Sse2.UnpackHigh(lowShorts, zeroShorts).AsInt32();
                    w2 = Sse2.UnpackLow(highShorts, zeroShorts).AsInt32();
                    w3 = Sse2.UnpackHigh(highShorts, zeroShorts).AsInt32();
                }

                Vector128<float> r0 = Sse.Multiply(factor, Sse2.ConvertToVector128Single(w0));
                Vector128<float> r1 = Sse.Multiply(factor, Sse2.ConvertToVector128Single(w1));
                Vector128<float> r2 = Sse.Multiply(factor, Sse2.ConvertToVector128Single(w2));
                Vector128<float> r3 = Sse.Multiply(factor, Sse2.ConvertToVector128Single(w3));

                // One source block yields four consecutive float vectors in the destination.
                int slot = block * 4;
                Unsafe.Add(ref destVectors, slot) = r0;
                Unsafe.Add(ref destVectors, slot + 1) = r1;
                Unsafe.Add(ref destVectors, slot + 2) = r2;
                Unsafe.Add(ref destVectors, slot + 3) = r3;
            }
        }
    }
}
0 commit comments