Skip to content

Commit 2410a37

Browse files
Merge pull request #1770 from SixLabors/af/pin-ByteToNormalizedFloat
Pin sourceBase in HwIntrinsics ByteToNormalizedFloat
2 parents 4ce2d0c + 6f5269f commit 2410a37

File tree

1 file changed

+71 -72 lines changed

1 file changed

+71 -72 lines changed

src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs

Lines changed: 71 additions & 72 deletions
Original file line number | Diff line number | Diff line change
@@ -622,90 +622,89 @@ internal static unsafe void ByteToNormalizedFloat(
622622
ReadOnlySpan<byte> source,
623623
Span<float> dest)
624624
{
625-
if (Avx2.IsSupported)
625+
fixed (byte* sourceBase = source)
626626
{
627-
VerifySpanInput(source, dest, Vector256<byte>.Count);
628-
629-
int n = dest.Length / Vector256<byte>.Count;
627+
if (Avx2.IsSupported)
628+
{
629+
VerifySpanInput(source, dest, Vector256<byte>.Count);
630630

631-
byte* sourceBase = (byte*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(source));
631+
int n = dest.Length / Vector256<byte>.Count;
632632

633-
ref Vector256<float> destBase =
634-
ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(dest));
633+
ref Vector256<float> destBase =
634+
ref Unsafe.As<float, Vector256<float>>(ref MemoryMarshal.GetReference(dest));
635635

636-
var scale = Vector256.Create(1 / (float)byte.MaxValue);
636+
var scale = Vector256.Create(1 / (float)byte.MaxValue);
637637

638-
for (int i = 0; i < n; i++)
639-
{
640-
int si = Vector256<byte>.Count * i;
641-
Vector256<int> i0 = Avx2.ConvertToVector256Int32(sourceBase + si);
642-
Vector256<int> i1 = Avx2.ConvertToVector256Int32(sourceBase + si + Vector256<int>.Count);
643-
Vector256<int> i2 = Avx2.ConvertToVector256Int32(sourceBase + si + (Vector256<int>.Count * 2));
644-
Vector256<int> i3 = Avx2.ConvertToVector256Int32(sourceBase + si + (Vector256<int>.Count * 3));
645-
646-
Vector256<float> f0 = Avx.Multiply(scale, Avx.ConvertToVector256Single(i0));
647-
Vector256<float> f1 = Avx.Multiply(scale, Avx.ConvertToVector256Single(i1));
648-
Vector256<float> f2 = Avx.Multiply(scale, Avx.ConvertToVector256Single(i2));
649-
Vector256<float> f3 = Avx.Multiply(scale, Avx.ConvertToVector256Single(i3));
650-
651-
ref Vector256<float> d = ref Unsafe.Add(ref destBase, i * 4);
652-
653-
d = f0;
654-
Unsafe.Add(ref d, 1) = f1;
655-
Unsafe.Add(ref d, 2) = f2;
656-
Unsafe.Add(ref d, 3) = f3;
638+
for (int i = 0; i < n; i++)
639+
{
640+
int si = Vector256<byte>.Count * i;
641+
Vector256<int> i0 = Avx2.ConvertToVector256Int32(sourceBase + si);
642+
Vector256<int> i1 = Avx2.ConvertToVector256Int32(sourceBase + si + Vector256<int>.Count);
643+
Vector256<int> i2 = Avx2.ConvertToVector256Int32(sourceBase + si + (Vector256<int>.Count * 2));
644+
Vector256<int> i3 = Avx2.ConvertToVector256Int32(sourceBase + si + (Vector256<int>.Count * 3));
645+
646+
Vector256<float> f0 = Avx.Multiply(scale, Avx.ConvertToVector256Single(i0));
647+
Vector256<float> f1 = Avx.Multiply(scale, Avx.ConvertToVector256Single(i1));
648+
Vector256<float> f2 = Avx.Multiply(scale, Avx.ConvertToVector256Single(i2));
649+
Vector256<float> f3 = Avx.Multiply(scale, Avx.ConvertToVector256Single(i3));
650+
651+
ref Vector256<float> d = ref Unsafe.Add(ref destBase, i * 4);
652+
653+
d = f0;
654+
Unsafe.Add(ref d, 1) = f1;
655+
Unsafe.Add(ref d, 2) = f2;
656+
Unsafe.Add(ref d, 3) = f3;
657+
}
657658
}
658-
}
659-
else
660-
{
661-
// Sse
662-
VerifySpanInput(source, dest, Vector128<byte>.Count);
663-
664-
int n = dest.Length / Vector128<byte>.Count;
659+
else
660+
{
661+
// Sse
662+
VerifySpanInput(source, dest, Vector128<byte>.Count);
665663

666-
byte* sourceBase = (byte*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(source));
664+
int n = dest.Length / Vector128<byte>.Count;
667665

668-
ref Vector128<float> destBase =
669-
ref Unsafe.As<float, Vector128<float>>(ref MemoryMarshal.GetReference(dest));
666+
ref Vector128<float> destBase =
667+
ref Unsafe.As<float, Vector128<float>>(ref MemoryMarshal.GetReference(dest));
670668

671-
var scale = Vector128.Create(1 / (float)byte.MaxValue);
672-
Vector128<byte> zero = Vector128<byte>.Zero;
669+
var scale = Vector128.Create(1 / (float)byte.MaxValue);
670+
Vector128<byte> zero = Vector128<byte>.Zero;
673671

674-
for (int i = 0; i < n; i++)
675-
{
676-
int si = Vector128<byte>.Count * i;
677-
678-
Vector128<int> i0, i1, i2, i3;
679-
if (Sse41.IsSupported)
680-
{
681-
i0 = Sse41.ConvertToVector128Int32(sourceBase + si);
682-
i1 = Sse41.ConvertToVector128Int32(sourceBase + si + Vector128<int>.Count);
683-
i2 = Sse41.ConvertToVector128Int32(sourceBase + si + (Vector128<int>.Count * 2));
684-
i3 = Sse41.ConvertToVector128Int32(sourceBase + si + (Vector128<int>.Count * 3));
685-
}
686-
else
672+
for (int i = 0; i < n; i++)
687673
{
688-
Vector128<byte> b = Sse2.LoadVector128(sourceBase + si);
689-
Vector128<short> s0 = Sse2.UnpackLow(b, zero).AsInt16();
690-
Vector128<short> s1 = Sse2.UnpackHigh(b, zero).AsInt16();
691-
692-
i0 = Sse2.UnpackLow(s0, zero.AsInt16()).AsInt32();
693-
i1 = Sse2.UnpackHigh(s0, zero.AsInt16()).AsInt32();
694-
i2 = Sse2.UnpackLow(s1, zero.AsInt16()).AsInt32();
695-
i3 = Sse2.UnpackHigh(s1, zero.AsInt16()).AsInt32();
674+
int si = Vector128<byte>.Count * i;
675+
676+
Vector128<int> i0, i1, i2, i3;
677+
if (Sse41.IsSupported)
678+
{
679+
i0 = Sse41.ConvertToVector128Int32(sourceBase + si);
680+
i1 = Sse41.ConvertToVector128Int32(sourceBase + si + Vector128<int>.Count);
681+
i2 = Sse41.ConvertToVector128Int32(sourceBase + si + (Vector128<int>.Count * 2));
682+
i3 = Sse41.ConvertToVector128Int32(sourceBase + si + (Vector128<int>.Count * 3));
683+
}
684+
else
685+
{
686+
Vector128<byte> b = Sse2.LoadVector128(sourceBase + si);
687+
Vector128<short> s0 = Sse2.UnpackLow(b, zero).AsInt16();
688+
Vector128<short> s1 = Sse2.UnpackHigh(b, zero).AsInt16();
689+
690+
i0 = Sse2.UnpackLow(s0, zero.AsInt16()).AsInt32();
691+
i1 = Sse2.UnpackHigh(s0, zero.AsInt16()).AsInt32();
692+
i2 = Sse2.UnpackLow(s1, zero.AsInt16()).AsInt32();
693+
i3 = Sse2.UnpackHigh(s1, zero.AsInt16()).AsInt32();
694+
}
695+
696+
Vector128<float> f0 = Sse.Multiply(scale, Sse2.ConvertToVector128Single(i0));
697+
Vector128<float> f1 = Sse.Multiply(scale, Sse2.ConvertToVector128Single(i1));
698+
Vector128<float> f2 = Sse.Multiply(scale, Sse2.ConvertToVector128Single(i2));
699+
Vector128<float> f3 = Sse.Multiply(scale, Sse2.ConvertToVector128Single(i3));
700+
701+
ref Vector128<float> d = ref Unsafe.Add(ref destBase, i * 4);
702+
703+
d = f0;
704+
Unsafe.Add(ref d, 1) = f1;
705+
Unsafe.Add(ref d, 2) = f2;
706+
Unsafe.Add(ref d, 3) = f3;
696707
}
697-
698-
Vector128<float> f0 = Sse.Multiply(scale, Sse2.ConvertToVector128Single(i0));
699-
Vector128<float> f1 = Sse.Multiply(scale, Sse2.ConvertToVector128Single(i1));
700-
Vector128<float> f2 = Sse.Multiply(scale, Sse2.ConvertToVector128Single(i2));
701-
Vector128<float> f3 = Sse.Multiply(scale, Sse2.ConvertToVector128Single(i3));
702-
703-
ref Vector128<float> d = ref Unsafe.Add(ref destBase, i * 4);
704-
705-
d = f0;
706-
Unsafe.Add(ref d, 1) = f1;
707-
Unsafe.Add(ref d, 2) = f2;
708-
Unsafe.Add(ref d, 3) = f3;
709708
}
710709
}
711710
}

0 commit comments

Comments (0)