Skip to content

Commit fc1563a

Browse files
committed
feat: performance comparison
1 parent 5df6e89 commit fc1563a

File tree

7 files changed

+153
-184
lines changed

7 files changed

+153
-184
lines changed

.editorconfig

Lines changed: 0 additions & 168 deletions
This file was deleted.

LinqExtensions.cs

Lines changed: 61 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
11
using System.Numerics;
22
using System.Runtime.CompilerServices;
33
using System.Runtime.InteropServices;
4+
#if !NET48
45
using System.Runtime.Intrinsics;
6+
#endif
7+
using Microsoft.CodeAnalysis.CSharp.Syntax;
58

69
namespace SimdIteration;
710

@@ -10,8 +13,22 @@ public static class LinqExtensions
1013
/// <summary>
/// Validates the arguments and then forwards them to <c>SafeOptimizedChunker</c>,
/// returning whatever sequence of arrays that helper produces.
/// </summary>
/// <typeparam name="TSource">Element type of the input sequence.</typeparam>
/// <param name="source">Sequence to process; must not be <c>null</c>.</param>
/// <param name="size">Requested chunk size; must be at least 1.</param>
/// <returns>The result of <c>SafeOptimizedChunker(source, size)</c>.</returns>
/// <exception cref="ArgumentNullException"><paramref name="source"/> is <c>null</c>.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="size"/> is less than 1.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
1114
public static IEnumerable<TSource[]> OptimizedChunk<TSource>(this IEnumerable<TSource> source, int size)
1215
{
16+
#if NET48
17+
if (source == null)
18+
{
19+
throw new ArgumentNullException(nameof(source));
20+
}
21+
#else
1322
ArgumentNullException.ThrowIfNull(source);
23+
#endif
24+
#if NET8_0
1425
ArgumentOutOfRangeException.ThrowIfLessThan(size, 1);
26+
#else
27+
if(size < 1)
28+
{
29+
// Report the offending parameter (size), matching what ThrowIfLessThan(size, 1) reports on .NET 8.
throw new ArgumentOutOfRangeException(nameof(size));
30+
}
31+
#endif
1532
return SafeOptimizedChunker(source, size);
1633
}
1734

@@ -71,35 +88,65 @@ private static IEnumerable<TSource[]> SafeOptimizedChunker<TSource>(IEnumerable<
7188
}
7289
}
7390
}
91+
#if !NET48
92+
/// <summary>
/// Wraps <paramref name="source"/> in a <see cref="ReadOnlySpan{T}"/> and returns the
/// result of <c>SimdCore&lt;int&gt;.Sum</c> for it.
/// </summary>
/// <param name="source">Array whose elements are passed to the SIMD sum.</param>
/// <returns>The value returned by <c>SimdCore&lt;int&gt;.Sum</c>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
93+
public static int SumSimd(this int[] source) => SimdCore<int>.Sum(new ReadOnlySpan<int>(source));
7494

7595
[MethodImpl(MethodImplOptions.AggressiveInlining)]
76-
public static int Sum(this int[] source) => SimdCore<int>.Sum(new ReadOnlySpan<int>(source));
96+
// Wraps source in a ReadOnlySpan<int> and returns the (Min, Max) pair computed by SimdCore<int>.MinMax.
public static (int Min, int Max) MinMaxSimd(this int[] source) => SimdCore<int>.MinMax(new ReadOnlySpan<int>(source));
97+
#endif
7798

7899
[MethodImpl(MethodImplOptions.AggressiveInlining)]
79-
public static (int Min, int Max) MinMax(this int[] source) => SimdCore<int>.MinMax(new ReadOnlySpan<int>(source));
100+
// Baseline for the comparison: obtains the minimum and the maximum through two
// separate calls, source.Min() and source.Max(), and returns them as a tuple.
public static (int Min, int Max) MinMaxLinq(this int[] source)
101+
{
102+
return (source.Min(), source.Max());
103+
}
80104

81105
[MethodImpl(MethodImplOptions.AggressiveInlining)]
82-
public static decimal Average(this int[] source) => SimdCore<int>.Sum(new ReadOnlySpan<int>(source)) / (source.Length * 1M);
106+
// Finds the smallest and the largest element of source in a single foreach loop
// and returns them as a (Min, Max) tuple.
// Both bounds are seeded from source[0], so the array must contain at least one
// element; an empty array fails at that index access.
public static (int Min, int Max) MinMaxForEach(this int[] source)
107+
{
108+
int min = source[0];
109+
int max = min;
110+
111+
foreach(int value in source)
112+
{
113+
// Two independent checks: a value can lower min or raise max.
if(value < min)
114+
{
115+
min = value;
116+
}
117+
if(value > max)
118+
{
119+
max = value;
120+
}
121+
}
122+
123+
return (min, max);
124+
}
83125
}
84126

127+
#if !NET48
85128
file static class SimdCore<T> where T : struct, INumber<T>
86129
{
87130
#region fields
88131
private static readonly int _vectorLength;
132+
#if NET8_0
89133
private static readonly bool _is512;
134+
#endif
90135
private static readonly bool _is256;
91136
private static readonly bool _is128;
92137
#endregion
93138

94139
#region ctor
95140
static SimdCore()
96141
{
97-
if (Vector256.IsHardwareAccelerated)
142+
#if NET8_0
143+
if (Vector512.IsHardwareAccelerated)
98144
{
99145
_is512 = true;
100146
_vectorLength = Vector512<T>.Count;
101147
return;
102148
}
149+
#endif
103150
if (Vector256.IsHardwareAccelerated)
104151
{
105152
_is256 = true;
@@ -121,10 +168,12 @@ internal static T Sum(ReadOnlySpan<T> source)
121168
{
122169
if (source.Length > _vectorLength)
123170
{
171+
#if NET8_0
124172
if (_is512)
125173
{
126174
return Sum512(source);
127175
}
176+
#endif
128177
if (_is256)
129178
{
130179
return Sum256(source);
@@ -136,7 +185,7 @@ internal static T Sum(ReadOnlySpan<T> source)
136185
}
137186
return SumFallback(source);
138187
}
139-
188+
#if NET8_0
140189
private static T Sum512(ReadOnlySpan<T> source)
141190
{
142191
T sum = T.Zero;
@@ -163,7 +212,7 @@ private static T Sum512(ReadOnlySpan<T> source)
163212

164213
return sum + Vector512.Sum(vectorSum512);
165214
}
166-
215+
#endif
167216
private static T Sum256(ReadOnlySpan<T> source)
168217
{
169218
T sum = T.Zero;
@@ -236,10 +285,12 @@ internal static (T Min, T Max) MinMax(ReadOnlySpan<T> source)
236285
{
237286
if (source.Length > _vectorLength)
238287
{
288+
#if NET8_0
239289
if (_is512)
240290
{
241291
return MinMax512(source);
242292
}
293+
#endif
243294
if (_is256)
244295
{
245296
return MinMax256(source);
@@ -252,6 +303,7 @@ internal static (T Min, T Max) MinMax(ReadOnlySpan<T> source)
252303
return MinMaxFallback(source);
253304
}
254305

306+
#if NET8_0
255307
private static (T Min, T Max) MinMax512(ReadOnlySpan<T> source)
256308
{
257309
ref T current = ref MemoryMarshal.GetReference(source);
@@ -302,7 +354,7 @@ private static (T Min, T Max) MinMax512(ReadOnlySpan<T> source)
302354

303355
return (min, max);
304356
}
305-
357+
#endif
306358
private static (T Min, T Max) MinMax256(ReadOnlySpan<T> source)
307359
{
308360
ref T current = ref MemoryMarshal.GetReference(source);
@@ -426,4 +478,5 @@ private static (T Min, T Max) MinMaxFallback(ReadOnlySpan<T> source)
426478
return (min, max);
427479
}
428480
#endregion
429-
}
481+
}
482+
#endif

Program.cs

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,13 @@
1-
using static System.Console;
1+
using BenchmarkDotNet.Running;
2+
using SimdIteration;
3+
using static System.Console;
24

3-
WriteLine("Hello SIMD");
5+
// DEBUG builds: instantiate the benchmark class directly, run its Setup, and print
// the result of each MinMax variant once.
#if DEBUG
6+
SimdBenchmark benchmark = new ();
7+
benchmark.Setup();
8+
WriteLine(benchmark.MinMaxLinq());
9+
WriteLine(benchmark.MinMaxForEach());
10+
WriteLine(benchmark.MinMaxSimd());
11+
#else
12+
// Non-DEBUG builds: hand SimdBenchmark to BenchmarkDotNet's BenchmarkRunner.
BenchmarkRunner.Run<SimdBenchmark>();
13+
#endif

README.md

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,29 @@
11
# Simd Iteration
22

3-
Test SIMD 512, 256, 128 registers for fast aggregate calculations
3+
Test SIMD 512, 256, 128 registers for fast aggregate calculations.
4+
5+
Unfortunately, my hardware doesn't support Vector512.
6+
7+
Anyway, the performance improvement is mind-blowing.
8+
9+
> [!IMPORTANT]
10+
> .NET 8 is about 100 times faster than .NET Framework 4.8 at calculating the Min and Max at the same time!
11+
12+
## Results
13+
14+
BenchmarkDotNet=v0.13.5, OS=Windows 10 (10.0.19044.3086/21H2/November2021Update)
15+
AMD Ryzen 7 1700, 1 CPU, 16 logical and 8 physical cores
16+
.NET SDK=8.0.100-rc.1.23455.8
17+
[Host] : .NET 8.0.0 (8.0.23.41904), X64 RyuJIT AVX2
18+
.NET 7.0 : .NET 7.0.11 (7.0.1123.42427), X64 RyuJIT AVX2
19+
.NET 8.0 : .NET 8.0.0 (8.0.23.41904), X64 RyuJIT AVX2
20+
.NET Framework 4.8 : .NET Framework 4.8 (4.8.4644.0), X64 RyuJIT VectorSize=256
21+
22+
23+
| Method | Job | Runtime | Size | Mean | Allocated |
24+
|-------------- |------------------- |------------------- |------ |---------------:|----------:|
25+
| MinMaxLinq | .NET Framework 4.8 | .NET Framework 4.8 | 10000 | 118,675.226 ns | 65 B |
26+
| MinMaxLinq | .NET 7.0 | .NET 7.0 | 10000 | 2,350.046 ns | - |
27+
| MinMaxLinq | .NET 8.0 | .NET 8.0 | 10000 | 1,228.518 ns | - |
28+
| MinMaxSimd | .NET 7.0 | .NET 7.0 | 10000 | 834.291 ns | - |
29+
| **MinMaxSimd** | **.NET 8.0** | **.NET 8.0** | 10000 | **808.150 ns** | - |

0 commit comments

Comments
 (0)