Skip to content

Commit af25b04

Browse files
committed
feat: add 6d blocked kernel
--- type: pre_commit_static_analysis_report description: Results of running static analysis checks when committing changes. report: - task: lint_filenames status: passed - task: lint_editorconfig status: passed - task: lint_markdown status: na - task: lint_package_json status: na - task: lint_repl_help status: na - task: lint_javascript_src status: passed - task: lint_javascript_cli status: na - task: lint_javascript_examples status: na - task: lint_javascript_tests status: na - task: lint_javascript_benchmarks status: na - task: lint_python status: na - task: lint_r status: na - task: lint_c_src status: na - task: lint_c_examples status: na - task: lint_c_benchmarks status: na - task: lint_c_tests_fixtures status: na - task: lint_shell status: na - task: lint_typescript_declarations status: na - task: lint_typescript_tests status: na - task: lint_license_headers status: passed ---
1 parent c401a2a commit af25b04

File tree

1 file changed

+386
-0
lines changed
  • lib/node_modules/@stdlib/ndarray/base/unary-reduce-strided1d-to-struct/lib

1 file changed

+386
-0
lines changed
Lines changed: 386 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,386 @@
1+
/**
2+
* @license Apache-2.0
3+
*
4+
* Copyright (c) 2025 The Stdlib Authors.
5+
*
6+
* Licensed under the Apache License, Version 2.0 (the "License");
7+
* you may not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
/* eslint-disable max-depth, max-len */
20+
21+
'use strict';
22+
23+
// MODULES //
24+
25+
var loopOrder = require( '@stdlib/ndarray/base/unary-loop-interchange-order' );
26+
var blockSize = require( '@stdlib/ndarray/base/unary-tiling-block-size' );
27+
var takeIndexed = require( '@stdlib/array/base/take-indexed' );
28+
var copyIndexed = require( '@stdlib/array/base/copy-indexed' );
29+
var zeros = require( '@stdlib/array/base/zeros' );
30+
var incrementOffsets = require( './increment_offsets.js' );
31+
var setViewOffsets = require( './set_view_offsets.js' );
32+
var offsets = require( './offsets.js' );
33+
34+
35+
// MAIN //
36+
37+
/**
38+
* Performs a reduction over an input ndarray and assigns results to a provided output ndarray via loop blocking.
39+
*
40+
* @private
41+
* @param {Function} fcn - wrapper for a one-dimensional strided array reduction function
42+
* @param {Array<Object>} arrays - ndarrays
43+
* @param {Array<Object>} views - initialized ndarray-like objects representing sub-array views
44+
* @param {IntegerArray} strides - loop dimension strides for the input ndarray
45+
* @param {Function} strategy - input ndarray reshape strategy
46+
* @param {Options} opts - function options
47+
* @returns {void}
48+
*
49+
* @example
50+
* var Float64Array = require( '@stdlib/array/float64' );
51+
* var ndarray2array = require( '@stdlib/ndarray/base/to-array' );
52+
* var Float64Results = require( '@stdlib/stats/base/ztest/one-sample/results/float64' );
53+
* var structFactory = require( '@stdlib/array/struct-factory' );
54+
* var ztest = require( '@stdlib/stats/base/ndarray/ztest' );
55+
*
56+
* var ResultsArray = structFactory( Float64Results );
57+
*
58+
* // Create data buffers:
59+
* var xbuf = new Float64Array( [ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0 ] );
60+
* var ybuf = new ResultsArray( 3 );
61+
*
62+
* // Define the array shapes:
63+
* var xsh = [ 1, 1, 1, 1, 1, 3, 2, 2 ];
64+
* var ysh = [ 1, 1, 1, 1, 1, 3 ];
65+
*
66+
* // Define the array strides:
67+
* var sx = [ 12, 12, 12, 12, 12, 4, 2, 1 ];
68+
* var sy = [ 3, 3, 3, 3, 3, 1 ];
69+
*
70+
* // Define the index offsets:
71+
* var ox = 0;
72+
* var oy = 0;
73+
*
74+
* // Create an input ndarray-like object:
75+
* var x = {
76+
* 'dtype': 'float64',
77+
* 'data': xbuf,
78+
* 'shape': xsh,
79+
* 'strides': sx,
80+
* 'offset': ox,
81+
* 'order': 'row-major'
82+
* };
83+
*
84+
* // Create an output ndarray-like object:
85+
* var y = {
86+
* 'dtype': Float64Results,
87+
* 'data': ybuf,
88+
* 'shape': ysh,
89+
* 'strides': sy,
90+
* 'offset': oy,
91+
* 'order': 'row-major'
92+
* };
93+
*
94+
* // Create additional parameter ndarray-like objects:
95+
* var alternative = {
96+
* 'dtype': 'generic',
97+
* 'data': [ 'two-sided' ],
98+
* 'shape': ysh,
99+
* 'strides': [ 0, 0, 0, 0, 0, 0 ],
100+
* 'offset': 0,
101+
* 'order': 'row-major'
102+
* };
103+
* var alpha = {
104+
* 'dtype': 'float64',
105+
* 'data': [ 0.05 ],
106+
* 'shape': ysh,
107+
* 'strides': [ 0, 0, 0, 0, 0, 0 ],
108+
* 'offset': 0,
109+
* 'order': 'row-major'
110+
* };
111+
* var mu = {
112+
* 'dtype': 'float64',
113+
* 'data': [ 0.0 ],
114+
* 'shape': ysh,
115+
* 'strides': [ 0, 0, 0, 0, 0, 0 ],
116+
* 'offset': 0,
117+
* 'order': 'row-major'
118+
* };
119+
* var sigma = {
120+
* 'dtype': 'float64',
121+
* 'data': [ 1.0 ],
122+
* 'shape': ysh,
123+
* 'strides': [ 0, 0, 0, 0, 0, 0 ],
124+
* 'offset': 0,
125+
* 'order': 'row-major'
126+
* };
127+
*
128+
* // Initialize ndarray-like objects representing sub-array views:
129+
* var views = [
130+
* {
131+
* 'dtype': x.dtype,
132+
* 'data': x.data,
133+
* 'shape': [ 2, 2 ],
134+
* 'strides': [ 2, 1 ],
135+
* 'offset': x.offset,
136+
* 'order': x.order
137+
* },
138+
* {
139+
* 'dtype': y.dtype,
140+
* 'data': y.data,
141+
* 'shape': [],
142+
* 'strides': [ 0 ],
143+
* 'offset': y.offset,
144+
* 'order': y.order
145+
* },
146+
* {
147+
* 'dtype': alternative.dtype,
148+
* 'data': alternative.data,
149+
* 'shape': [],
150+
* 'strides': [ 0 ],
151+
* 'offset': alternative.offset,
152+
* 'order': alternative.order
153+
* },
154+
* {
155+
* 'dtype': alpha.dtype,
156+
* 'data': alpha.data,
157+
* 'shape': [],
158+
* 'strides': [ 0 ],
159+
* 'offset': alpha.offset,
160+
* 'order': alpha.order
161+
* },
162+
* {
163+
* 'dtype': mu.dtype,
164+
* 'data': mu.data,
165+
* 'shape': [],
166+
* 'strides': [ 0 ],
167+
* 'offset': mu.offset,
168+
* 'order': mu.order
169+
* },
170+
* {
171+
* 'dtype': sigma.dtype,
172+
* 'data': sigma.data,
173+
* 'shape': [],
174+
* 'strides': [ 0 ],
175+
* 'offset': sigma.offset,
176+
* 'order': sigma.order
177+
* }
178+
* ];
179+
*
180+
* // Define a reshape strategy:
181+
* function strategy( x ) {
182+
* return {
183+
* 'dtype': x.dtype,
184+
* 'data': x.data,
185+
* 'shape': [ 4 ],
186+
* 'strides': [ 1 ],
187+
* 'offset': x.offset,
188+
* 'order': x.order
189+
* };
190+
* }
191+
*
192+
* // Perform a reduction:
193+
* blockedunary6d( ztest, [ x, y, alternative, alpha, mu, sigma ], views, [ 12, 12, 12, 12, 12, 4 ], strategy, {} );
194+
*
195+
* var arr = ndarray2array( y.data, y.shape, y.strides, y.offset, y.order );
196+
* // returns [ [ [ [ [ [ <Float64Results>, <Float64Results>, <Float64Results> ] ] ] ] ] ]
197+
*/
198+
function blockedunary6d( fcn, arrays, views, strides, strategy, opts ) { // eslint-disable-line max-statements
	var bsize;
	var dv0;
	var dv1;
	var dv2;
	var dv3;
	var dv4;
	var dv5;
	var ov1;
	var ov2;
	var ov3;
	var ov4;
	var ov5;
	var sh;
	var s0;
	var s1;
	var s2;
	var s3;
	var s4;
	var s5;
	var sv;
	var ov;
	var iv;
	var i0;
	var i1;
	var i2;
	var i3;
	var i4;
	var i5;
	var j0;
	var j1;
	var j2;
	var j3;
	var j4;
	var j5;
	var N;
	var x;
	var y;
	var v;
	var o;
	var k;

	// Note on variable naming convention: s#, dv#, ov#, i#, j# where # corresponds to the loop number, with `0` being the innermost loop...

	N = arrays.length;
	x = arrays[ 0 ];
	y = arrays[ 1 ];

	// Resolve the loop interchange order:
	o = loopOrder( y.shape, strides, y.strides );
	sh = o.sh;

	// Assemble one list of loop strides per ndarray (input, output, and then any additional ndarrays reordered according to the resolved dimension indices):
	sv = [ o.sx, o.sy ];
	for ( k = 2; k < N; k++ ) {
		sv.push( takeIndexed( arrays[k].strides, o.idx ) );
	}
	// Determine the block size:
	bsize = blockSize( x.dtype, y.dtype );

	// Resolve a list of pointers to the first indexed elements in the respective ndarrays:
	ov = offsets( arrays );

	// Cache offset increments for the innermost loop...
	dv0 = [];
	for ( k = 0; k < N; k++ ) {
		dv0.push( sv[k][0] );
	}
	// Initialize loop variables (one entry per ndarray)...
	ov1 = zeros( N );
	ov2 = zeros( N );
	ov3 = zeros( N );
	ov4 = zeros( N );
	ov5 = zeros( N );
	dv1 = zeros( N );
	dv2 = zeros( N );
	dv3 = zeros( N );
	dv4 = zeros( N );
	dv5 = zeros( N );
	iv = zeros( N );

	// Shallow copy the list of views to an internal array so that we can update with reshaped views without impacting the original list of views:
	v = copyIndexed( views );

	// Iterate over blocks (note: for each loop, `j#` tracks the number of remaining elements and is decremented *before* computing block offsets, so blocks are visited starting from the end of each dimension)...
	for ( j5 = sh[5]; j5 > 0; ) {
		if ( j5 < bsize ) {
			s5 = j5;
			j5 = 0;
		} else {
			s5 = bsize;
			j5 -= bsize;
		}
		for ( k = 0; k < N; k++ ) {
			ov5[ k ] = ov[k] + ( j5*sv[k][5] );
		}
		for ( j4 = sh[4]; j4 > 0; ) {
			if ( j4 < bsize ) {
				s4 = j4;
				j4 = 0;
			} else {
				s4 = bsize;
				j4 -= bsize;
			}
			for ( k = 0; k < N; k++ ) {
				// Per-ndarray increment for advancing one step along loop 5 after having taken `s4` steps along loop 4 (must be stored element-wise as `incrementOffsets` is provided the list of increments):
				dv5[ k ] = sv[k][5] - ( s4*sv[k][4] );
				ov4[ k ] = ov5[k] + ( j4*sv[k][4] );
			}
			for ( j3 = sh[3]; j3 > 0; ) {
				if ( j3 < bsize ) {
					s3 = j3;
					j3 = 0;
				} else {
					s3 = bsize;
					j3 -= bsize;
				}
				for ( k = 0; k < N; k++ ) {
					dv4[ k ] = sv[k][4] - ( s3*sv[k][3] );
					ov3[ k ] = ov4[k] + ( j3*sv[k][3] );
				}
				for ( j2 = sh[2]; j2 > 0; ) {
					if ( j2 < bsize ) {
						s2 = j2;
						j2 = 0;
					} else {
						s2 = bsize;
						j2 -= bsize;
					}
					for ( k = 0; k < N; k++ ) {
						dv3[ k ] = sv[k][3] - ( s2*sv[k][2] );
						ov2[ k ] = ov3[k] + ( j2*sv[k][2] );
					}
					for ( j1 = sh[1]; j1 > 0; ) {
						if ( j1 < bsize ) {
							s1 = j1;
							j1 = 0;
						} else {
							s1 = bsize;
							j1 -= bsize;
						}
						for ( k = 0; k < N; k++ ) {
							dv2[ k ] = sv[k][2] - ( s1*sv[k][1] );
							ov1[ k ] = ov2[k] + ( j1*sv[k][1] );
						}
						for ( j0 = sh[0]; j0 > 0; ) {
							if ( j0 < bsize ) {
								s0 = j0;
								j0 = 0;
							} else {
								s0 = bsize;
								j0 -= bsize;
							}
							// Compute index offsets and loop offset increments for the first ndarray elements in the current block...
							for ( k = 0; k < N; k++ ) {
								iv[ k ] = ov1[k] + ( j0*sv[k][0] );
								dv1[ k ] = sv[k][1] - ( s0*sv[k][0] );
							}
							// Iterate over the non-reduced ndarray dimensions...
							for ( i5 = 0; i5 < s5; i5++ ) {
								for ( i4 = 0; i4 < s4; i4++ ) {
									for ( i3 = 0; i3 < s3; i3++ ) {
										for ( i2 = 0; i2 < s2; i2++ ) {
											for ( i1 = 0; i1 < s1; i1++ ) {
												for ( i0 = 0; i0 < s0; i0++ ) {
													// Point each view at the current element, reshape the input view, and invoke the reduction function with the internal list of views:
													setViewOffsets( views, iv );
													v[ 0 ] = strategy( views[ 0 ] );
													fcn( v, opts );
													incrementOffsets( iv, dv0 );
												}
												incrementOffsets( iv, dv1 );
											}
											incrementOffsets( iv, dv2 );
										}
										incrementOffsets( iv, dv3 );
									}
									incrementOffsets( iv, dv4 );
								}
								incrementOffsets( iv, dv5 );
							}
						}
					}
				}
			}
		}
	}
}
382+
383+
384+
// EXPORTS //
385+
386+
module.exports = blockedunary6d;

0 commit comments

Comments
 (0)