Skip to content

Commit fb6a928

Browse files
committed
refactor: implement loop interchange
--- type: pre_commit_static_analysis_report description: Results of running static analysis checks when committing changes. report: - task: lint_filenames status: passed - task: lint_editorconfig status: passed - task: lint_markdown status: na - task: lint_package_json status: na - task: lint_repl_help status: na - task: lint_javascript_src status: passed - task: lint_javascript_cli status: na - task: lint_javascript_examples status: na - task: lint_javascript_tests status: na - task: lint_javascript_benchmarks status: na - task: lint_python status: na - task: lint_r status: na - task: lint_c_src status: na - task: lint_c_examples status: na - task: lint_c_benchmarks status: na - task: lint_c_tests_fixtures status: na - task: lint_shell status: na - task: lint_typescript_declarations status: na - task: lint_typescript_tests status: na - task: lint_license_headers status: passed ---
1 parent 4f2d6e3 commit fb6a928

File tree

1 file changed

+76
-13
lines changed
  • lib/node_modules/@stdlib/blas/base/dger/lib

1 file changed

+76
-13
lines changed

lib/node_modules/@stdlib/blas/base/dger/lib/base.js

Lines changed: 76 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,33 @@
1818

1919
'use strict';
2020

21+
// MODULES //
22+
23+
var isRowMajor = require( '@stdlib/ndarray/base/assert/is-row-major' );
24+
25+
2126
// MAIN //
2227

2328
/**
2429
* Performs the rank 1 operation `A = α*x*y^T + A`, where `α` is a scalar, `x` is an `M` element vector, `y` is an `N` element vector, and `A` is an `M` by `N` matrix.
2530
*
31+
* ## Notes
32+
*
33+
* - To help motivate the use of loop interchange below, we first recognize that storing a matrix in row-major order is equivalent to storing the matrix's transpose in column-major order. Hence, we can interpret an `M` by `N` row-major matrix `B` as the matrix `A^T` stored in column-major order. In that case, we can derive an update equation for `B` as follows:
34+
*
35+
* ```tex
36+
* \begin{align*}
37+
* B &= A^T \\
38+
* &= (\alpha \bar{x} \bar{y}^T + A)^T \\
39+
* &= (\alpha \bar{x} \bar{y}^T)^T + A^T \\
40+
* &= \alpha (\bar{x} \bar{y}^T)^T + A^T \\
41+
* &= \alpha \bar{y} \bar{x}^T + A^T \\
42+
* &= \alpha \bar{y} \bar{x}^T + B
43+
* \end{align*}
44+
* ```
45+
*
46+
* Accordingly, we can reuse the same loop logic for column-major and row-major `A` by simply swapping `x` with `y` and `M` with `N` when `A` is row-major. That is the essence of loop interchange.
47+
*
2648
* @private
2749
* @param {NonNegativeInteger} M - number of rows in the matrix `A`
2850
* @param {NonNegativeInteger} N - number of columns in the matrix `A`
@@ -51,24 +73,65 @@
5173
*/
5274
function dger( M, N, alpha, x, strideX, offsetX, y, strideY, offsetY, A, strideA1, strideA2, offsetA ) { // eslint-disable-line max-params, max-len
5375
var tmp;
54-
var idx;
55-
var jy;
76+
var da0;
77+
var da1;
78+
var sx;
79+
var sy;
80+
var ia;
5681
var ix;
57-
var i;
58-
var j;
82+
var iy;
83+
var i0;
84+
var i1;
85+
var S0;
86+
var S1;
87+
88+
// Note on variable naming convention: S#, da#, i# where # corresponds to the loop number, with `0` being the innermost loop...
89+
90+
if ( isRowMajor( [ strideA1, strideA2 ] ) ) {
91+
// For row-major matrices, the last dimension has the fastest changing index...
92+
S0 = N;
93+
S1 = M;
94+
da0 = strideA2; // offset increment for innermost loop
95+
da1 = strideA1 - ( S0*strideA2 ); // offset increment for outermost loop
5996

60-
jy = offsetY;
61-
for ( j = 0; j < N; j++ ) {
62-
if ( y[ jy ] !== 0.0 ) {
63-
tmp = alpha * y[ jy ];
97+
// Swap the vectors...
98+
tmp = x;
99+
x = y;
100+
y = tmp;
101+
102+
tmp = strideX;
103+
strideX = strideY;
104+
strideY = tmp;
105+
106+
tmp = offsetX;
107+
offsetX = offsetY;
108+
offsetY = tmp;
109+
} else { // column-major
110+
// For column-major matrices, the first dimension has the fastest changing index...
111+
S0 = M;
112+
S1 = N;
113+
da0 = strideA1; // offset increment for innermost loop
114+
da1 = strideA2 - ( S0*strideA1 ); // offset increment for outermost loop
115+
}
116+
sx = strideX;
117+
sy = strideY;
118+
ix = offsetX;
119+
iy = offsetY;
120+
ia = offsetA;
121+
for ( i1 = 0; i1 < S1; i1++ ) {
122+
if ( y[ iy ] === 0.0 ) {
123+
ia += da0 * S0;
124+
} else {
125+
tmp = alpha * y[ iy ];
64126
ix = offsetX;
65-
for ( i = 0; i < M; i++ ) {
66-
idx = offsetA + ( i * strideA1 ) + ( j * strideA2 );
67-
A[ idx ] += x[ ix ] * tmp;
68-
ix += strideX;
127+
for ( i0 = 0; i0 < S0; i0++ ) {
128+
A[ ia ] += x[ ix ] * tmp;
129+
ix += sx;
130+
ia += da0;
69131
}
70132
}
71-
jy += strideY;
133+
iy += sy;
134+
ia += da1;
72135
}
73136
return A;
74137
}

0 commit comments

Comments
 (0)