refactor: implement loop interchange

kgryte · kgryte · commit fb6a9287429e · 2025-06-24T22:54:02.000-07:00
---
type: pre_commit_static_analysis_report
description: Results of running static analysis checks when committing changes.
report:
  - task: lint_filenames
    status: passed
  - task: lint_editorconfig
    status: passed
  - task: lint_markdown
    status: na
  - task: lint_package_json
    status: na
  - task: lint_repl_help
    status: na
  - task: lint_javascript_src
    status: passed
  - task: lint_javascript_cli
    status: na
  - task: lint_javascript_examples
    status: na
  - task: lint_javascript_tests
    status: na
  - task: lint_javascript_benchmarks
    status: na
  - task: lint_python
    status: na
  - task: lint_r
    status: na
  - task: lint_c_src
    status: na
  - task: lint_c_examples
    status: na
  - task: lint_c_benchmarks
    status: na
  - task: lint_c_tests_fixtures
    status: na
  - task: lint_shell
    status: na
  - task: lint_typescript_declarations
    status: na
  - task: lint_typescript_tests
    status: na
  - task: lint_license_headers
    status: passed
---
diff --git a/lib/node_modules/@stdlib/blas/base/dger/lib/base.js b/lib/node_modules/@stdlib/blas/base/dger/lib/base.js
@@ -18,11 +18,33 @@
 
 'use strict';
 
+// MODULES //
+
+var isRowMajor = require( '@stdlib/ndarray/base/assert/is-row-major' );
+
+
 // MAIN //
 
 /**
 * Performs the rank 1 operation `A = α*x*y^T + A`, where `α` is a scalar, `x` is an `M` element vector, `y` is an `N` element vector, and `A` is an `M` by `N` matrix.
 *
+* ## Notes
+*
+* -   To help motivate the use of loop interchange below, we first recognize that a matrix stored in row-major order is equivalent to storing the matrix's transpose in column-major order. Hence, we can interpret an `M` by `N` matrix `A` stored in row-major order as the `N` by `M` matrix `B = A^T` stored in column-major order. In that case, we can derive an update equation for `B` as follows:
+*
+*     ```tex
+*     \begin{align*}
+*     B &= A^T \\
+*       &= (\alpha \bar{x} \bar{y}^T + A)^T \\
+*       &= (\alpha \bar{x} \bar{y}^T)^T + A^T \\
+*       &= \alpha (\bar{x} \bar{y}^T)^T + A^T \\
+*       &= \alpha \bar{y} \bar{x}^T + A^T \\
+*       &= \alpha \bar{y} \bar{x}^T + B
+*     \end{align*}
+*     ```
+*
+*     Accordingly, we can reuse the same loop logic for both column-major and row-major `A` by simply swapping `x` with `y` and `M` with `N` when `A` is row-major. That is the essence of loop interchange.
+*
 * @private
 * @param {NonNegativeInteger} M - number of rows in the matrix `A`
 * @param {NonNegativeInteger} N - number of columns in the matrix `A`
@@ -51,24 +73,65 @@
 */
 function dger( M, N, alpha, x, strideX, offsetX, y, strideY, offsetY, A, strideA1, strideA2, offsetA ) { // eslint-disable-line max-params, max-len
 	var tmp;
-	var idx;
-	var jy;
+	var da0;
+	var da1;
+	var sx;
+	var sy;
+	var ia;
 	var ix;
-	var i;
-	var j;
+	var iy;
+	var i0;
+	var i1;
+	var S0;
+	var S1;
+
+	// Note on variable naming convention: S#, da#, i# where # corresponds to the loop number, with `0` being the innermost loop; `ia`, `ix`, and `iy` are the running indices into `A`, `x`, and `y`...
+
+	if ( isRowMajor( [ strideA1, strideA2 ] ) ) {
+		// For row-major matrices, the last dimension has the fastest changing index, so iterate over it in the innermost loop...
+		S0 = N;
+		S1 = M;
+		da0 = strideA2;                   // index increment applied after each innermost loop iteration
+		da1 = strideA1 - ( S0*strideA2 ); // index increment applied after each outermost loop iteration (outer stride minus the `S0` inner steps already taken)
 
-	jy = offsetY;
-	for ( j = 0; j < N; j++ ) {
-		if ( y[ jy ] !== 0.0 ) {
-			tmp = alpha * y[ jy ];
+		// Swap the vectors (along with their strides and offsets) so that `x` is always the vector traversed by the innermost loop...
+		tmp = x;
+		x = y;
+		y = tmp;
+
+		tmp = strideX;
+		strideX = strideY;
+		strideY = tmp;
+
+		tmp = offsetX;
+		offsetX = offsetY;
+		offsetY = tmp;
+	} else { // order === 'column-major'
+		// For column-major matrices, the first dimension has the fastest changing index, so iterate over it in the innermost loop...
+		S0 = M;
+		S1 = N;
+		da0 = strideA1;                   // index increment applied after each innermost loop iteration
+		da1 = strideA2 - ( S0*strideA1 ); // index increment applied after each outermost loop iteration (outer stride minus the `S0` inner steps already taken)
+	}
+	sx = strideX;
+	sy = strideY;
+	ix = offsetX;
+	iy = offsetY;
+	ia = offsetA;
+	for ( i1 = 0; i1 < S1; i1++ ) {
+		if ( y[ iy ] === 0.0 ) {
+			ia += da0 * S0; // no update is applied for this outer iteration; advance `ia` by the distance the innermost loop would have covered
+		} else {
+			tmp = alpha * y[ iy ];
 			ix = offsetX;
-			for ( i = 0; i < M; i++ ) {
-				idx = offsetA + ( i * strideA1 ) + ( j * strideA2 );
-				A[ idx ] += x[ ix ] * tmp;
-				ix += strideX;
+			for ( i0 = 0; i0 < S0; i0++ ) {
+				A[ ia ] += x[ ix ] * tmp;
+				ix += sx;
+				ia += da0;
 			}
 		}
-		jy += strideY;
+		iy += sy;
+		ia += da1;
 	}
 	return A;
 }