|
18 | 18 |
|
19 | 19 | 'use strict';
|
20 | 20 |
|
| 21 | +// MODULES // |
| 22 | + |
| 23 | +var isRowMajor = require( '@stdlib/ndarray/base/assert/is-row-major' ); |
| 24 | + |
| 25 | + |
21 | 26 | // MAIN //
|
22 | 27 |
|
23 | 28 | /**
|
24 | 29 | * Performs the rank 1 operation `A = α*x*y^T + A`, where `α` is a scalar, `x` is an `M` element vector, `y` is an `N` element vector, and `A` is an `M` by `N` matrix.
|
25 | 30 | *
|
| 31 | +* ## Notes |
| 32 | +* |
| 33 | +* - To help motivate the use of loop interchange below, we first recognize that a matrix stored in row-major order is equivalent to storing the matrix's transpose in column-major order. Hence, we can interpret an `M` by `N` row-major matrix `B` as the matrix `A^T` stored in column-major. In which case, we can derive an update equation for `B` as follows: |
| 34 | +* |
| 35 | +* ```tex |
| 36 | +* \begin{align*} |
| 37 | +* B &= A^T \\ |
| 38 | +* &= (\alpha \bar{x} \bar{y}^T + A)^T \\ |
| 39 | +* &= (\alpha \bar{x} \bar{y}^T)^T + A^T \\ |
| 40 | +* &= \alpha (\bar{x} \bar{y}^T)^T + A^T \\ |
| 41 | +* &= \alpha \bar{y} \bar{x}^T + A^T \\ |
| 42 | +* &= \alpha \bar{y} \bar{x}^T + B |
| 43 | +* \end{align*} |
| 44 | +* ``` |
| 45 | +* |
| 46 | +* Accordingly, we can reuse the same loop logic for column-major and row-major `A` by simply swapping `x` and `y` and `M` and `N` when `A` is row-major. That is the essence of loop interchange. |
| 47 | +* |
26 | 48 | * @private
|
27 | 49 | * @param {NonNegativeInteger} M - number of rows in the matrix `A`
|
28 | 50 | * @param {NonNegativeInteger} N - number of columns in the matrix `A`
|
|
51 | 73 | */
|
52 | 74 | function dger( M, N, alpha, x, strideX, offsetX, y, strideY, offsetY, A, strideA1, strideA2, offsetA ) { // eslint-disable-line max-params, max-len
|
53 | 75 | var tmp;
|
54 |
| - var idx; |
55 |
| - var jy; |
| 76 | + var da0; |
| 77 | + var da1; |
| 78 | + var sx; |
| 79 | + var sy; |
| 80 | + var ia; |
56 | 81 | var ix;
|
57 |
| - var i; |
58 |
| - var j; |
| 82 | + var iy; |
| 83 | + var i0; |
| 84 | + var i1; |
| 85 | + var S0; |
| 86 | + var S1; |
| 87 | + |
| 88 | + // Note on variable naming convention: S#, da#, ia#, i# where # corresponds to the loop number, with `0` being the innermost loop... |
| 89 | + |
| 90 | + if ( isRowMajor( [ strideA1, strideA2 ] ) ) { |
| 91 | + // For row-major matrices, the last dimension has the fastest changing index... |
| 92 | + S0 = N; |
| 93 | + S1 = M; |
| 94 | + da0 = strideA2; // offset increment for innermost loop |
| 95 | + da1 = strideA1 - ( S0*strideA2 ); // offset increment for outermost loop |
59 | 96 |
|
60 |
| - jy = offsetY; |
61 |
| - for ( j = 0; j < N; j++ ) { |
62 |
| - if ( y[ jy ] !== 0.0 ) { |
63 |
| - tmp = alpha * y[ jy ]; |
| 97 | + // Swap the vectors... |
| 98 | + tmp = x; |
| 99 | + x = y; |
| 100 | + y = tmp; |
| 101 | + |
| 102 | + tmp = strideX; |
| 103 | + strideX = strideY; |
| 104 | + strideY = tmp; |
| 105 | + |
| 106 | + tmp = offsetX; |
| 107 | + offsetX = offsetY; |
| 108 | + offsetY = tmp; |
| 109 | + } else { // order === 'column-major' |
| 110 | + // For column-major matrices, the first dimension has the fastest changing index... |
| 111 | + S0 = M; |
| 112 | + S1 = N; |
| 113 | + da0 = strideA1; // offset increment for innermost loop |
| 114 | + da1 = strideA2 - ( S0*strideA1 ); // offset increment for outermost loop |
| 115 | + } |
| 116 | + sx = strideX; |
| 117 | + sy = strideY; |
| 118 | + ix = offsetX; |
| 119 | + iy = offsetY; |
| 120 | + ia = offsetA; |
| 121 | + for ( i1 = 0; i1 < S1; i1++ ) { |
| 122 | + if ( y[ iy ] === 0.0 ) { |
| 123 | + ia += da0 * S0; |
| 124 | + } else { |
| 125 | + tmp = alpha * y[ iy ]; |
64 | 126 | ix = offsetX;
|
65 |
| - for ( i = 0; i < M; i++ ) { |
66 |
| - idx = offsetA + ( i * strideA1 ) + ( j * strideA2 ); |
67 |
| - A[ idx ] += x[ ix ] * tmp; |
68 |
| - ix += strideX; |
| 127 | + for ( i0 = 0; i0 < S0; i0++ ) { |
| 128 | + A[ ia ] += x[ ix ] * tmp; |
| 129 | + ix += sx; |
| 130 | + ia += da0; |
69 | 131 | }
|
70 | 132 | }
|
71 |
| - jy += strideY; |
| 133 | + iy += sy; |
| 134 | + ia += da1; |
72 | 135 | }
|
73 | 136 | return A;
|
74 | 137 | }
|
|
0 commit comments