fix: Prevent extra backpropagation

robertleeplummerjr · robertleeplummerjr · commit 87dcbfb3c81a · 2022-02-17T15:21:39.000-05:00
diff --git a/src/layer/add.ts b/src/layer/add.ts
@@ -46,7 +46,6 @@ export class Add extends Operator {
       this.inputLayer1.weights,
       this.inputLayer2.weights
     ) as Texture;
-    clear(this.deltas);
   }
 
   compare(): void {
diff --git a/src/layer/base-layer.ts b/src/layer/base-layer.ts
@@ -57,6 +57,7 @@ export interface ILayerSettings {
   initPraxis?:
     | ((layerTemplate: ILayer, settings?: IPraxisSettings) => IPraxis)
     | null;
+  cleanupDeltas?: boolean;
 }
 
 export const baseLayerDefaultSettings: ILayerSettings = {
@@ -67,6 +68,7 @@ export const baseLayerDefaultSettings: ILayerSettings = {
   deltas: null,
   praxis: null,
   praxisOpts: null,
+  cleanupDeltas: true,
 };
 
 export type BaseLayerType = new (settings?: Partial<ILayerSettings>) => ILayer;
@@ -95,6 +97,9 @@ export class BaseLayer implements ILayer {
 
   set weights(weights: KernelOutput | Input) {
     this.settings.weights = weights as KernelOutput;
+    if (this.settings.cleanupDeltas && this.deltas) {
+      clear(this.deltas);
+    }
   }
 
   get deltas(): KernelOutput {
@@ -245,7 +250,6 @@ export class BaseLayer implements ILayer {
     if (!this.praxis) throw new Error('this.praxis not defined');
     this.weights = this.praxis.run(this, learningRate as number);
     release(oldWeights);
-    clear(this.deltas);
   }
 
   toArray(): TextureArrayOutput {
diff --git a/src/layer/convolution.ts b/src/layer/convolution.ts
@@ -464,7 +464,6 @@ export class Convolution extends Filter {
     const { weights: oldWeights } = this;
     this.weights = (this.praxis as IPraxis).run(this, learningRate);
     release(oldWeights);
-    clear(this.deltas);
   }
 }
 
diff --git a/src/layer/input.ts b/src/layer/input.ts
@@ -61,7 +61,6 @@ export class Input extends EntryPoint {
     } else {
       throw new Error('Inputs are not of sized correctly');
     }
-    clear(this.deltas);
   }
 
   predict1D(inputs: KernelOutput): void {
@@ -71,7 +70,6 @@ export class Input extends EntryPoint {
     } else {
       this.weights = inputs;
     }
-    clear(this.deltas);
   }
 
   compare(): void {
diff --git a/src/layer/leaky-relu.ts b/src/layer/leaky-relu.ts
@@ -75,7 +75,6 @@ export class LeakyRelu extends Activation {
     this.weights = (this.predictKernel as IKernelRunShortcut)(
       this.inputLayer.weights
     );
-    clear(this.deltas);
   }
 
   compare(): void {
diff --git a/src/layer/multiply-element.ts b/src/layer/multiply-element.ts
@@ -61,7 +61,6 @@ export class MultiplyElement extends Operator {
       this.inputLayer1.weights,
       this.inputLayer2.weights
     );
-    clear(this.deltas);
   }
 
   compare(): void {
diff --git a/src/layer/multiply.ts b/src/layer/multiply.ts
@@ -124,7 +124,6 @@ export class Multiply extends Operator {
       this.inputLayer1.weights,
       this.inputLayer2.weights
     ) as Texture;
-    clear(this.deltas);
   }
 
   compare(): void {
diff --git a/src/layer/recurrent-zeros.ts b/src/layer/recurrent-zeros.ts
@@ -54,7 +54,6 @@ export class RecurrentZeros extends Internal implements IRecurrentInput {
     this.weights = (this.praxis as IPraxis).run(this, learningRate);
     // this.deltas = deltas;
     release(oldWeights);
-    clear(this.deltas);
   }
 
   // validate(): void {
diff --git a/src/layer/relu.ts b/src/layer/relu.ts
@@ -76,7 +76,6 @@ export class Relu extends Activation {
     this.weights = (this.predictKernel as IKernelRunShortcut)(
       this.inputLayer.weights
     );
-    clear(this.deltas);
   }
 
   compare(): void {
diff --git a/src/layer/sigmoid.ts b/src/layer/sigmoid.ts
@@ -75,7 +75,6 @@ export class Sigmoid extends Activation {
     this.weights = (this.predictKernel as IKernelRunShortcut)(
       this.inputLayer.weights
     );
-    clear(this.deltas);
   }
 
   compare(): void {
diff --git a/src/layer/tanh.ts b/src/layer/tanh.ts
@@ -75,7 +75,6 @@ export class Tanh extends Activation {
     this.weights = (this.predictKernel as IKernelRunShortcut)(
       this.inputLayer.weights
     );
-    clear(this.deltas);
   }
 
   compare(): void {
diff --git a/src/layer/target.ts b/src/layer/target.ts
@@ -68,7 +68,6 @@ export class Target extends BaseLayer {
     // NOTE: this looks like it shouldn't be, but the weights are immutable, and this is where they are reused.
     release(this.weights);
     this.weights = clone(this.inputLayer.weights as KernelOutput);
-    clear(this.deltas);
   }
 
   compare(targetValues: KernelOutput): void {
diff --git a/src/layer/transpose.ts b/src/layer/transpose.ts
@@ -36,7 +36,6 @@ export class Transpose extends Modifier {
     this.weights = (this.predictKernel as IKernelRunShortcut)(
       this.inputLayer.weights
     );
-    clear(this.deltas);
   }
 
   compare(): void {
diff --git a/src/praxis/momentum-root-mean-squared-propagation.ts b/src/praxis/momentum-root-mean-squared-propagation.ts
@@ -90,7 +90,7 @@ export interface IMomentumRootMeanSquaredPropagationSettings
 
 export const defaults: IMomentumRootMeanSquaredPropagationSettings = {
   decayRate: 0.999,
-  regularizationStrength: 0.0001,
+  regularizationStrength: 0.000001,
   learningRate: 0.01,
   smoothEps: 1e-8,
   clipValue: 5,
diff --git a/src/recurrent.end-to-end.test.ts b/src/recurrent.end-to-end.test.ts
@@ -86,7 +86,7 @@ describe('Recurrent Class: End to End', () => {
       recurrentNet: Recurrent<number[]>;
     } {
       const timeStep: RNNTimeStep = new RNNTimeStep({
-        regc: 0.001,
+        regc: 0.000001,
         inputSize: 1,
         hiddenLayers: [3],
         outputSize: 1,
@@ -693,11 +693,13 @@ describe('Recurrent Class: End to End', () => {
           expect(asArrayOfArrayOfNumber(model[2].weights)[0][0]).toBe(
             timeStep.model.allMatrices[2].weights[0]
           );
-          expect(asArrayOfArrayOfNumber(model[2].weights)[1][0]).toBe(
-            timeStep.model.allMatrices[2].weights[1]
+          expect(asArrayOfArrayOfNumber(model[2].weights)[1][0]).toBeCloseTo(
+            timeStep.model.allMatrices[2].weights[1],
+            0.00000000009
           );
-          expect(asArrayOfArrayOfNumber(model[2].weights)[2][0]).toBe(
-            timeStep.model.allMatrices[2].weights[2]
+          expect(asArrayOfArrayOfNumber(model[2].weights)[2][0]).toBeCloseTo(
+            timeStep.model.allMatrices[2].weights[2],
+            0.00000000009
           );
           expect(asArrayOfArrayOfNumber(model[3].weights)[0][0]).toBe(
             timeStep.model.allMatrices[3].weights[0]
@@ -1313,7 +1315,7 @@ describe('Recurrent Class: End to End', () => {
       inputLayer: () => input({ height: 1 }),
       hiddenLayers: [
         (inputLayer: ILayer, recurrentInput: IRecurrentInput) =>
-          lstmCell({ height: 3 }, inputLayer, recurrentInput),
+          lstmCell({ height: 10 }, inputLayer, recurrentInput),
       ],
       outputLayer: (inputLayer: ILayer) => output({ height: 1 }, inputLayer),
     });
@@ -1325,7 +1327,7 @@ describe('Recurrent Class: End to End', () => {
     ];
     const errorThresh = 0.03;
     const iterations = 5000;
-    const status = net.train(xorNetValues);
+    const status = net.train(xorNetValues, { errorThresh, iterations });
     // expect(
     //   status.error <= errorThresh || status.iterations <= iterations
     // ).toBeTruthy();
@@ -1335,8 +1337,8 @@ describe('Recurrent Class: End to End', () => {
     console.log(net.run([[1], [0.001]]));
     console.log(net.run([[1], [1]]));
     expect(net.run([[0.001], [0.001]])[0][0]).toBeLessThan(0.1);
-    expect(net.run([[0.001], [1]])[0][0]).toBeGreaterThan(9);
-    expect(net.run([[1], [0.001]])[0][0]).toBeGreaterThan(9);
+    expect(net.run([[0.001], [1]])[0][0]).toBeGreaterThan(0.9);
+    expect(net.run([[1], [0.001]])[0][0]).toBeGreaterThan(0.9);
     expect(net.run([[1], [1]])[0][0]).toBeLessThan(0.1);
   });
   test('can learn 1,2,3', () => {
diff --git a/src/recurrent.ts b/src/recurrent.ts
@@ -412,7 +412,7 @@ export class Recurrent<
         throw new Error('this.meanSquaredError not setup');
       }
       let error: KernelOutput = new Float32Array(1);
-      for (let i = 0, max = inputs.length - 1; i <= max; i++) {
+      for (let i = 0, max = inputs.length - 2; i <= max; i++) {
         const layerSet = this._layerSets[i];
         const lastLayer = layerSet[layerSet.length - 1];
         const prevError: KernelOutput = error;

Original file line number	Diff line number	Diff line change
`@@ -46,7 +46,6 @@ export class Add extends Operator {`
`46`	`46`	`this.inputLayer1.weights,`
`47`	`47`	`this.inputLayer2.weights`
`48`	`48`	`) as Texture;`
`49`		`- clear(this.deltas);`
`50`	`49`	`}`
`51`	`50`
`52`	`51`	`compare(): void {`
Original file line number	Diff line number	Diff line change
`@@ -464,7 +464,6 @@ export class Convolution extends Filter {`
`464`	`464`	`const { weights: oldWeights } = this;`
`465`	`465`	`this.weights = (this.praxis as IPraxis).run(this, learningRate);`
`466`	`466`	`release(oldWeights);`
`467`		`- clear(this.deltas);`
`468`	`467`	`}`
`469`	`468`	`}`
`470`	`469`
Original file line number	Diff line number	Diff line change
`@@ -61,7 +61,6 @@ export class Input extends EntryPoint {`
`61`	`61`	`} else {`
`62`	`62`	`throw new Error('Inputs are not of sized correctly');`
`63`	`63`	`}`
`64`		`- clear(this.deltas);`
`65`	`64`	`}`
`66`	`65`
`67`	`66`	`predict1D(inputs: KernelOutput): void {`
`@@ -71,7 +70,6 @@ export class Input extends EntryPoint {`
`71`	`70`	`} else {`
`72`	`71`	`this.weights = inputs;`
`73`	`72`	`}`
`74`		`- clear(this.deltas);`
`75`	`73`	`}`
`76`	`74`
`77`	`75`	`compare(): void {`
Original file line number	Diff line number	Diff line change
`@@ -75,7 +75,6 @@ export class LeakyRelu extends Activation {`
`75`	`75`	`this.weights = (this.predictKernel as IKernelRunShortcut)(`
`76`	`76`	`this.inputLayer.weights`
`77`	`77`	`);`
`78`		`- clear(this.deltas);`
`79`	`78`	`}`
`80`	`79`
`81`	`80`	`compare(): void {`
Original file line number	Diff line number	Diff line change
`@@ -61,7 +61,6 @@ export class MultiplyElement extends Operator {`
`61`	`61`	`this.inputLayer1.weights,`
`62`	`62`	`this.inputLayer2.weights`
`63`	`63`	`);`
`64`		`- clear(this.deltas);`
`65`	`64`	`}`
`66`	`65`
`67`	`66`	`compare(): void {`
Original file line number	Diff line number	Diff line change
`@@ -124,7 +124,6 @@ export class Multiply extends Operator {`
`124`	`124`	`this.inputLayer1.weights,`
`125`	`125`	`this.inputLayer2.weights`
`126`	`126`	`) as Texture;`
`127`		`- clear(this.deltas);`
`128`	`127`	`}`
`129`	`128`
`130`	`129`	`compare(): void {`
Original file line number	Diff line number	Diff line change
`@@ -54,7 +54,6 @@ export class RecurrentZeros extends Internal implements IRecurrentInput {`
`54`	`54`	`this.weights = (this.praxis as IPraxis).run(this, learningRate);`
`55`	`55`	`// this.deltas = deltas;`
`56`	`56`	`release(oldWeights);`
`57`		`- clear(this.deltas);`
`58`	`57`	`}`
`59`	`58`
`60`	`59`	`// validate(): void {`
Original file line number	Diff line number	Diff line change
`@@ -76,7 +76,6 @@ export class Relu extends Activation {`
`76`	`76`	`this.weights = (this.predictKernel as IKernelRunShortcut)(`
`77`	`77`	`this.inputLayer.weights`
`78`	`78`	`);`
`79`		`- clear(this.deltas);`
`80`	`79`	`}`
`81`	`80`
`82`	`81`	`compare(): void {`
Original file line number	Diff line number	Diff line change
`@@ -75,7 +75,6 @@ export class Sigmoid extends Activation {`
`75`	`75`	`this.weights = (this.predictKernel as IKernelRunShortcut)(`
`76`	`76`	`this.inputLayer.weights`
`77`	`77`	`);`
`78`		`- clear(this.deltas);`
`79`	`78`	`}`
`80`	`79`
`81`	`80`	`compare(): void {`
Original file line number	Diff line number	Diff line change
`@@ -75,7 +75,6 @@ export class Tanh extends Activation {`
`75`	`75`	`this.weights = (this.predictKernel as IKernelRunShortcut)(`
`76`	`76`	`this.inputLayer.weights`
`77`	`77`	`);`
`78`		`- clear(this.deltas);`
`79`	`78`	`}`
`80`	`79`
`81`	`80`	`compare(): void {`