Skip to content

Commit 9a180ae

Browse files
committed
fix: align Lion learning rate with spec
1 parent 1ad64ea commit 9a180ae

File tree

2 files changed

+17
-31
lines changed

2 files changed

+17
-31
lines changed

src/Optimizers/LionOptimizer.cs

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -213,7 +213,7 @@ protected override IFullModel<T, TInput, TOutput> UpdateSolution(IFullModel<T, T
213213
{
214214
var parameters = currentSolution.GetParameters();
215215
var weightDecay = NumOps.FromDouble(_options.WeightDecay);
216-
var effectiveLearningRate = GetEffectiveLearningRate();
216+
var effectiveLearningRate = _currentLearningRate;
217217

218218
for (int i = 0; i < gradient.Length; i++)
219219
{
@@ -273,7 +273,7 @@ public override Vector<T> UpdateParameters(Vector<T> parameters, Vector<T> gradi
273273
_t++;
274274

275275
var weightDecay = NumOps.FromDouble(_options.WeightDecay);
276-
var effectiveLearningRate = GetEffectiveLearningRate();
276+
var effectiveLearningRate = _currentLearningRate;
277277
var updatedParams = new Vector<T>(parameters.Length);
278278

279279
for (int i = 0; i < parameters.Length; i++)
@@ -332,7 +332,7 @@ public override Matrix<T> UpdateParameters(Matrix<T> parameters, Matrix<T> gradi
332332
_t++;
333333

334334
var weightDecay = NumOps.FromDouble(_options.WeightDecay);
335-
var effectiveLearningRate = GetEffectiveLearningRate();
335+
var effectiveLearningRate = _currentLearningRate;
336336
var updatedMatrix = new Matrix<T>(parameters.Rows, parameters.Columns);
337337
int index = 0;
338338

@@ -377,12 +377,6 @@ public override Matrix<T> UpdateParameters(Matrix<T> parameters, Matrix<T> gradi
377377
return updatedMatrix;
378378
}
379379

380-
private T GetEffectiveLearningRate()
381-
{
382-
var beta1Factor = NumOps.Subtract(NumOps.One, _currentBeta1);
383-
var beta2Factor = NumOps.Subtract(NumOps.One, _currentBeta2);
384-
return NumOps.Multiply(_currentLearningRate, NumOps.Multiply(beta1Factor, beta2Factor));
385-
}
386380

387381
/// <summary>
388382
/// Resets the optimizer's internal state.

tests/AiDotNet.Tests/UnitTests/Optimizers/LionOptimizerTests.cs

Lines changed: 14 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -414,7 +414,7 @@ public void UpdateParameters_Vector_DifferentBeta1Values_ProducesDifferentResult
414414
var options1 = new LionOptimizerOptions<double, Vector<double>, Vector<double>>
415415
{
416416
LearningRate = 0.1,
417-
Beta1 = 0.5,
417+
Beta1 = 0.2,
418418
Beta2 = 0.99
419419
};
420420
var options2 = new LionOptimizerOptions<double, Vector<double>, Vector<double>>
@@ -428,15 +428,12 @@ public void UpdateParameters_Vector_DifferentBeta1Values_ProducesDifferentResult
428428
var optimizer2 = new LionOptimizer<double, Vector<double>, Vector<double>>(null, options2);
429429

430430
var parameters = new Vector<double>(new double[] { 1.0, 2.0, 3.0 });
431-
var gradient1 = new Vector<double>(new double[] { 0.5, 0.5, 0.5 });
432-
var gradient2 = new Vector<double>(new double[] { 1.0, 1.0, 1.0 });
431+
var gradient1 = new Vector<double>(new double[] { 100.0, 100.0, 100.0 });
432+
var gradient2 = new Vector<double>(new double[] { -1.0, -1.0, -1.0 });
433433

434-
// Act - Build momentum then update with different gradient
435-
optimizer1.UpdateParameters(parameters, gradient1);
436-
var updated1 = optimizer1.UpdateParameters(parameters, gradient2);
437-
438-
optimizer2.UpdateParameters(parameters, gradient1);
439-
var updated2 = optimizer2.UpdateParameters(parameters, gradient2);
434+
// Act - Build momentum then flip gradient sign; beta1 affects the sign of the interpolated update.
435+
var updated1 = optimizer1.UpdateParameters(optimizer1.UpdateParameters(parameters, gradient1), gradient2);
436+
var updated2 = optimizer2.UpdateParameters(optimizer2.UpdateParameters(parameters, gradient1), gradient2);
440437

441438
// Assert - Different beta1 values should produce different interpolations
442439
// and thus different results
@@ -477,21 +474,16 @@ public void UpdateParameters_Vector_DifferentBeta2Values_ProducesDifferentMoment
477474
var optimizer2 = new LionOptimizer<double, Vector<double>, Vector<double>>(null, options2);
478475

479476
var parameters = new Vector<double>(new double[] { 1.0, 2.0, 3.0 });
480-
var gradient = new Vector<double>(new double[] { 1.0, 1.0, 1.0 });
481-
482-
// Act - Multiple updates to see momentum effect
483-
var params1 = new Vector<double>(parameters);
484-
var params2 = new Vector<double>(parameters);
477+
var gradientPos = new Vector<double>(new double[] { 1.0, 1.0, 1.0 });
478+
var gradientNeg = new Vector<double>(new double[] { -1.0, -1.0, -1.0 });
485479

486-
for (int i = 0; i < 3; i++)
487-
{
488-
params1 = optimizer1.UpdateParameters(params1, gradient);
489-
params2 = optimizer2.UpdateParameters(params2, gradient);
490-
}
480+
// Act - Build different momentum states with the same positive gradient, then flip gradient sign.
481+
// Beta2 affects m_{t-1}, which in turn can change the sign of the interpolated update.
482+
var params1 = optimizer1.UpdateParameters(optimizer1.UpdateParameters(parameters, gradientPos), gradientNeg);
483+
var params2 = optimizer2.UpdateParameters(optimizer2.UpdateParameters(parameters, gradientPos), gradientNeg);
491484

492-
// Assert - Both should update, but momentum behavior differs
493-
Assert.NotEqual(parameters, params1);
494-
Assert.NotEqual(parameters, params2);
485+
// Assert - Momentum behavior differs between the two beta2 values
486+
Assert.NotEqual(params1, params2);
495487

496488
// Verify that different Beta2 values produce different momentum behavior
497489
bool anyDifferent = false;

0 commit comments

Comments (0)