File tree Expand file tree Collapse file tree 2 files changed +7
-3
lines changed Expand file tree Collapse file tree 2 files changed +7
-3
lines changed Original file line number Diff line number Diff line change @@ -432,17 +432,17 @@ def _register_adam_variants(registry: OptimizerRegistry) -> None:
432
432
OptimInfo (
433
433
name = 'adafactorbv' ,
434
434
opt_class = AdafactorBigVision ,
435
- description = 'Big Vision variant of Adafactor with factored gradients, half precision momentum. ' ,
435
+ description = 'Big Vision variant of Adafactor with factored gradients, half precision momentum' ,
436
436
),
437
437
OptimInfo (
438
438
name = 'adopt' ,
439
439
opt_class = Adopt ,
440
- description = 'Memory-efficient implementation of Adam with factored gradients ' ,
440
+ description = 'Modified Adam that can converge with any β2 with the optimal rate ' ,
441
441
),
442
442
OptimInfo (
443
443
name = 'adoptw' ,
444
444
opt_class = Adopt ,
445
- description = 'Memory-efficient implementation of Adam with factored gradients ' ,
445
+ description = 'Modified AdamW (decoupled decay) that can converge with any β2 with the optimal rate ' ,
446
446
defaults = {'decoupled' : True }
447
447
),
448
448
]
Original file line number Diff line number Diff line change @@ -51,6 +51,10 @@ def _get_value(x):
51
51
52
52
53
53
class Adopt (Optimizer ):
54
+ """
55
+ ADOPT: Modified Adam Can Converge with Any β2 with the Optimal Rate: https://arxiv.org/abs/2411.02853
56
+
57
+ """
54
58
def __init__ (
55
59
self ,
56
60
params ,
You can’t perform that action at this time.
0 commit comments