23
23
#include " ndBrainTrainer.h"
24
24
#include " ndBrainSaveLoad.h"
25
25
#include " ndBrainLayerLinear.h"
26
- #include " ndBrainOptimizerAdamLegacy.h"
27
26
#include " ndBrainLayerActivationRelu.h"
28
27
#include " ndBrainLayerActivationTanh.h"
29
28
#include " ndBrainLayerActivationLinear.h"
@@ -397,8 +396,8 @@ ndBrainAgentContinuePolicyGradient_TrainerMaster::ndBrainAgentContinuePolicyGrad
397
396
,m_criticTrainers()
398
397
,m_policyTrainers()
399
398
,m_policyAuxiliaryTrainers()
400
- ,m_criticOptimizer()
401
- ,m_policyOptimizer()
399
+ // ,m_criticOptimizer()
400
+ // ,m_policyOptimizer()
402
401
,m_advantage()
403
402
,m_randomPermutation()
404
403
,m_randomGenerator()
@@ -419,6 +418,7 @@ ndBrainAgentContinuePolicyGradient_TrainerMaster::ndBrainAgentContinuePolicyGrad
419
418
,m_referenceProbability()
420
419
,m_agents()
421
420
{
421
+ ndAssert (0 );
422
422
ndAssert (m_parameters.m_numberOfActions );
423
423
ndAssert (m_parameters.m_numberOfObservations );
424
424
ndSetRandSeed (m_randomSeed);
@@ -686,110 +686,112 @@ ndFloat32 ndBrainAgentContinuePolicyGradient_TrainerMaster::GetAverageScore() co
686
686
687
687
void ndBrainAgentContinuePolicyGradient_TrainerMaster::BuildPolicyClass ()
688
688
{
689
- ndFixSizeArray<ndBrainLayer*, 32 > layers;
690
-
691
- layers.SetCount (0 );
692
- layers.PushBack (new ndBrainLayerLinear (m_parameters.m_numberOfObservations , m_parameters.m_hiddenLayersNumberOfNeurons ));
693
- layers.PushBack (new ndBrainLayerActivationTanh (layers[layers.GetCount () - 1 ]->GetOutputSize ()));
694
- for (ndInt32 i = 0 ; i < m_parameters.m_numberOfHiddenLayers ; ++i)
695
- {
696
- ndAssert (layers[layers.GetCount () - 1 ]->GetOutputSize () == m_parameters.m_hiddenLayersNumberOfNeurons );
697
- layers.PushBack (new ndBrainLayerLinear (m_parameters.m_hiddenLayersNumberOfNeurons , m_parameters.m_hiddenLayersNumberOfNeurons ));
698
- layers.PushBack (new ND_CONTINUE_POLICY_GRADIENT_HIDEN_LAYERS_ACTIVATION (layers[layers.GetCount () - 1 ]->GetOutputSize ()));
699
- }
700
-
701
- ndInt32 nunberOfOutput = m_parameters.m_usePerActionSigmas ? 2 * m_parameters.m_numberOfActions : m_parameters.m_numberOfActions ;
702
- layers.PushBack (new ndBrainLayerLinear (layers[layers.GetCount () - 1 ]->GetOutputSize (), nunberOfOutput));
703
- layers.PushBack (new ndBrainLayerActivationTanh (layers[layers.GetCount () - 1 ]->GetOutputSize ()));
704
- if (m_parameters.m_usePerActionSigmas )
705
- {
706
- ndBrainFixSizeVector<256 > bias;
707
- ndBrainFixSizeVector<256 > slope;
708
- bias.SetCount (layers[layers.GetCount () - 1 ]->GetOutputSize ());
709
- slope.SetCount (layers[layers.GetCount () - 1 ]->GetOutputSize ());
710
-
711
- ndInt32 sigmaSize = nunberOfOutput / 2 ;
712
- ndBrainFloat b = ndBrainFloat (0 .5f ) * (ND_CONTINUE_POLICY_MAX_PER_ACTION_SIGMA + ND_CONTINUE_POLICY_MIN_PER_ACTION_SIGMA);
713
- ndBrainFloat a = ndBrainFloat (0 .5f ) * (ND_CONTINUE_POLICY_MAX_PER_ACTION_SIGMA - ND_CONTINUE_POLICY_MIN_PER_ACTION_SIGMA);
714
-
715
- bias.Set (ndBrainFloat (0 .0f ));
716
- slope.Set (ndBrainFloat (1 .0f ));
717
- ndMemSet (&bias[sigmaSize], b, sigmaSize);
718
- ndMemSet (&slope[sigmaSize], a, sigmaSize);
719
- layers.PushBack (new ndBrainLayerActivationLinear (slope, bias));
720
- }
721
-
722
- m_policy = ndSharedPtr<ndBrain>(new ndBrain);
723
- for (ndInt32 i = 0 ; i < layers.GetCount (); ++i)
724
- {
725
- m_policy->AddLayer (layers[i]);
726
- }
727
- m_policy->InitWeights ();
728
-
729
- // m_policy.SaveToFile("xxxx.dnn");
730
- // ndSharedPtr<ndBrain> xxx(ndBrainLoad::Load("xxxx.dnn"));
731
-
732
- m_policyTrainers.SetCount (0 );
733
- m_policyAuxiliaryTrainers.SetCount (0 );
734
- for (ndInt32 i = 0 ; i < m_parameters.m_miniBatchSize ; ++i)
735
- {
736
- ndAssert (0 );
737
- // ndBrainTrainer* const trainer = new ndBrainTrainer(m_policy);
738
- // m_policyTrainers.PushBack(trainer);
739
- //
740
- // ndBrainTrainer* const auxiliaryTrainer = new ndBrainTrainer(m_policy);
741
- // m_policyAuxiliaryTrainers.PushBack(auxiliaryTrainer);
742
- }
743
-
744
- m_policyOptimizer = ndSharedPtr<ndBrainOptimizerAdamLegacy> (new ndBrainOptimizerAdamLegacy ());
745
- m_policyOptimizer->SetRegularizer (m_parameters.m_policyRegularizer );
746
- m_policyOptimizer->SetRegularizerType (m_parameters.m_policyRegularizerType );
747
-
748
- m_trajectoryAccumulator.Init (m_policy->GetOutputSize (), m_policy->GetInputSize ());
689
+ ndAssert (0 );
690
+ // ndFixSizeArray<ndBrainLayer*, 32> layers;
691
+ //
692
+ // layers.SetCount(0);
693
+ // layers.PushBack(new ndBrainLayerLinear(m_parameters.m_numberOfObservations, m_parameters.m_hiddenLayersNumberOfNeurons));
694
+ // layers.PushBack(new ndBrainLayerActivationTanh(layers[layers.GetCount() - 1]->GetOutputSize()));
695
+ // for (ndInt32 i = 0; i < m_parameters.m_numberOfHiddenLayers; ++i)
696
+ // {
697
+ // ndAssert(layers[layers.GetCount() - 1]->GetOutputSize() == m_parameters.m_hiddenLayersNumberOfNeurons);
698
+ // layers.PushBack(new ndBrainLayerLinear(m_parameters.m_hiddenLayersNumberOfNeurons, m_parameters.m_hiddenLayersNumberOfNeurons));
699
+ // layers.PushBack(new ND_CONTINUE_POLICY_GRADIENT_HIDEN_LAYERS_ACTIVATION(layers[layers.GetCount() - 1]->GetOutputSize()));
700
+ // }
701
+ //
702
+ // ndInt32 nunberOfOutput = m_parameters.m_usePerActionSigmas ? 2 * m_parameters.m_numberOfActions : m_parameters.m_numberOfActions;
703
+ // layers.PushBack(new ndBrainLayerLinear(layers[layers.GetCount() - 1]->GetOutputSize(), nunberOfOutput));
704
+ // layers.PushBack(new ndBrainLayerActivationTanh(layers[layers.GetCount() - 1]->GetOutputSize()));
705
+ // if (m_parameters.m_usePerActionSigmas)
706
+ // {
707
+ // ndBrainFixSizeVector<256> bias;
708
+ // ndBrainFixSizeVector<256> slope;
709
+ // bias.SetCount(layers[layers.GetCount() - 1]->GetOutputSize());
710
+ // slope.SetCount(layers[layers.GetCount() - 1]->GetOutputSize());
711
+ //
712
+ // ndInt32 sigmaSize = nunberOfOutput / 2;
713
+ // ndBrainFloat b = ndBrainFloat(0.5f) * (ND_CONTINUE_POLICY_MAX_PER_ACTION_SIGMA + ND_CONTINUE_POLICY_MIN_PER_ACTION_SIGMA);
714
+ // ndBrainFloat a = ndBrainFloat(0.5f) * (ND_CONTINUE_POLICY_MAX_PER_ACTION_SIGMA - ND_CONTINUE_POLICY_MIN_PER_ACTION_SIGMA);
715
+ //
716
+ // bias.Set(ndBrainFloat(0.0f));
717
+ // slope.Set(ndBrainFloat(1.0f));
718
+ // ndMemSet(&bias[sigmaSize], b, sigmaSize);
719
+ // ndMemSet(&slope[sigmaSize], a, sigmaSize);
720
+ // layers.PushBack(new ndBrainLayerActivationLinear(slope, bias));
721
+ // }
722
+ //
723
+ // m_policy = ndSharedPtr<ndBrain>(new ndBrain);
724
+ // for (ndInt32 i = 0; i < layers.GetCount(); ++i)
725
+ // {
726
+ // m_policy->AddLayer(layers[i]);
727
+ // }
728
+ // m_policy->InitWeights();
729
+ //
730
+ // //m_policy.SaveToFile("xxxx.dnn");
731
+ // //ndSharedPtr<ndBrain> xxx(ndBrainLoad::Load("xxxx.dnn"));
732
+ //
733
+ // m_policyTrainers.SetCount(0);
734
+ // m_policyAuxiliaryTrainers.SetCount(0);
735
+ // for (ndInt32 i = 0; i < m_parameters.m_miniBatchSize; ++i)
736
+ // {
737
+ // ndAssert(0);
738
+ // //ndBrainTrainer* const trainer = new ndBrainTrainer(m_policy);
739
+ // //m_policyTrainers.PushBack(trainer);
740
+ // //
741
+ // //ndBrainTrainer* const auxiliaryTrainer = new ndBrainTrainer(m_policy);
742
+ // //m_policyAuxiliaryTrainers.PushBack(auxiliaryTrainer);
743
+ // }
744
+ //
745
+ // m_policyOptimizer = ndSharedPtr<ndBrainOptimizerAdamLegacy> (new ndBrainOptimizerAdamLegacy());
746
+ // m_policyOptimizer->SetRegularizer(m_parameters.m_policyRegularizer);
747
+ // m_policyOptimizer->SetRegularizerType(m_parameters.m_policyRegularizerType);
748
+ //
749
+ // m_trajectoryAccumulator.Init(m_policy->GetOutputSize(), m_policy->GetInputSize());
749
750
}
750
751
751
752
void ndBrainAgentContinuePolicyGradient_TrainerMaster::BuildCriticClass ()
752
753
{
753
- ndFixSizeArray<ndBrainLayer*, 32 > layers;
754
-
755
- // build state value critic neural net
756
- layers.SetCount (0 );
757
- layers.PushBack (new ndBrainLayerLinear (m_policy->GetInputSize (), m_parameters.m_hiddenLayersNumberOfNeurons ));
758
- layers.PushBack (new ndBrainLayerActivationTanh (layers[layers.GetCount () - 1 ]->GetOutputSize ()));
759
-
760
- for (ndInt32 i = 0 ; i < m_parameters.m_numberOfHiddenLayers ; ++i)
761
- {
762
- ndAssert (layers[layers.GetCount () - 1 ]->GetOutputSize () == m_parameters.m_hiddenLayersNumberOfNeurons );
763
- layers.PushBack (new ndBrainLayerLinear (layers[layers.GetCount () - 1 ]->GetOutputSize (), m_parameters.m_hiddenLayersNumberOfNeurons ));
764
- layers.PushBack (new ND_CONTINUE_POLICY_GRADIENT_HIDEN_LAYERS_ACTIVATION (layers[layers.GetCount () - 1 ]->GetOutputSize ()));
765
- }
766
- layers.PushBack (new ndBrainLayerLinear (layers[layers.GetCount () - 1 ]->GetOutputSize (), 1 ));
767
- layers.PushBack (new ndBrainLayerActivationLeakyRelu (layers[layers.GetCount () - 1 ]->GetOutputSize ()));
768
-
769
- m_critic = ndSharedPtr<ndBrain>(new ndBrain);
770
- for (ndInt32 i = 0 ; i < layers.GetCount (); ++i)
771
- {
772
- m_critic->AddLayer (layers[i]);
773
- }
774
- m_critic->InitWeights ();
775
-
776
- ndAssert (m_critic->GetOutputSize () == 1 );
777
- ndAssert (m_critic->GetInputSize () == m_policy->GetInputSize ());
778
-
779
- m_criticTrainers.SetCount (0 );
780
- for (ndInt32 i = 0 ; i < m_parameters.m_miniBatchSize ; ++i)
781
- {
782
- ndAssert (0 );
783
- // ndBrainTrainer* const trainer = new ndBrainTrainer(m_critic);
784
- // m_criticTrainers.PushBack(trainer);
785
- }
786
-
787
- m_criticOptimizer = ndSharedPtr<ndBrainOptimizerAdamLegacy> (new ndBrainOptimizerAdamLegacy ());
788
- m_criticOptimizer->SetRegularizer (m_parameters.m_criticRegularizer );
789
- m_criticOptimizer->SetRegularizerType (m_parameters.m_criticRegularizerType );
790
-
791
- m_baseValueWorkingBufferSize = m_critic->CalculateWorkingBufferSize ();
792
- m_workingBuffer.SetCount (m_baseValueWorkingBufferSize * m_parameters.m_threadsCount );
754
+ ndAssert (0 );
755
+ // ndFixSizeArray<ndBrainLayer*, 32> layers;
756
+ //
757
+ // // build state value critic neural net
758
+ // layers.SetCount(0);
759
+ // layers.PushBack(new ndBrainLayerLinear(m_policy->GetInputSize(), m_parameters.m_hiddenLayersNumberOfNeurons));
760
+ // layers.PushBack(new ndBrainLayerActivationTanh(layers[layers.GetCount() - 1]->GetOutputSize()));
761
+ //
762
+ // for (ndInt32 i = 0; i < m_parameters.m_numberOfHiddenLayers; ++i)
763
+ // {
764
+ // ndAssert(layers[layers.GetCount() - 1]->GetOutputSize() == m_parameters.m_hiddenLayersNumberOfNeurons);
765
+ // layers.PushBack(new ndBrainLayerLinear(layers[layers.GetCount() - 1]->GetOutputSize(), m_parameters.m_hiddenLayersNumberOfNeurons));
766
+ // layers.PushBack(new ND_CONTINUE_POLICY_GRADIENT_HIDEN_LAYERS_ACTIVATION(layers[layers.GetCount() - 1]->GetOutputSize()));
767
+ // }
768
+ // layers.PushBack(new ndBrainLayerLinear(layers[layers.GetCount() - 1]->GetOutputSize(), 1));
769
+ // layers.PushBack(new ndBrainLayerActivationLeakyRelu(layers[layers.GetCount() - 1]->GetOutputSize()));
770
+ //
771
+ // m_critic = ndSharedPtr<ndBrain>(new ndBrain);
772
+ // for (ndInt32 i = 0; i < layers.GetCount(); ++i)
773
+ // {
774
+ // m_critic->AddLayer(layers[i]);
775
+ // }
776
+ // m_critic->InitWeights();
777
+ //
778
+ // ndAssert(m_critic->GetOutputSize() == 1);
779
+ // ndAssert(m_critic->GetInputSize() == m_policy->GetInputSize());
780
+ //
781
+ // m_criticTrainers.SetCount(0);
782
+ // for (ndInt32 i = 0; i < m_parameters.m_miniBatchSize; ++i)
783
+ // {
784
+ // ndAssert(0);
785
+ // //ndBrainTrainer* const trainer = new ndBrainTrainer(m_critic);
786
+ // //m_criticTrainers.PushBack(trainer);
787
+ // }
788
+ //
789
+ // m_criticOptimizer = ndSharedPtr<ndBrainOptimizerAdamLegacy> (new ndBrainOptimizerAdamLegacy());
790
+ // m_criticOptimizer->SetRegularizer(m_parameters.m_criticRegularizer);
791
+ // m_criticOptimizer->SetRegularizerType(m_parameters.m_criticRegularizerType);
792
+ //
793
+ // m_baseValueWorkingBufferSize = m_critic->CalculateWorkingBufferSize();
794
+ // m_workingBuffer.SetCount(m_baseValueWorkingBufferSize * m_parameters.m_threadsCount);
793
795
}
794
796
795
797
// #pragma optimize( "", off )
0 commit comments