The configurable settings for the PPO training process.
| Name | FLearningAgentsPPOTrainingSettings |
|---|---|
| Type | struct |
| Header File | /Engine/Plugins/Experimental/LearningAgents/Source/LearningAgentsTraining/Public/LearningAgentsPPOTrainer.h |
| Include Path | #include "LearningAgentsPPOTrainer.h" |
Syntax
USTRUCT(BlueprintType, Category="LearningAgents")
struct FLearningAgentsPPOTrainingSettings
Variables
Public
| Name | Type | Remarks | Include Path | Unreal Specifiers |
|---|---|---|---|---|
| ActionEntropyWeight | float | Weighting used for the entropy bonus. | LearningAgentsPPOTrainer.h | |
| ActionRegularizationWeight | float | Weight used to regularize actions. | LearningAgentsPPOTrainer.h | |
| ActionSurrogateWeight | float | Weight for the loss used to train the policy via the PPO surrogate objective. | LearningAgentsPPOTrainer.h | |
| bAdvantageNormalization | bool | When true, advantages are normalized. | LearningAgentsPPOTrainer.h | |
| bSaveSnapshots | bool | If true, snapshots of the trained networks will be emitted to the intermediate directory. | LearningAgentsPPOTrainer.h | |
| bUseGradNormMaxClipping | bool | When true, gradient norm max clipping will be used on the policy, critic, encoder, and decoder. | LearningAgentsPPOTrainer.h | |
| bUseMLflow | bool | If true, MLflow will be used for experiment tracking. | LearningAgentsPPOTrainer.h | |
| bUseTensorboard | bool | If true, TensorBoard logs will be emitted to the intermediate directory. | LearningAgentsPPOTrainer.h | |
| CriticBatchSize | int32 | Batch size to use for training the critic. | LearningAgentsPPOTrainer.h | |
| CriticWarmupIterations | int32 | Number of training iterations to perform to warm up the Critic. | LearningAgentsPPOTrainer.h | |
| Device | ELearningAgentsTrainingDevice | The device to train on. | LearningAgentsPPOTrainer.h | |
| DiscountFactor | float | The discount factor to use during training. | LearningAgentsPPOTrainer.h | |
| EpsilonClip | float | Clipping ratio to apply to policy updates. | LearningAgentsPPOTrainer.h | |
| GaeLambda | float | The lambda used in Generalized Advantage Estimation, where larger values tend to assign more credit to recent actions. | LearningAgentsPPOTrainer.h | |
| GradNormMax | float | The maximum gradient norm to clip updates to. | LearningAgentsPPOTrainer.h | |
| IterationsPerGather | int32 | Number of training iterations to perform per buffer of experience gathered. | LearningAgentsPPOTrainer.h | |
| IterationsPerSnapshot | int32 | The interval, in iterations, at which to save new network snapshots. | LearningAgentsPPOTrainer.h | |
| LearningRateCritic | float | Learning rate of the critic network. | LearningAgentsPPOTrainer.h | |
| LearningRateDecay | float | Amount by which to multiply the learning rate every 1000 iterations. | LearningAgentsPPOTrainer.h | |
| LearningRatePolicy | float | Learning rate of the policy network. Typical values are between 0.001 and 0.0001. | LearningAgentsPPOTrainer.h | |
| MaximumAdvantage | float | The maximum advantage to allow. | LearningAgentsPPOTrainer.h | |
| MinimumAdvantage | float | The minimum advantage to allow. | LearningAgentsPPOTrainer.h | |
| MLflowTrackingUri | FString | The URI of the MLflow Tracking Server to log to. | LearningAgentsPPOTrainer.h | |
| NumberOfIterations | int32 | The number of iterations to run before ending training. | LearningAgentsPPOTrainer.h | |
| NumberOfStepsToTrimAtEndOfEpisode | int32 | The number of steps to trim from the end of the episode. | LearningAgentsPPOTrainer.h | |
| NumberOfStepsToTrimAtStartOfEpisode | int32 | The number of steps to trim from the start of the episode, e.g. this can be useful if some things are still getting set up at the start of the episode and you don't want them used for training. | LearningAgentsPPOTrainer.h | |
| PolicyBatchSize | int32 | Batch size to use for training the policy. | LearningAgentsPPOTrainer.h | |
| PolicyWindowSize | int32 | The number of consecutive steps of observations and actions over which to train the policy. | LearningAgentsPPOTrainer.h | |
| RandomSeed | int32 | The seed used for any random sampling the trainer will perform, e.g. for weight initialization. | LearningAgentsPPOTrainer.h | |
| ReturnRegularizationWeight | float | Weight used to regularize returns. Encourages the critic not to over- or under-estimate returns. | LearningAgentsPPOTrainer.h | |
| WeightDecay | float | Amount of weight decay to apply to the network. | LearningAgentsPPOTrainer.h | |
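These settings can be filled in from C++ before training starts. The sketch below populates a few of the fields listed above as an illustration; the particular values chosen, and how the finished struct is then handed to the PPO trainer, are assumptions for the example rather than recommendations from this page.

```cpp
#include "CoreMinimal.h"
#include "LearningAgentsPPOTrainer.h"

// A minimal sketch: populate a handful of the PPO training settings listed
// above. The values shown here are illustrative assumptions, not defaults
// or recommendations taken from this page.
FLearningAgentsPPOTrainingSettings MakeExamplePPOSettings()
{
    FLearningAgentsPPOTrainingSettings Settings;

    Settings.NumberOfIterations = 10000;   // Stop training after this many iterations.
    Settings.LearningRatePolicy = 0.0001f; // Within the 0.001-0.0001 range noted above.
    Settings.LearningRateCritic = 0.001f;
    Settings.DiscountFactor = 0.99f;
    Settings.GaeLambda = 0.95f;
    Settings.EpsilonClip = 0.2f;
    Settings.ActionEntropyWeight = 0.01f;
    Settings.PolicyBatchSize = 1024;
    Settings.CriticBatchSize = 4096;
    Settings.bUseTensorboard = true;       // Emit TensorBoard logs to the intermediate directory.

    return Settings;
}
```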
Functions
Public
| Name | Remarks | Include Path | Unreal Specifiers |
|---|---|---|---|
| TSharedRef< FJsonObject > AsJsonConfig() |  | LearningAgentsPPOTrainer.h |  |
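AsJsonConfig() returns the settings as a JSON object, which is handy for logging or inspecting the exact configuration used for a run. The sketch below serializes that object to a string using the engine's standard Json module; apart from the AsJsonConfig() call itself, the helper name and logging usage are illustrative assumptions.

```cpp
#include "CoreMinimal.h"
#include "LearningAgentsPPOTrainer.h"
#include "Dom/JsonObject.h"
#include "Serialization/JsonSerializer.h"
#include "Serialization/JsonWriter.h"

// A hedged sketch: dump the PPO training settings to a JSON string for logging.
// Everything other than FLearningAgentsPPOTrainingSettings::AsJsonConfig() is
// standard Unreal Json module usage, shown here for illustration only.
void LogPPOSettingsAsJson(FLearningAgentsPPOTrainingSettings& Settings)
{
    const TSharedRef<FJsonObject> JsonConfig = Settings.AsJsonConfig();

    // Serialize the JSON object into a string.
    FString JsonString;
    const TSharedRef<TJsonWriter<>> Writer = TJsonWriterFactory<>::Create(&JsonString);
    FJsonSerializer::Serialize(JsonConfig, Writer);

    UE_LOG(LogTemp, Log, TEXT("PPO training settings: %s"), *JsonString);
}
```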