# PPO trainer hyperparameters for the WalkerDynamic behavior (Unity ML-Agents
# trainer-config format: behavior name -> settings mapping).
# NOTE(review): duplicate keys removed (beta, epsilon, num_epoch). YAML 1.2
# forbids duplicates and most parsers silently keep the LAST value, so the
# last-seen values (beta: 0.01, epsilon: 0.3, num_epoch: 10) are kept here —
# effective runtime settings are unchanged.
WalkerDynamic:
  trainer: ppo
  batch_size: 2048
  beta: 0.01  # was listed twice (0.005 then 0.01); last value wins
  epsilon: 0.3  # was listed twice (0.2 then 0.3); last value wins
  hidden_units: 512
  lambd: 0.95
  learning_rate: 0.0003
  normalize: true
  num_epoch: 10  # duplicate line removed; a trailing "#3" hinted at an older value
  num_layers: 3
  time_horizon: 1000
  sequence_length: 64