# PPO trainer hyperparameters for the WalkerDynamic behavior.
# NOTE(review): the original file had every setting at column 0 (so they were
# NOT nested under WalkerDynamic) and four duplicate keys (beta, buffer_size,
# epsilon, num_epoch). Duplicates are invalid YAML; most parsers keep the last
# occurrence, so the last-seen values are preserved here.
WalkerDynamic:
  trainer: ppo
  batch_size: 2048
  beta: 0.005  # was duplicated; earlier value 0.01 was shadowed
  buffer_size: 131072  # was duplicated; earlier value 20480 was shadowed
  epsilon: 0.2  # was duplicated; earlier value 0.3 was shadowed
  hidden_units: 512
  lambd: 0.95
  learning_rate: 0.0003
  normalize: true
  num_epoch: 3  # was duplicated; earlier value 10 was shadowed
  num_layers: 3
  time_horizon: 1000
  sequence_length: 64