# PPO trainer settings for the WalkerDynamic behavior.
# NOTE(review): the key layout looks like a Unity ML-Agents trainer config;
# the nesting under behaviors -> WalkerDynamic assumes that schema - confirm.
#
# Every key is assigned exactly once. Where the file previously assigned a
# key twice, the later assignment is kept (the value a last-wins YAML loader
# would have used) and the superseded value is recorded in a comment.
behaviors:
  WalkerDynamic:
    trainer: ppo
    batch_size: 2048  # superseded earlier value: 16384
    buffer_size: 20480  # superseded earlier value: 163840
    epsilon: 0.3  # superseded earlier value: 0.2
    hidden_units: 512
    lambd: 0.95
    memory_size: 128
    normalize: true
    num_epoch: 10  # superseded earlier value: 3
    num_layers: 3
    time_horizon: 1000
    sequence_length: 64
    summary_freq: 30000
    # Disabled overrides kept for reference. When re-enabling one whose key
    # is already set above, replace that value instead of adding a second
    # key, so the mapping stays free of duplicates.
    # learning_rate_schedule: constant
    # batch_size: 24576
    # buffer_size: 245760
    # max_steps: 1e8
    # learning_rate: 1.0e-3