behaviors:
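  # Unity ML-Agents trainer configuration, in the flat layout where
  # hyperparameters sit directly under each behavior name. A file like this
  # is typically consumed by the mlagents-learn CLI, e.g.:
  #   mlagents-learn <this_config>.yaml --run-id=walker_run
  # (the run id "walker_run" is an illustrative placeholder, not from this repo)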
  WalkerDynamic:
    trainer: ppo
    batch_size: 16384 # previously 2048
    buffer_size: 163840 # previously 20480
    beta: 9.0e-3 # previously 0.005
    epsilon: 0.2
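    # Meaning of the PPO hyperparameters above (per the ML-Agents docs):
    #   batch_size  - experiences per gradient-descent step; buffer_size
    #                 should be a multiple of it (here 163840 = 10 * 16384)
    #   buffer_size - experiences collected before each policy update
    #   beta        - entropy regularization strength (higher = more exploration)
    #   epsilon     - PPO clipping threshold on the policy update ratio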
    # Previous WalkerDynamic settings, kept for reference
    # (entries identical to the active settings are omitted):
    # normalize: true
    # learning_rate_schedule: constant
    # num_epoch: 3
    # time_horizon: 80
    # batch_size: 512
    # buffer_size: 5120
    # max_steps: 2e7 (alternative: 1e8)
    # summary_freq: 30000
    # learning_rate: 1.0e-3
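    # Network architecture: 3 hidden layers of 512 units each.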
    num_layers: 3
    hidden_units: 512
    reward_signals:
      extrinsic:
        strength: 1.0
        gamma: 0.995
    # time_horizon: 1000
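    # extrinsic reward signal: strength scales the raw environment reward,
    # gamma is the discount factor applied to future rewards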
  WalkerStatic:
    normalize: true
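  # WalkerStatic only overrides normalize; unspecified fields should fall back
  # to the trainer's built-in PPO defaults.
  # For newer ML-Agents releases that use the nested schema, the WalkerDynamic
  # block above would map roughly as follows. This is a sketch with the same
  # values, not verified against this project's ML-Agents version:
  # behaviors:
  #   WalkerDynamic:
  #     trainer_type: ppo
  #     hyperparameters:
  #       batch_size: 16384
  #       buffer_size: 163840
  #       beta: 9.0e-3
  #       epsilon: 0.2
  #     network_settings:
  #       num_layers: 3
  #       hidden_units: 512
  #     reward_signals:
  #       extrinsic:
  #         strength: 1.0
  #         gamma: 0.995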