# Training settings for the `3DBallHard` entry, using the PPO trainer.
#
# NOTE(review): the previous revision of this block had no indentation and
# listed three keys twice. The nesting below is reconstructed from key order
# and the usual ML-Agents trainer-config layout -- confirm it against the
# consumer's schema before relying on it.
3DBallHard:
  trainer_type: ppo
  hyperparameters:
    # Previously listed twice (1200, then 120). Duplicate keys are invalid
    # YAML; last-wins parsers already used 120, so that value is kept.
    # Restore 1200 here if the first entry was the intended one.
    batch_size: 120
    buffer_size: 12000
    learning_rate: 0.0003
    beta: 0.001
    # NOTE(review): ML-Agents documents this key under `network_settings`;
    # it is kept here only to follow the original key order -- confirm.
    vis_encode_type: simple
  reward_signals:
    extrinsic:
      # Previously listed twice (0.995, then 0.99); the later value is kept.
      gamma: 0.99
  # Previously listed twice (5000000, then 500000); the later value is kept.
  max_steps: 500000
  time_horizon: 1000
  summary_freq: 12000
  threaded: true