# Trainer configuration for the "DodgeBall" behavior, trained with PPO.
#
# NOTE(review): the nesting below was reconstructed; the previous revision
# had no indentation, which made `DodgeBall` and `hyperparameters` parse as
# null with every setting at the top level. Confirm the grouping against the
# consuming trainer's schema (key names look like Unity ML-Agents -- verify).
DodgeBall:
  trainer_type: ppo
  hyperparameters:
    # Each of batch_size / buffer_size was previously defined twice
    # (128 then 1024, 2048 then 10240). Duplicate keys are invalid YAML and
    # last-key-wins parsers were already using the later values kept here.
    batch_size: 1024
    buffer_size: 10240
    learning_rate: 0.0003
    beta: 0.01
    epsilon: 0.2
    # Disabled settings, kept for reference:
    # encoding_size: 256
    # learning_rate: 0.0003
  keep_checkpoints: 5
  # max_steps was previously defined twice (2000000 then 20000000); the
  # later value is kept, matching last-key-wins parser behavior.
  max_steps: 20000000
  time_horizon: 64
  summary_freq: 5000
  threaded: true