# NOTE(review): every key in this file starts at column 0, so the document
# parses as ONE flat mapping. `hyperparameters`, `behaviors`, `3DBallHard` and
# `3DBall` below therefore load as null, not as parents of the keys that
# follow them. The nesting appears to have been lost -- TODO confirm the
# intended layout against the consuming tool before re-indenting.
#
# Duplicate keys are invalid YAML; most loaders silently keep the LAST value
# and strict loaders/linters reject the file. The duplicates were removed
# below, keeping the value a last-wins loader was already using, so the
# parsed result is unchanged. The shadowed values are recorded in comments.

# Removed shadowed duplicate: `trainer_type: sac`
# (overridden by `trainer_type: sac_transfer` at the end of this mapping).
hyperparameters:
learning_rate: 0.0003
# Removed shadowed duplicate: `learning_rate_schedule: constant`.
learning_rate_schedule: linear
batch_size: 64
buffer_size: 500000
buffer_init_steps: 0
behaviors:
3DBallHard:
3DBall:
# Effective `trainer_type` for the flat mapping (was the last occurrence).
trainer_type: sac_transfer