```yml
# Trainer configuration for the RollerBall behavior, trained with PPO.
# Each setting appears exactly once, nested under the section that owns it.
behaviors:
  RollerBall:
    trainer_type: ppo
    hyperparameters:
      batch_size: 10
      buffer_size: 100
      learning_rate: 3.0e-4
      beta: 5.0e-4
      epsilon: 0.2
      lambd: 0.99
      num_epoch: 3
      learning_rate_schedule: linear
    network_settings:
      normalize: false
      hidden_units: 128
      num_layers: 2
      # No `memory` block: recurrence is off (was `use_recurrent: false`).
    reward_signals:
      extrinsic:
        gamma: 0.99
        strength: 1.0
    # Plain integer on purpose: `5.0e4` is read as a string by YAML 1.1 loaders.
    max_steps: 500000
```
Since this example creates a very simple training environment with only a few