# Trainer settings for the "Tennis" behavior.
# NOTE(review): the original listed every key at column 0, which leaves
# `behaviors` and `Tennis` as empty (null) keys and turns the settings into
# unrelated top-level entries. The nesting below is the presumed intent;
# confirm it against the schema of the tool that consumes this file.
behaviors:
  Tennis:
    trainer: ppo
    # batch_size, buffer_size and epsilon were each declared twice. Duplicate
    # mapping keys are invalid YAML; the later value of each pair is kept
    # because that is the one last-wins parsers were already picking up.
    # Dropped earlier values: batch_size 2048, buffer_size 20480, epsilon 0.1.
    batch_size: 4096
    buffer_size: 40960
    epsilon: 0.2
    hidden_units: 512
    lambd: 0.95
    learning_rate: 0.0003