trainer_type: ppo
hyperparameters:
batch_size: 128
buffer_size: 10240
learning_rate: 0.0003
beta: 0.01
epsilon: 0.2
network_settings:
normalize: false
hidden_units: 512
num_layers: 2
vis_encode_type: simple
reward_signals: