# Add missing config and make sure to use floats in example; moved init_path.
# Trainer hyperparameters (fragment of a larger configuration).
# NOTE(review): every key in this fragment sits at column 0, so `reward_signals`
# and `gail` load as empty (null) values rather than as parents of the keys that
# follow them. The intended nesting cannot be recovered from this fragment —
# confirm against the complete configuration before use.
normalize: false
num_layers: 2
time_horizon: 64
summary_freq: 10000
# Declared exactly once, as an explicit null rather than a bare empty value.
init_path: null
# PPO-specific configs
beta: 5.0e-3
batch_size: 512
num_epoch: 3
samples_per_update: 0
reward_signals:
# environment reward
strength: 0.02
gamma: 0.99
encoding_size: 256
# Keep the decimal point: YAML 1.1 loaders such as PyYAML resolve `3e-4`
# as a string, while `3.0e-4` is resolved as a float.
learning_rate: 3.0e-4
# GAIL
gail:
demo_path: Project/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo
use_actions: false
use_vail: false