# Trainer configuration for the `Pyramids` behavior: a PPO trainer with
# extrinsic, curiosity and GAIL reward signals, plus behavioral cloning
# from a recorded demonstration file.
#
# NOTE(review): the nesting below was reconstructed from a source in which
# the whole mapping had been collapsed onto one line; verify it against the
# consuming tool's trainer-config schema.
behaviors:
  Pyramids:
    trainer_type: ppo
    time_horizon: 128
    # NOTE(review): the exponent has no sign; YAML 1.1 loaders such as PyYAML
    # may load this value as a string rather than a float. Confirm that the
    # consumer converts it before relying on numeric comparisons.
    max_steps: 1.0e7

    hyperparameters:
      batch_size: 128
      beta: 0.01
      buffer_size: 2048
      epsilon: 0.2
      lambd: 0.95
      learning_rate: 0.0003
      num_epoch: 3

    network_settings:
      num_layers: 2
      normalize: false
      hidden_units: 512

    # Three reward signals are configured; each has its own strength and gamma.
    reward_signals:
      extrinsic:
        strength: 1.0
        gamma: 0.99
      curiosity:
        strength: 0.02
        gamma: 0.99
        # Separate network_settings for this signal (256 hidden units here
        # versus 512 in the behavior-level network_settings above).
        network_settings:
          hidden_units: 256
      gail:
        strength: 0.01
        gamma: 0.99
        demo_path: Project/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo

    # Uses the same demo file as the gail reward signal.
    behavioral_cloning:
      demo_path: Project/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo
      strength: 0.5
      steps: 150000