# ml-agents/config/ppo/WallJump_curriculum.yaml
#
# PPO trainer configuration for two behaviors (BigWallJump, SmallWallJump)
# plus a four-lesson curriculum for each of the two wall-height environment
# parameters (big_wall_height, small_wall_height).
#
# Indentation is 2 spaces everywhere; YAML does not allow tabs for
# indentation.
---
behaviors:
  # Trainer settings for the BigWallJump behavior.
  BigWallJump:
    trainer_type: ppo
    hyperparameters:
      batch_size: 128
      buffer_size: 2048
      learning_rate: 0.0003
      beta: 0.005
      epsilon: 0.2
      lambd: 0.95
      num_epoch: 3
      learning_rate_schedule: linear
    network_settings:
      normalize: false
      hidden_units: 256
      num_layers: 2
      vis_encode_type: simple
    reward_signals:
      extrinsic:
        gamma: 0.99
        strength: 1.0
    keep_checkpoints: 5
    max_steps: 20000000
    time_horizon: 128
    summary_freq: 20000

  # Trainer settings for the SmallWallJump behavior. Every value matches
  # BigWallJump except max_steps (5000000 here, 20000000 there).
  SmallWallJump:
    trainer_type: ppo
    hyperparameters:
      batch_size: 128
      buffer_size: 2048
      learning_rate: 0.0003
      beta: 0.005
      epsilon: 0.2
      lambd: 0.95
      num_epoch: 3
      learning_rate_schedule: linear
    network_settings:
      normalize: false
      hidden_units: 256
      num_layers: 2
      vis_encode_type: simple
    reward_signals:
      extrinsic:
        gamma: 0.99
        strength: 1.0
    keep_checkpoints: 5
    max_steps: 5000000
    time_horizon: 128
    summary_freq: 20000

environment_parameters:
  # Curriculum for big_wall_height. Lessons 0-2 draw the value from a uniform
  # sampler and are gated on the BigWallJump behavior with thresholds
  # 0.1 / 0.3 / 0.5; Lesson3 is a fixed value with no completion criteria.
  # NOTE(review): `measure: progress` is presumably the ML-Agents
  # steps/max_steps ratio - confirm against the docs for the version in use.
  big_wall_height:
    curriculum:
      - name: Lesson0  # The '-' is important as this is a list
        completion_criteria:
          measure: progress
          behavior: BigWallJump
          signal_smoothing: true
          min_lesson_length: 100
          threshold: 0.1
        value:
          sampler_type: uniform
          sampler_parameters:
            min_value: 0.0
            max_value: 4.0
      - name: Lesson1  # This is the start of the second lesson
        completion_criteria:
          measure: progress
          behavior: BigWallJump
          signal_smoothing: true
          min_lesson_length: 100
          threshold: 0.3
        value:
          sampler_type: uniform
          sampler_parameters:
            min_value: 4.0
            max_value: 7.0
      - name: Lesson2
        completion_criteria:
          measure: progress
          behavior: BigWallJump
          signal_smoothing: true
          min_lesson_length: 100
          threshold: 0.5
        value:
          sampler_type: uniform
          sampler_parameters:
            # NOTE(review): this range overlaps Lesson1 (4.0-7.0) between
            # 6.0 and 7.0 - confirm the overlap is intended.
            min_value: 6.0
            max_value: 8.0
      - name: Lesson3
        value: 8.0

  # Curriculum for small_wall_height. Each lesson uses a fixed value
  # (1.5, 2.0, 2.5, then 4.0); lessons 0-2 are gated on the SmallWallJump
  # behavior with the same 0.1 / 0.3 / 0.5 thresholds as big_wall_height.
  small_wall_height:
    curriculum:
      - name: Lesson0
        completion_criteria:
          measure: progress
          behavior: SmallWallJump
          signal_smoothing: true
          min_lesson_length: 100
          threshold: 0.1
        value: 1.5
      - name: Lesson1
        completion_criteria:
          measure: progress
          behavior: SmallWallJump
          signal_smoothing: true
          min_lesson_length: 100
          threshold: 0.3
        value: 2.0
      - name: Lesson2
        completion_criteria:
          measure: progress
          behavior: SmallWallJump
          signal_smoothing: true
          min_lesson_length: 100
          threshold: 0.5
        value: 2.5
      - name: Lesson3
        value: 4.0