# ml-agents/config/ppo/Sorter_curriculum.yaml
#
# Trainer configuration for the `Sorter` behavior (trainer_type: ppo) with a
# ten-lesson curriculum over the `num_tiles` environment parameter.
---
behaviors:
  Sorter:
    trainer_type: ppo
    hyperparameters:
      batch_size: 512
      # buffer_size is 80 x batch_size (40960 / 512).
      buffer_size: 40960
      learning_rate: 0.0003
      beta: 0.005
      epsilon: 0.2
      lambd: 0.95
      num_epoch: 3
      learning_rate_schedule: linear
    network_settings:
      normalize: false
      hidden_units: 128
      num_layers: 2
      vis_encode_type: simple
    reward_signals:
      # Only an extrinsic reward signal is configured.
      extrinsic:
        gamma: 0.99
        strength: 1.0
    keep_checkpoints: 5
    max_steps: 100000000
    time_horizon: 256
    summary_freq: 10000
    threaded: true

environment_parameters:
  # Curriculum for `num_tiles`: Lesson0..Lesson9 raise `value` from 2.0 to
  # 20.0 in steps of 2.0. Lesson0..Lesson8 each declare completion_criteria
  # with `measure: progress` and thresholds rising from 0.05 to 0.45 in steps
  # of 0.05; Lesson9 has no completion_criteria.
  # NOTE(review): per the ML-Agents docs, `progress` is presumably the ratio
  # steps / max_steps, so with max_steps above each 0.05 of threshold would
  # correspond to 5,000,000 steps -- confirm against the installed version.
  num_tiles:
    curriculum:
      - name: Lesson0  # The '-' is important as this is a list
        completion_criteria:
          measure: progress
          behavior: Sorter
          signal_smoothing: true
          min_lesson_length: 100
          threshold: 0.05
        value: 2.0
      - name: Lesson1
        completion_criteria:
          measure: progress
          behavior: Sorter
          signal_smoothing: true
          min_lesson_length: 100
          threshold: 0.1
        value: 4.0
      - name: Lesson2
        completion_criteria:
          measure: progress
          behavior: Sorter
          signal_smoothing: true
          min_lesson_length: 100
          threshold: 0.15
        value: 6.0
      - name: Lesson3
        completion_criteria:
          measure: progress
          behavior: Sorter
          signal_smoothing: true
          min_lesson_length: 100
          threshold: 0.2
        value: 8.0
      - name: Lesson4
        completion_criteria:
          measure: progress
          behavior: Sorter
          signal_smoothing: true
          min_lesson_length: 100
          threshold: 0.25
        value: 10.0
      - name: Lesson5
        completion_criteria:
          measure: progress
          behavior: Sorter
          signal_smoothing: true
          min_lesson_length: 100
          threshold: 0.3
        value: 12.0
      - name: Lesson6
        completion_criteria:
          measure: progress
          behavior: Sorter
          signal_smoothing: true
          min_lesson_length: 100
          threshold: 0.35
        value: 14.0
      - name: Lesson7
        completion_criteria:
          measure: progress
          behavior: Sorter
          signal_smoothing: true
          min_lesson_length: 100
          threshold: 0.4
        value: 16.0
      - name: Lesson8
        completion_criteria:
          measure: progress
          behavior: Sorter
          signal_smoothing: true
          min_lesson_length: 100
          threshold: 0.45
        value: 18.0
      # Last lesson: no completion_criteria is declared for it.
      - name: Lesson9
        value: 20.0

env_settings:
  num_envs: 8