# Trainer configuration, keyed by behavior name.
behaviors:
  SmallWallJump:
    trainer_type: ppo

    # Run length, rollout horizon, and reporting/checkpoint cadence.
    max_steps: 15000000
    time_horizon: 128
    summary_freq: 20000
    keep_checkpoints: 5
    threaded: true

    # Network settings: 2 layers of 256 hidden units, no input normalization.
    network_settings:
      num_layers: 2
      hidden_units: 256
      normalize: false
      vis_encode_type: simple

    # Only the extrinsic reward signal is configured.
    reward_signals:
      extrinsic:
        strength: 1.0
        gamma: 0.99

    # PPO hyperparameters; buffer_size (2048) is a multiple of batch_size (128).
    hyperparameters:
      learning_rate: 0.0003
      learning_rate_schedule: linear
      batch_size: 128
      buffer_size: 2048
      num_epoch: 3
      beta: 0.005
      epsilon: 0.2
      lambd: 0.95
# Curriculum for the `big_wall_height` environment parameter.
# Lessons run in list order; the value steps 3 -> 5 -> 8 -> 9 -> 9 as the
# `progress` measure of the SmallWallJump behavior passes 0.1 / 0.3 / 0.5 / 0.7.
# (Per the ML-Agents docs, `progress` is the ratio of steps to max_steps.)
# NOTE(review): the parameter is named `big_wall_height` but every lesson is
# gated on the `SmallWallJump` behavior -- confirm this pairing is intended.
environment_parameters:
  big_wall_height:
    curriculum:
      - name: Lesson0
        value: 3
        completion_criteria:
          behavior: SmallWallJump
          measure: progress
          threshold: 0.1
          min_lesson_length: 100
          signal_smoothing: true
      - name: Lesson1
        value: 5
        completion_criteria:
          behavior: SmallWallJump
          measure: progress
          threshold: 0.3
          min_lesson_length: 100
          signal_smoothing: true
      - name: Lesson2
        value: 8
        completion_criteria:
          behavior: SmallWallJump
          measure: progress
          threshold: 0.5
          min_lesson_length: 100
          signal_smoothing: true
      - name: Lesson3
        value: 9
        completion_criteria:
          behavior: SmallWallJump
          measure: progress
          threshold: 0.7
          min_lesson_length: 100
          signal_smoothing: true
      # Final lesson: no completion_criteria, so it runs until training ends.
      # NOTE(review): same value (9) as Lesson3, so the Lesson3 -> Lesson4
      # transition does not change the parameter -- confirm this is intended.
      - name: Lesson4
        value: 9