# ML-Agents trainer configuration for the `Sorter` behavior, trained with PPO,
# plus a curriculum that raises the `num_tiles` environment parameter from 2
# to 20 over ten lessons.
behaviors:
  Sorter:
    trainer_type: ppo
    hyperparameters:
      batch_size: 512
      # buffer_size is 80x batch_size (40960 / 512).
      buffer_size: 40960
      learning_rate: 0.0003
      beta: 0.005
      epsilon: 0.2
      lambd: 0.95
      num_epoch: 3
      # The learning rate is held constant rather than decayed.
      learning_rate_schedule: constant
    network_settings:
      normalize: false
      hidden_units: 128
      num_layers: 2
      vis_encode_type: simple
    reward_signals:
      # Only the environment's own (extrinsic) reward is configured.
      extrinsic:
        gamma: 0.99
        strength: 1.0
    keep_checkpoints: 5
    max_steps: 5000000
    time_horizon: 256
    summary_freq: 10000

environment_parameters:
  num_tiles:
    # `curriculum` is a list of lessons: the leading '-' on each `name` entry
    # is important because it starts a new list item.
    #
    # Every lesson except the last carries a `completion_criteria` block tied
    # to the `Sorter` behavior; `value` is what `num_tiles` is set to while
    # that lesson is active. Thresholds rise monotonically (0.3 -> 0.75).
    # NOTE(review): per the ML-Agents docs, `measure: progress` compares
    # `threshold` against steps / max_steps -- confirm against the installed
    # ML-Agents version.
    curriculum:
      - name: Lesson0
        completion_criteria:
          measure: progress
          behavior: Sorter
          signal_smoothing: true
          min_lesson_length: 100
          threshold: 0.3
        value: 2.0
      - name: Lesson1
        completion_criteria:
          measure: progress
          behavior: Sorter
          signal_smoothing: true
          min_lesson_length: 100
          threshold: 0.4
        value: 4.0
      - name: Lesson2
        completion_criteria:
          measure: progress
          behavior: Sorter
          signal_smoothing: true
          min_lesson_length: 100
          threshold: 0.45
        value: 6.0
      - name: Lesson3
        completion_criteria:
          measure: progress
          behavior: Sorter
          signal_smoothing: true
          min_lesson_length: 100
          threshold: 0.5
        value: 8.0
      - name: Lesson4
        completion_criteria:
          measure: progress
          behavior: Sorter
          signal_smoothing: true
          min_lesson_length: 100
          threshold: 0.55
        value: 10.0
      - name: Lesson5
        completion_criteria:
          measure: progress
          behavior: Sorter
          signal_smoothing: true
          min_lesson_length: 100
          threshold: 0.6
        value: 12.0
      - name: Lesson6
        completion_criteria:
          measure: progress
          behavior: Sorter
          signal_smoothing: true
          min_lesson_length: 100
          threshold: 0.65
        value: 14.0
      - name: Lesson7
        completion_criteria:
          measure: progress
          behavior: Sorter
          signal_smoothing: true
          min_lesson_length: 100
          threshold: 0.7
        value: 16.0
      - name: Lesson8
        completion_criteria:
          measure: progress
          behavior: Sorter
          signal_smoothing: true
          min_lesson_length: 100
          threshold: 0.75
        value: 18.0
      # Final lesson: it has no completion_criteria because there is no
      # further lesson to advance to.
      - name: Lesson9
        value: 20.0