Browse code

[refactor] Structure configuration files into classes (#3936)

/test-sampler
GitHub · 5 years ago
Current commit: e92b4f88
98 files changed, 3088 insertions(+), 3176 deletions(-)
1. com.unity.ml-agents/CHANGELOG.md (2 changes)
2. config/imitation/CrawlerStatic.yaml (46 changes)
3. config/imitation/FoodCollector.yaml (46 changes)
4. config/imitation/Hallway.yaml (48 changes)
5. config/imitation/PushBlock.yaml (43 changes)
6. config/imitation/Pyramids.yaml (30 changes)
7. config/ppo/3DBall.yaml (42 changes)
8. config/ppo/3DBallHard.yaml (42 changes)
9. config/ppo/3DBall_randomize.yaml (76 changes)
10. config/ppo/Basic.yaml (42 changes)
11. config/ppo/Bouncer.yaml (42 changes)
12. config/ppo/CrawlerDynamic.yaml (42 changes)
13. config/ppo/CrawlerStatic.yaml (42 changes)
14. config/ppo/FoodCollector.yaml (42 changes)
15. config/ppo/GridWorld.yaml (42 changes)
16. config/ppo/Hallway.yaml (45 changes)
17. config/ppo/PushBlock.yaml (42 changes)
18. config/ppo/Pyramids.yaml (45 changes)
19. config/ppo/Reacher.yaml (42 changes)
20. config/ppo/SoccerTwos.yaml (56 changes)
21. config/ppo/StrikersVsGoalie.yaml (99 changes)
22. config/ppo/Tennis.yaml (49 changes)
23. config/ppo/VisualHallway.yaml (45 changes)
24. config/ppo/VisualPushBlock.yaml (45 changes)
25. config/ppo/VisualPyramids.yaml (45 changes)
26. config/ppo/Walker.yaml (42 changes)
27. config/ppo/WallJump.yaml (83 changes)
28. config/ppo/WallJump_curriculum.yaml (115 changes)
29. config/ppo/WormDynamic.yaml (42 changes)
30. config/ppo/WormStatic.yaml (42 changes)
31. config/sac/3DBall.yaml (44 changes)
32. config/sac/3DBallHard.yaml (44 changes)
33. config/sac/Basic.yaml (44 changes)
34. config/sac/Bouncer.yaml (44 changes)
35. config/sac/CrawlerDynamic.yaml (44 changes)
36. config/sac/CrawlerStatic.yaml (44 changes)
37. config/sac/FoodCollector.yaml (44 changes)
38. config/sac/GridWorld.yaml (44 changes)
39. config/sac/Hallway.yaml (47 changes)
40. config/sac/PushBlock.yaml (44 changes)
41. config/sac/Pyramids.yaml (48 changes)
42. config/sac/Reacher.yaml (44 changes)
43. config/sac/Tennis.yaml (50 changes)
44. config/sac/VisualHallway.yaml (48 changes)
45. config/sac/VisualPushBlock.yaml (48 changes)
46. config/sac/VisualPyramids.yaml (48 changes)
47. config/sac/Walker.yaml (44 changes)
48. config/sac/WallJump.yaml (87 changes)
49. config/sac/WormDynamic.yaml (44 changes)
50. config/sac/WormStatic.yaml (44 changes)
51. docs/Migrating.md (13 changes)
52. docs/Training-Configuration-File.md (113 changes)
53. docs/Training-ML-Agents.md (141 changes)
54. ml-agents/mlagents/trainers/cli_utils.py (233 changes)
55. ml-agents/mlagents/trainers/components/bc/module.py (39 changes)
56. ml-agents/mlagents/trainers/components/reward_signals/__init__.py (29 changes)
57. ml-agents/mlagents/trainers/components/reward_signals/curiosity/signal.py (36 changes)
58. ml-agents/mlagents/trainers/components/reward_signals/extrinsic/signal.py (12 changes)
59. ml-agents/mlagents/trainers/components/reward_signals/gail/signal.py (47 changes)
60. ml-agents/mlagents/trainers/components/reward_signals/reward_signal_factory.py (22 changes)
61. ml-agents/mlagents/trainers/curriculum.py (66 changes)
62. ml-agents/mlagents/trainers/ghost/trainer.py (22 changes)
63. ml-agents/mlagents/trainers/learn.py (377 changes)
64. ml-agents/mlagents/trainers/meta_curriculum.py (7 changes)
65. ml-agents/mlagents/trainers/optimizer/tf_optimizer.py (29 changes)
66. ml-agents/mlagents/trainers/policy/nn_policy.py (11 changes)
67. ml-agents/mlagents/trainers/policy/tf_policy.py (41 changes)
68. ml-agents/mlagents/trainers/ppo/optimizer.py (33 changes)
69. ml-agents/mlagents/trainers/ppo/trainer.py (42 changes)
70. ml-agents/mlagents/trainers/run_experiment.py (6 changes)
71. ml-agents/mlagents/trainers/sac/optimizer.py (38 changes)
72. ml-agents/mlagents/trainers/sac/trainer.py (100 changes)
73. ml-agents/mlagents/trainers/tests/test_barracuda_converter.py (39 changes)
74. ml-agents/mlagents/trainers/tests/test_bcmodule.py (100 changes)
75. ml-agents/mlagents/trainers/tests/test_curriculum.py (94 changes)
76. ml-agents/mlagents/trainers/tests/test_distributions.py (36 changes)
77. ml-agents/mlagents/trainers/tests/test_ghost.py (40 changes)
78. ml-agents/mlagents/trainers/tests/test_learn.py (133 changes)
79. ml-agents/mlagents/trainers/tests/test_meta_curriculum.py (55 changes)
80. ml-agents/mlagents/trainers/tests/test_nn_policy.py (72 changes)
81. ml-agents/mlagents/trainers/tests/test_policy.py (11 changes)
82. ml-agents/mlagents/trainers/tests/test_ppo.py (99 changes)
83. ml-agents/mlagents/trainers/tests/test_reward_signals.py (141 changes)
84. ml-agents/mlagents/trainers/tests/test_rl_trainer.py (18 changes)
85. ml-agents/mlagents/trainers/tests/test_sac.py (83 changes)
86. ml-agents/mlagents/trainers/tests/test_simple_rl.py (388 changes)
87. ml-agents/mlagents/trainers/tests/test_subprocess_env_manager.py (4 changes)
88. ml-agents/mlagents/trainers/tests/test_trainer_util.py (274 changes)
89. ml-agents/mlagents/trainers/trainer/rl_trainer.py (10 changes)
90. ml-agents/mlagents/trainers/trainer/trainer.py (28 changes)
91. ml-agents/mlagents/trainers/trainer_controller.py (8 changes)
92. ml-agents/mlagents/trainers/trainer_util.py (106 changes)
93. ml-agents/setup.py (2 changes)
94. ml-agents/tests/yamato/training_int_tests.py (8 changes)
95. ml-agents/tests/yamato/yamato_utils.py (12 changes)
96. config/upgrade_config.py (110 changes)
97. ml-agents/mlagents/trainers/settings.py (373 changes)
98. ml-agents/mlagents/trainers/tests/test_settings.py (151 changes)

com.unity.ml-agents/CHANGELOG.md (2 changes)

 - Curriculum and Parameter Randomization configurations have been merged
   into the main training configuration file. Note that this means training
   configuration files are now environment-specific. (#3791)
+- The format for trainer configuration has changed, and the "default" behavior has been deprecated.
+  See the [Migration Guide](https://github.com/Unity-Technologies/ml-agents/blob/master/docs/Migrating.md) for more details. (#3936)
 - Training artifacts (trained models, summaries) are now found in the `results/`
   directory. (#3829)
 - Unity Player logs are now written out to the results directory. (#3877)
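
For reference, the restructuring moves the old flat per-behavior keys into
nested sections. A minimal before/after sketch, abridged from the 3DBall
config shown below (not a complete file):

Before (flat format):

    behaviors:
      3DBall:
        trainer: ppo
        batch_size: 64
        normalize: true
        hidden_units: 128

After (structured format):

    behaviors:
      3DBall:
        trainer_type: ppo
        hyperparameters:
          batch_size: 64
        network_settings:
          normalize: true
          hidden_units: 128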

config/imitation/CrawlerStatic.yaml (46 changes)

 behaviors:
   CrawlerStatic:
-    trainer: ppo
-    batch_size: 2024
-    beta: 0.005
-    buffer_size: 20240
-    epsilon: 0.2
-    hidden_units: 512
-    lambd: 0.95
-    learning_rate: 0.0003
-    max_steps: 1e7
-    memory_size: 256
-    normalize: true
-    num_epoch: 3
-    num_layers: 3
-    time_horizon: 1000
-    sequence_length: 64
-    summary_freq: 30000
-    use_recurrent: false
+    trainer_type: ppo
+    hyperparameters:
+      batch_size: 2024
+      buffer_size: 20240
+      learning_rate: 0.0003
+      beta: 0.005
+      epsilon: 0.2
+      lambd: 0.95
+      num_epoch: 3
+      learning_rate_schedule: linear
+    network_settings:
+      normalize: true
+      hidden_units: 512
+      num_layers: 3
+      vis_encode_type: simple
-        strength: 1.0
+        strength: 1.0
         learning_rate: 0.0003
         use_actions: false
         use_vail: false
+    output_path: default
+    keep_checkpoints: 5
+    max_steps: 10000000
+    time_horizon: 1000
+    summary_freq: 30000
+    threaded: true
-      steps: 50000
+      steps: 50000
       samples_per_update: 0
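
The GAIL and behavioral-cloning keys above (strength, use_actions, use_vail,
steps, samples_per_update) appear without their parent keys in this capture.
In the new schema they sit under reward_signals and behavioral_cloning; a
minimal sketch of the likely nesting (demo paths omitted, since they do not
appear in this capture):

    behaviors:
      CrawlerStatic:
        reward_signals:
          gail:
            strength: 1.0
            learning_rate: 0.0003
            use_actions: false
            use_vail: false
        behavioral_cloning:
          steps: 50000
          samples_per_update: 0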

config/imitation/FoodCollector.yaml (46 changes)

 behaviors:
   FoodCollector:
-    trainer: ppo
-    batch_size: 64
-    beta: 0.005
-    buffer_size: 10240
-    epsilon: 0.2
-    hidden_units: 128
-    lambd: 0.95
-    learning_rate: 0.0003
-    max_steps: 2.0e6
-    memory_size: 256
-    normalize: false
-    num_epoch: 3
-    num_layers: 2
-    time_horizon: 64
-    sequence_length: 32
-    summary_freq: 10000
-    use_recurrent: false
+    trainer_type: ppo
+    hyperparameters:
+      batch_size: 64
+      buffer_size: 10240
+      learning_rate: 0.0003
+      beta: 0.005
+      epsilon: 0.2
+      lambd: 0.95
+      num_epoch: 3
+      learning_rate_schedule: linear
+    network_settings:
+      normalize: false
+      hidden_units: 128
+      num_layers: 2
+      vis_encode_type: simple
-        strength: 0.1
+        strength: 0.1
         learning_rate: 0.0003
         use_actions: false
         use_vail: false
+    output_path: default
+    keep_checkpoints: 5
+    max_steps: 2000000
+    time_horizon: 64
+    summary_freq: 10000
+    threaded: true
-      steps: 0
+      steps: 0
       samples_per_update: 0

config/imitation/Hallway.yaml (48 changes)

 behaviors:
   Hallway:
-    trainer: ppo
-    batch_size: 128
-    beta: 0.01
-    buffer_size: 1024
-    epsilon: 0.2
-    hidden_units: 128
-    lambd: 0.95
-    learning_rate: 0.0003
-    max_steps: 1.0e7
-    memory_size: 256
-    normalize: false
-    num_epoch: 3
-    num_layers: 2
-    time_horizon: 64
-    sequence_length: 64
-    summary_freq: 10000
-    use_recurrent: true
+    trainer_type: ppo
+    hyperparameters:
+      batch_size: 128
+      buffer_size: 1024
+      learning_rate: 0.0003
+      beta: 0.01
+      epsilon: 0.2
+      lambd: 0.95
+      num_epoch: 3
+      learning_rate_schedule: linear
+    network_settings:
+      normalize: false
+      hidden_units: 128
+      num_layers: 2
+      vis_encode_type: simple
+      memory:
+        sequence_length: 64
+        memory_size: 256
-        strength: 1.0
+        strength: 1.0
-        gamma: 0.99
+        gamma: 0.99
         learning_rate: 0.0003
         use_actions: false
         use_vail: false
+    output_path: default
+    keep_checkpoints: 5
+    max_steps: 10000000
+    time_horizon: 64
+    summary_freq: 10000
+    threaded: true

config/imitation/PushBlock.yaml (43 changes)

 behaviors:
   PushBlock:
-    trainer: ppo
-    batch_size: 128
-    beta: 0.01
-    buffer_size: 2048
-    epsilon: 0.2
-    hidden_units: 256
-    lambd: 0.95
-    learning_rate: 0.0003
-    max_steps: 1.5e7
-    memory_size: 256
-    normalize: false
-    num_epoch: 3
-    num_layers: 2
-    time_horizon: 64
-    sequence_length: 64
-    summary_freq: 60000
-    use_recurrent: false
+    trainer_type: ppo
+    hyperparameters:
+      batch_size: 128
+      buffer_size: 2048
+      learning_rate: 0.0003
+      beta: 0.01
+      epsilon: 0.2
+      lambd: 0.95
+      num_epoch: 3
+      learning_rate_schedule: linear
+    network_settings:
+      normalize: false
+      hidden_units: 256
+      num_layers: 2
+      vis_encode_type: simple
-        gamma: 0.99
+        gamma: 0.99
         learning_rate: 0.0003
         use_actions: false
         use_vail: false
+    output_path: default
+    keep_checkpoints: 5
+    max_steps: 15000000
+    time_horizon: 64
+    summary_freq: 60000
+    threaded: true

config/imitation/Pyramids.yaml (30 changes)

 behaviors:
   Pyramids:
-    trainer: ppo
-    batch_size: 128
-    beta: 0.01
-    buffer_size: 2048
-    epsilon: 0.2
-    hidden_units: 512
-    lambd: 0.95
-    learning_rate: 0.0003
-    max_steps: 1.0e7
-    memory_size: 256
-    normalize: false
-    num_epoch: 3
-    num_layers: 2
+    trainer_type: ppo
-    sequence_length: 64
-    summary_freq: 30000
-    use_recurrent: false
+    max_steps: 1.0e7
+    hyperparameters:
+      batch_size: 128
+      beta: 0.01
+      buffer_size: 2048
+      epsilon: 0.2
+      lambd: 0.95
+      learning_rate: 0.0003
+      num_epoch: 3
+    network_settings:
+      num_layers: 2
+      normalize: false
+      hidden_units: 512
     reward_signals:
       extrinsic:
         strength: 1.0

config/ppo/3DBall.yaml (42 changes)

 behaviors:
   3DBall:
-    trainer: ppo
-    batch_size: 64
-    beta: 0.001
-    buffer_size: 12000
-    epsilon: 0.2
-    hidden_units: 128
-    lambd: 0.99
-    learning_rate: 0.0003
-    learning_rate_schedule: linear
-    max_steps: 5.0e5
-    memory_size: 128
-    normalize: true
-    num_epoch: 3
-    num_layers: 2
-    time_horizon: 1000
-    sequence_length: 64
-    summary_freq: 12000
-    use_recurrent: false
-    vis_encode_type: simple
+    trainer_type: ppo
+    hyperparameters:
+      batch_size: 64
+      buffer_size: 12000
+      learning_rate: 0.0003
+      beta: 0.001
+      epsilon: 0.2
+      lambd: 0.99
+      num_epoch: 3
+      learning_rate_schedule: linear
+    network_settings:
+      normalize: true
+      hidden_units: 128
+      num_layers: 2
+      vis_encode_type: simple
-        gamma: 0.99
+        gamma: 0.99
+    output_path: default
+    keep_checkpoints: 5
+    max_steps: 500000
+    time_horizon: 1000
+    summary_freq: 12000
+    threaded: true

config/ppo/3DBallHard.yaml (42 changes)

 behaviors:
   3DBallHard:
-    trainer: ppo
-    batch_size: 1200
-    beta: 0.001
-    buffer_size: 12000
-    epsilon: 0.2
-    hidden_units: 128
-    lambd: 0.95
-    learning_rate: 0.0003
-    learning_rate_schedule: linear
-    max_steps: 5.0e6
-    memory_size: 128
-    normalize: true
-    num_epoch: 3
-    num_layers: 2
-    time_horizon: 1000
-    sequence_length: 64
-    summary_freq: 12000
-    use_recurrent: false
-    vis_encode_type: simple
+    trainer_type: ppo
+    hyperparameters:
+      batch_size: 1200
+      buffer_size: 12000
+      learning_rate: 0.0003
+      beta: 0.001
+      epsilon: 0.2
+      lambd: 0.95
+      num_epoch: 3
+      learning_rate_schedule: linear
+    network_settings:
+      normalize: true
+      hidden_units: 128
+      num_layers: 2
+      vis_encode_type: simple
-        gamma: 0.995
+        gamma: 0.995
+    output_path: default
+    keep_checkpoints: 5
+    max_steps: 5000000
+    time_horizon: 1000
+    summary_freq: 12000
+    threaded: true

config/ppo/3DBall_randomize.yaml (76 changes)

 behaviors:
-  3DBall:
-    trainer: ppo
-    batch_size: 64
-    beta: 0.001
-    buffer_size: 12000
-    epsilon: 0.2
-    hidden_units: 128
-    lambd: 0.99
-    learning_rate: 3.0e-4
-    learning_rate_schedule: linear
-    max_steps: 5.0e5
-    memory_size: 128
-    normalize: true
-    num_epoch: 3
-    num_layers: 2
-    time_horizon: 1000
-    sequence_length: 64
-    summary_freq: 12000
-    use_recurrent: false
-    vis_encode_type: simple
-    reward_signals:
-      extrinsic:
-        strength: 1.0
-        gamma: 0.99
+  3DBall:
+    trainer_type: ppo
+    hyperparameters:
+      batch_size: 64
+      buffer_size: 12000
+      learning_rate: 0.0003
+      beta: 0.001
+      epsilon: 0.2
+      lambd: 0.99
+      num_epoch: 3
+      learning_rate_schedule: linear
+    network_settings:
+      normalize: true
+      hidden_units: 128
+      num_layers: 2
+      vis_encode_type: simple
+    reward_signals:
+      extrinsic:
+        gamma: 0.99
+        strength: 1.0
+    output_path: default
+    keep_checkpoints: 5
+    max_steps: 500000
+    time_horizon: 1000
+    summary_freq: 12000
+    threaded: true
-  resampling-interval: 5000
-  mass:
-    sampler-type: "uniform"
-    min_value: 0.5
-    max_value: 10
-  gravity:
-    sampler-type: "uniform"
-    min_value: 7
-    max_value: 12
-  scale:
-    sampler-type: "uniform"
-    min_value: 0.75
-    max_value: 3
+  resampling-interval: 5000
+  mass:
+    sampler-type: uniform
+    min_value: 0.5
+    max_value: 10
+  gravity:
+    sampler-type: uniform
+    min_value: 7
+    max_value: 12
+  scale:
+    sampler-type: uniform
+    min_value: 0.75
+    max_value: 3
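
In the merged single-file format, the sampler block above travels with the
training config rather than living in a separate sampler file. Assuming the
top-level key handled by config/upgrade_config.py is parameter_randomization
(not visible in this capture), the new block would nest as:

    parameter_randomization:
      resampling-interval: 5000
      mass:
        sampler-type: uniform
        min_value: 0.5
        max_value: 10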

config/ppo/Basic.yaml (42 changes)

 behaviors:
   Basic:
-    trainer: ppo
-    batch_size: 32
-    beta: 0.005
-    buffer_size: 256
-    epsilon: 0.2
-    hidden_units: 20
-    lambd: 0.95
-    learning_rate: 0.0003
-    learning_rate_schedule: linear
-    max_steps: 5.0e5
-    memory_size: 128
-    normalize: false
-    num_epoch: 3
-    num_layers: 1
-    time_horizon: 3
-    sequence_length: 64
-    summary_freq: 2000
-    use_recurrent: false
-    vis_encode_type: simple
+    trainer_type: ppo
+    hyperparameters:
+      batch_size: 32
+      buffer_size: 256
+      learning_rate: 0.0003
+      beta: 0.005
+      epsilon: 0.2
+      lambd: 0.95
+      num_epoch: 3
+      learning_rate_schedule: linear
+    network_settings:
+      normalize: false
+      hidden_units: 20
+      num_layers: 1
+      vis_encode_type: simple
-        gamma: 0.9
+        gamma: 0.9
+    output_path: default
+    keep_checkpoints: 5
+    max_steps: 500000
+    time_horizon: 3
+    summary_freq: 2000
+    threaded: true

config/ppo/Bouncer.yaml (42 changes)

 behaviors:
   Bouncer:
-    trainer: ppo
-    batch_size: 1024
-    beta: 0.005
-    buffer_size: 10240
-    epsilon: 0.2
-    hidden_units: 64
-    lambd: 0.95
-    learning_rate: 0.0003
-    learning_rate_schedule: linear
-    max_steps: 4.0e6
-    memory_size: 128
-    normalize: true
-    num_epoch: 3
-    num_layers: 2
-    time_horizon: 64
-    sequence_length: 64
-    summary_freq: 10000
-    use_recurrent: false
-    vis_encode_type: simple
+    trainer_type: ppo
+    hyperparameters:
+      batch_size: 1024
+      buffer_size: 10240
+      learning_rate: 0.0003
+      beta: 0.005
+      epsilon: 0.2
+      lambd: 0.95
+      num_epoch: 3
+      learning_rate_schedule: linear
+    network_settings:
+      normalize: true
+      hidden_units: 64
+      num_layers: 2
+      vis_encode_type: simple
-        gamma: 0.99
+        gamma: 0.99
+    output_path: default
+    keep_checkpoints: 5
+    max_steps: 4000000
+    time_horizon: 64
+    summary_freq: 10000
+    threaded: true

config/ppo/CrawlerDynamic.yaml (42 changes)

 behaviors:
   CrawlerDynamic:
-    trainer: ppo
-    batch_size: 2024
-    beta: 0.005
-    buffer_size: 20240
-    epsilon: 0.2
-    hidden_units: 512
-    lambd: 0.95
-    learning_rate: 0.0003
-    learning_rate_schedule: linear
-    max_steps: 1e7
-    memory_size: 128
-    normalize: true
-    num_epoch: 3
-    num_layers: 3
-    time_horizon: 1000
-    sequence_length: 64
-    summary_freq: 30000
-    use_recurrent: false
-    vis_encode_type: simple
+    trainer_type: ppo
+    hyperparameters:
+      batch_size: 2024
+      buffer_size: 20240
+      learning_rate: 0.0003
+      beta: 0.005
+      epsilon: 0.2
+      lambd: 0.95
+      num_epoch: 3
+      learning_rate_schedule: linear
+    network_settings:
+      normalize: true
+      hidden_units: 512
+      num_layers: 3
+      vis_encode_type: simple
-        gamma: 0.995
+        gamma: 0.995
+    output_path: default
+    keep_checkpoints: 5
+    max_steps: 10000000
+    time_horizon: 1000
+    summary_freq: 30000
+    threaded: true

config/ppo/CrawlerStatic.yaml (42 changes)

 behaviors:
   CrawlerStatic:
-    trainer: ppo
-    batch_size: 2024
-    beta: 0.005
-    buffer_size: 20240
-    epsilon: 0.2
-    hidden_units: 512
-    lambd: 0.95
-    learning_rate: 0.0003
-    learning_rate_schedule: linear
-    max_steps: 1e7
-    memory_size: 128
-    normalize: true
-    num_epoch: 3
-    num_layers: 3
-    time_horizon: 1000
-    sequence_length: 64
-    summary_freq: 30000
-    use_recurrent: false
-    vis_encode_type: simple
+    trainer_type: ppo
+    hyperparameters:
+      batch_size: 2024
+      buffer_size: 20240
+      learning_rate: 0.0003
+      beta: 0.005
+      epsilon: 0.2
+      lambd: 0.95
+      num_epoch: 3
+      learning_rate_schedule: linear
+    network_settings:
+      normalize: true
+      hidden_units: 512
+      num_layers: 3
+      vis_encode_type: simple
-        gamma: 0.995
+        gamma: 0.995
+    output_path: default
+    keep_checkpoints: 5
+    max_steps: 10000000
+    time_horizon: 1000
+    summary_freq: 30000
+    threaded: true

config/ppo/FoodCollector.yaml (42 changes)

 behaviors:
   FoodCollector:
-    trainer: ppo
-    batch_size: 1024
-    beta: 0.005
-    buffer_size: 10240
-    epsilon: 0.2
-    hidden_units: 128
-    lambd: 0.95
-    learning_rate: 0.0003
-    learning_rate_schedule: linear
-    max_steps: 2.0e6
-    memory_size: 128
-    normalize: false
-    num_epoch: 3
-    num_layers: 2
-    time_horizon: 64
-    sequence_length: 64
-    summary_freq: 10000
-    use_recurrent: false
-    vis_encode_type: simple
+    trainer_type: ppo
+    hyperparameters:
+      batch_size: 1024
+      buffer_size: 10240
+      learning_rate: 0.0003
+      beta: 0.005
+      epsilon: 0.2
+      lambd: 0.95
+      num_epoch: 3
+      learning_rate_schedule: linear
+    network_settings:
+      normalize: false
+      hidden_units: 128
+      num_layers: 2
+      vis_encode_type: simple
-        gamma: 0.99
+        gamma: 0.99
+    output_path: default
+    keep_checkpoints: 5
+    max_steps: 2000000
+    time_horizon: 64
+    summary_freq: 10000
+    threaded: true

config/ppo/GridWorld.yaml (42 changes)

 behaviors:
   GridWorld:
-    trainer: ppo
-    batch_size: 32
-    beta: 0.005
-    buffer_size: 256
-    epsilon: 0.2
-    hidden_units: 256
-    lambd: 0.95
-    learning_rate: 0.0003
-    learning_rate_schedule: linear
-    max_steps: 500000
-    memory_size: 128
-    normalize: false
-    num_epoch: 3
-    num_layers: 1
-    time_horizon: 5
-    sequence_length: 64
-    summary_freq: 20000
-    use_recurrent: false
-    vis_encode_type: simple
+    trainer_type: ppo
+    hyperparameters:
+      batch_size: 32
+      buffer_size: 256
+      learning_rate: 0.0003
+      beta: 0.005
+      epsilon: 0.2
+      lambd: 0.95
+      num_epoch: 3
+      learning_rate_schedule: linear
+    network_settings:
+      normalize: false
+      hidden_units: 256
+      num_layers: 1
+      vis_encode_type: simple
-        gamma: 0.9
+        gamma: 0.9
+    output_path: default
+    keep_checkpoints: 5
+    max_steps: 500000
+    time_horizon: 5
+    summary_freq: 20000
+    threaded: true

config/ppo/Hallway.yaml (45 changes)

 behaviors:
   Hallway:
-    trainer: ppo
-    batch_size: 128
-    beta: 0.01
-    buffer_size: 1024
-    epsilon: 0.2
-    hidden_units: 128
-    lambd: 0.95
-    learning_rate: 0.0003
-    learning_rate_schedule: linear
-    max_steps: 1.0e7
-    memory_size: 128
-    normalize: false
-    num_epoch: 3
-    num_layers: 2
-    time_horizon: 64
-    sequence_length: 64
-    summary_freq: 10000
-    use_recurrent: true
-    vis_encode_type: simple
+    trainer_type: ppo
+    hyperparameters:
+      batch_size: 128
+      buffer_size: 1024
+      learning_rate: 0.0003
+      beta: 0.01
+      epsilon: 0.2
+      lambd: 0.95
+      num_epoch: 3
+      learning_rate_schedule: linear
+    network_settings:
+      normalize: false
+      hidden_units: 128
+      num_layers: 2
+      vis_encode_type: simple
+      memory:
+        sequence_length: 64
+        memory_size: 128
-        gamma: 0.99
+        gamma: 0.99
+    output_path: default
+    keep_checkpoints: 5
+    max_steps: 10000000
+    time_horizon: 64
+    summary_freq: 10000
+    threaded: true

config/ppo/PushBlock.yaml (42 changes)

 behaviors:
   PushBlock:
-    trainer: ppo
-    batch_size: 128
-    beta: 0.01
-    buffer_size: 2048
-    epsilon: 0.2
-    hidden_units: 256
-    lambd: 0.95
-    learning_rate: 0.0003
-    learning_rate_schedule: linear
-    max_steps: 2.0e6
-    memory_size: 128
-    normalize: false
-    num_epoch: 3
-    num_layers: 2
-    time_horizon: 64
-    sequence_length: 64
-    summary_freq: 60000
-    use_recurrent: false
-    vis_encode_type: simple
+    trainer_type: ppo
+    hyperparameters:
+      batch_size: 128
+      buffer_size: 2048
+      learning_rate: 0.0003
+      beta: 0.01
+      epsilon: 0.2
+      lambd: 0.95
+      num_epoch: 3
+      learning_rate_schedule: linear
+    network_settings:
+      normalize: false
+      hidden_units: 256
+      num_layers: 2
+      vis_encode_type: simple
-        gamma: 0.99
+        gamma: 0.99
+    output_path: default
+    keep_checkpoints: 5
+    max_steps: 2000000
+    time_horizon: 64
+    summary_freq: 60000
+    threaded: true

config/ppo/Pyramids.yaml (45 changes)

 behaviors:
   Pyramids:
-    trainer: ppo
-    batch_size: 128
-    beta: 0.01
-    buffer_size: 2048
-    epsilon: 0.2
-    hidden_units: 512
-    lambd: 0.95
-    learning_rate: 0.0003
-    learning_rate_schedule: linear
-    max_steps: 1.0e7
-    memory_size: 128
-    normalize: false
-    num_epoch: 3
-    num_layers: 2
-    time_horizon: 128
-    sequence_length: 64
-    summary_freq: 30000
-    use_recurrent: false
-    vis_encode_type: simple
+    trainer_type: ppo
+    hyperparameters:
+      batch_size: 128
+      buffer_size: 2048
+      learning_rate: 0.0003
+      beta: 0.01
+      epsilon: 0.2
+      lambd: 0.95
+      num_epoch: 3
+      learning_rate_schedule: linear
+    network_settings:
+      normalize: false
+      hidden_units: 512
+      num_layers: 2
+      vis_encode_type: simple
-        strength: 1.0
+        strength: 1.0
-        strength: 0.02
+        strength: 0.02
         learning_rate: 0.0003
+    output_path: default
+    keep_checkpoints: 5
+    max_steps: 10000000
+    time_horizon: 128
+    summary_freq: 30000
+    threaded: true

config/ppo/Reacher.yaml (42 changes)

 behaviors:
   Reacher:
-    trainer: ppo
-    batch_size: 2024
-    beta: 0.005
-    buffer_size: 20240
-    epsilon: 0.2
-    hidden_units: 128
-    lambd: 0.95
-    learning_rate: 0.0003
-    learning_rate_schedule: linear
-    max_steps: 2e7
-    memory_size: 128
-    normalize: true
-    num_epoch: 3
-    num_layers: 2
-    time_horizon: 1000
-    sequence_length: 64
-    summary_freq: 60000
-    use_recurrent: false
-    vis_encode_type: simple
+    trainer_type: ppo
+    hyperparameters:
+      batch_size: 2024
+      buffer_size: 20240
+      learning_rate: 0.0003
+      beta: 0.005
+      epsilon: 0.2
+      lambd: 0.95
+      num_epoch: 3
+      learning_rate_schedule: linear
+    network_settings:
+      normalize: true
+      hidden_units: 128
+      num_layers: 2
+      vis_encode_type: simple
-        gamma: 0.995
+        gamma: 0.995
+    output_path: default
+    keep_checkpoints: 5
+    max_steps: 20000000
+    time_horizon: 1000
+    summary_freq: 60000
+    threaded: true

config/ppo/SoccerTwos.yaml (56 changes)

 behaviors:
   SoccerTwos:
-    trainer: ppo
-    batch_size: 2048
-    beta: 0.005
-    buffer_size: 20480
-    epsilon: 0.2
-    hidden_units: 512
-    lambd: 0.95
-    learning_rate: 0.0003
-    learning_rate_schedule: constant
-    max_steps: 5.0e7
-    memory_size: 128
-    normalize: false
-    num_epoch: 3
-    num_layers: 2
-    time_horizon: 1000
-    sequence_length: 64
-    summary_freq: 10000
-    use_recurrent: false
-    vis_encode_type: simple
+    trainer_type: ppo
+    hyperparameters:
+      batch_size: 2048
+      buffer_size: 20480
+      learning_rate: 0.0003
+      beta: 0.005
+      epsilon: 0.2
+      lambd: 0.95
+      num_epoch: 3
+      learning_rate_schedule: constant
+    network_settings:
+      normalize: false
+      hidden_units: 512
+      num_layers: 2
+      vis_encode_type: simple
-        strength: 1.0
+        strength: 1.0
+    output_path: default
+    keep_checkpoints: 5
+    max_steps: 50000000
+    time_horizon: 1000
+    summary_freq: 10000
+    threaded: true
-      window: 10
-      play_against_latest_model_ratio: 0.5
-      swap_steps: 50000
-    curriculum:
-      measure: progress
-      thresholds: [0.05, 0.1]
-      min_lesson_length: 100
-      signal_smoothing: true
-      parameters:
-        ball_touch: [1.0, 0.5, 0.0]
+      swap_steps: 50000
+      window: 10
+      play_against_latest_model_ratio: 0.5
+      initial_elo: 1200.0
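
The self-play keys above (window, play_against_latest_model_ratio,
swap_steps, initial_elo) also lose their parent key in this capture; in the
ml-agents config schema they nest under a self_play section (initial_elo
appears only on the new side of the diff), e.g.:

    behaviors:
      SoccerTwos:
        self_play:
          swap_steps: 50000
          window: 10
          play_against_latest_model_ratio: 0.5
          initial_elo: 1200.0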

config/ppo/StrikersVsGoalie.yaml (99 changes)

 behaviors:
   Goalie:
-    trainer: ppo
-    batch_size: 2048
-    beta: 0.005
-    buffer_size: 20480
-    epsilon: 0.2
-    hidden_units: 512
-    lambd: 0.95
-    learning_rate: 0.0003
-    learning_rate_schedule: constant
-    max_steps: 5.0e7
-    memory_size: 128
-    normalize: false
-    num_epoch: 3
-    num_layers: 2
-    time_horizon: 1000
-    sequence_length: 64
-    summary_freq: 10000
-    use_recurrent: false
-    vis_encode_type: simple
+    trainer_type: ppo
+    hyperparameters:
+      batch_size: 2048
+      buffer_size: 20480
+      learning_rate: 0.0003
+      beta: 0.005
+      epsilon: 0.2
+      lambd: 0.95
+      num_epoch: 3
+      learning_rate_schedule: constant
+    network_settings:
+      normalize: false
+      hidden_units: 512
+      num_layers: 2
+      vis_encode_type: simple
-        gamma: 0.99
+        gamma: 0.99
+    output_path: default
+    keep_checkpoints: 5
+    max_steps: 50000000
+    time_horizon: 1000
+    summary_freq: 10000
+    threaded: true
-      window: 10
-      play_against_latest_model_ratio: 0.5
-      swap_steps: 25000
+      swap_steps: 25000
+      window: 10
+      play_against_latest_model_ratio: 0.5
+      initial_elo: 1200.0
   Striker:
-    trainer: ppo
-    batch_size: 2048
-    beta: 0.005
-    buffer_size: 20480
-    epsilon: 0.2
-    hidden_units: 512
-    lambd: 0.95
-    learning_rate: 0.0003
-    learning_rate_schedule: constant
-    max_steps: 5.0e7
-    memory_size: 128
-    normalize: false
-    num_epoch: 3
-    num_layers: 2
-    time_horizon: 1000
-    sequence_length: 64
-    summary_freq: 10000
-    use_recurrent: false
-    vis_encode_type: simple
+    trainer_type: ppo
+    hyperparameters:
+      batch_size: 2048
+      buffer_size: 20480
+      learning_rate: 0.0003
+      beta: 0.005
+      epsilon: 0.2
+      lambd: 0.95
+      num_epoch: 3
+      learning_rate_schedule: constant
+    network_settings:
+      normalize: false
+      hidden_units: 512
+      num_layers: 2
+      vis_encode_type: simple
-        gamma: 0.99
+        gamma: 0.99
+    output_path: default
+    keep_checkpoints: 5
+    max_steps: 50000000
+    time_horizon: 1000
+    summary_freq: 10000
+    threaded: true
-      window: 10
-      play_against_latest_model_ratio: 0.5
-      team_change: 200000
+      team_change: 200000
+      window: 10
+      play_against_latest_model_ratio: 0.5
+      initial_elo: 1200.0

config/ppo/Tennis.yaml (49 changes)

 behaviors:
   Tennis:
-    trainer: ppo
-    batch_size: 1024
-    beta: 0.005
-    buffer_size: 10240
-    epsilon: 0.2
-    hidden_units: 256
-    lambd: 0.95
-    learning_rate: 0.0003
-    learning_rate_schedule: constant
-    max_steps: 5.0e7
-    memory_size: 128
-    normalize: true
-    num_epoch: 3
-    num_layers: 2
-    time_horizon: 1000
-    sequence_length: 64
-    summary_freq: 10000
-    use_recurrent: false
-    vis_encode_type: simple
+    trainer_type: ppo
+    hyperparameters:
+      batch_size: 1024
+      buffer_size: 10240
+      learning_rate: 0.0003
+      beta: 0.005
+      epsilon: 0.2
+      lambd: 0.95
+      num_epoch: 3
+      learning_rate_schedule: constant
+    network_settings:
+      normalize: true
+      hidden_units: 256
+      num_layers: 2
+      vis_encode_type: simple
-        gamma: 0.99
+        gamma: 0.99
+    output_path: default
+    keep_checkpoints: 5
+    max_steps: 50000000
+    time_horizon: 1000
+    summary_freq: 10000
+    threaded: true
-      window: 10
-      play_against_latest_model_ratio: 0.5
-      team_change: 100000
+      team_change: 100000
+      window: 10
+      play_against_latest_model_ratio: 0.5
+      initial_elo: 1200.0

config/ppo/VisualHallway.yaml (45 changes)

 behaviors:
   VisualHallway:
-    trainer: ppo
-    batch_size: 64
-    beta: 0.01
-    buffer_size: 1024
-    epsilon: 0.2
-    hidden_units: 128
-    lambd: 0.95
-    learning_rate: 0.0003
-    learning_rate_schedule: linear
-    max_steps: 1.0e7
-    memory_size: 128
-    normalize: false
-    num_epoch: 3
-    num_layers: 1
-    time_horizon: 64
-    sequence_length: 64
-    summary_freq: 10000
-    use_recurrent: true
-    vis_encode_type: simple
+    trainer_type: ppo
+    hyperparameters:
+      batch_size: 64
+      buffer_size: 1024
+      learning_rate: 0.0003
+      beta: 0.01
+      epsilon: 0.2
+      lambd: 0.95
+      num_epoch: 3
+      learning_rate_schedule: linear
+    network_settings:
+      normalize: false
+      hidden_units: 128
+      num_layers: 1
+      vis_encode_type: simple
+      memory:
+        sequence_length: 64
+        memory_size: 128
-        gamma: 0.99
+        gamma: 0.99
+    output_path: default
+    keep_checkpoints: 5
+    max_steps: 10000000
+    time_horizon: 64
+    summary_freq: 10000
+    threaded: true

config/ppo/VisualPushBlock.yaml (45 changes)

 behaviors:
   VisualPushBlock:
-    trainer: ppo
-    batch_size: 64
-    beta: 0.01
-    buffer_size: 1024
-    epsilon: 0.2
-    hidden_units: 128
-    lambd: 0.95
-    learning_rate: 0.0003
-    learning_rate_schedule: linear
-    max_steps: 3.0e6
-    memory_size: 128
-    normalize: false
-    num_epoch: 3
-    num_layers: 1
-    time_horizon: 64
-    sequence_length: 32
-    summary_freq: 60000
-    use_recurrent: true
-    vis_encode_type: simple
+    trainer_type: ppo
+    hyperparameters:
+      batch_size: 64
+      buffer_size: 1024
+      learning_rate: 0.0003
+      beta: 0.01
+      epsilon: 0.2
+      lambd: 0.95
+      num_epoch: 3
+      learning_rate_schedule: linear
+    network_settings:
+      normalize: false
+      hidden_units: 128
+      num_layers: 1
+      vis_encode_type: simple
+      memory:
+        sequence_length: 32
+        memory_size: 128
-        gamma: 0.99
+        gamma: 0.99
+    output_path: default
+    keep_checkpoints: 5
+    max_steps: 3000000
+    time_horizon: 64
+    summary_freq: 60000
+    threaded: true

config/ppo/VisualPyramids.yaml (45 changes)

 behaviors:
   VisualPyramids:
-    trainer: ppo
-    batch_size: 64
-    beta: 0.01
-    buffer_size: 2024
-    epsilon: 0.2
-    hidden_units: 256
-    lambd: 0.95
-    learning_rate: 0.0003
-    learning_rate_schedule: linear
-    max_steps: 1.0e7
-    memory_size: 128
-    normalize: false
-    num_epoch: 3
-    num_layers: 1
-    time_horizon: 128
-    sequence_length: 64
-    summary_freq: 10000
-    use_recurrent: false
-    vis_encode_type: simple
+    trainer_type: ppo
+    hyperparameters:
+      batch_size: 64
+      buffer_size: 2024
+      learning_rate: 0.0003
+      beta: 0.01
+      epsilon: 0.2
+      lambd: 0.95
+      num_epoch: 3
+      learning_rate_schedule: linear
+    network_settings:
+      normalize: false
+      hidden_units: 256
+      num_layers: 1
+      vis_encode_type: simple
-        strength: 1.0
+        strength: 1.0
-        strength: 0.01
+        strength: 0.01
         learning_rate: 0.0003
+    output_path: default
+    keep_checkpoints: 5
+    max_steps: 10000000
+    time_horizon: 128
+    summary_freq: 10000
+    threaded: true

config/ppo/Walker.yaml (42 changes)

 behaviors:
   Walker:
-    trainer: ppo
-    batch_size: 2048
-    beta: 0.005
-    buffer_size: 20480
-    epsilon: 0.2
-    hidden_units: 512
-    lambd: 0.95
-    learning_rate: 0.0003
-    learning_rate_schedule: linear
-    max_steps: 2e7
-    memory_size: 128
-    normalize: true
-    num_epoch: 3
-    num_layers: 3
-    time_horizon: 1000
-    sequence_length: 64
-    summary_freq: 30000
-    use_recurrent: false
-    vis_encode_type: simple
+    trainer_type: ppo
+    hyperparameters:
+      batch_size: 2048
+      buffer_size: 20480
+      learning_rate: 0.0003
+      beta: 0.005
+      epsilon: 0.2
+      lambd: 0.95
+      num_epoch: 3
+      learning_rate_schedule: linear
+    network_settings:
+      normalize: true
+      hidden_units: 512
+      num_layers: 3
+      vis_encode_type: simple
-        gamma: 0.995
+        gamma: 0.995
+    output_path: default
+    keep_checkpoints: 5
+    max_steps: 20000000
+    time_horizon: 1000
+    summary_freq: 30000
+    threaded: true

config/ppo/WallJump.yaml (83 changes)

 behaviors:
   BigWallJump:
-    trainer: ppo
-    batch_size: 128
-    beta: 0.005
-    buffer_size: 2048
-    epsilon: 0.2
-    hidden_units: 256
-    lambd: 0.95
-    learning_rate: 0.0003
-    learning_rate_schedule: linear
-    max_steps: 2e7
-    memory_size: 128
-    normalize: false
-    num_epoch: 3
-    num_layers: 2
-    time_horizon: 128
-    sequence_length: 64
-    summary_freq: 20000
-    use_recurrent: false
-    vis_encode_type: simple
+    trainer_type: ppo
+    hyperparameters:
+      batch_size: 128
+      buffer_size: 2048
+      learning_rate: 0.0003
+      beta: 0.005
+      epsilon: 0.2
+      lambd: 0.95
+      num_epoch: 3
+      learning_rate_schedule: linear
+    network_settings:
+      normalize: false
+      hidden_units: 256
+      num_layers: 2
+      vis_encode_type: simple
-        strength: 1.0
-  SmallWallJump:
-    trainer: ppo
-    batch_size: 128
-    beta: 0.005
-    buffer_size: 2048
-    epsilon: 0.2
-    hidden_units: 256
-    lambd: 0.95
-    learning_rate: 0.0003
-    learning_rate_schedule: linear
-    max_steps: 5e6
-    memory_size: 128
-    normalize: false
-    num_epoch: 3
-    num_layers: 2
+        strength: 1.0
+    output_path: default
+    keep_checkpoints: 5
+    max_steps: 20000000
-    sequence_length: 64
-    use_recurrent: false
-    vis_encode_type: simple
+    threaded: true
+  SmallWallJump:
+    trainer_type: ppo
+    hyperparameters:
+      batch_size: 128
+      buffer_size: 2048
+      learning_rate: 0.0003
+      beta: 0.005
+      epsilon: 0.2
+      lambd: 0.95
+      num_epoch: 3
+      learning_rate_schedule: linear
+    network_settings:
+      normalize: false
+      hidden_units: 256
+      num_layers: 2
+      vis_encode_type: simple
-        gamma: 0.99
+        gamma: 0.99
+    output_path: default
+    keep_checkpoints: 5
+    max_steps: 5000000
+    time_horizon: 128
+    summary_freq: 20000
+    threaded: true

config/ppo/WallJump_curriculum.yaml (115 changes)

 behaviors:
   BigWallJump:
-    trainer: ppo
-    batch_size: 128
-    beta: 0.005
-    buffer_size: 2048
-    epsilon: 0.2
-    hidden_units: 256
-    lambd: 0.95
-    learning_rate: 0.0003
-    learning_rate_schedule: linear
-    max_steps: 2e7
-    memory_size: 128
-    normalize: false
-    num_epoch: 3
-    num_layers: 2
-    time_horizon: 128
-    sequence_length: 64
-    summary_freq: 20000
-    use_recurrent: false
-    vis_encode_type: simple
+    trainer_type: ppo
+    hyperparameters:
+      batch_size: 128
+      buffer_size: 2048
+      learning_rate: 0.0003
+      beta: 0.005
+      epsilon: 0.2
+      lambd: 0.95
+      num_epoch: 3
+      learning_rate_schedule: linear
+    network_settings:
+      normalize: false
+      hidden_units: 256
+      num_layers: 2
+      vis_encode_type: simple
-        gamma: 0.99
+        gamma: 0.99
-    curriculum:
-      measure: progress
-      thresholds: [0.1, 0.3, 0.5]
-      min_lesson_length: 100
-      signal_smoothing: true
-      parameters:
-        big_wall_min_height: [0.0, 4.0, 6.0, 8.0]
-        big_wall_max_height: [4.0, 7.0, 8.0, 8.0]
-  SmallWallJump:
-    trainer: ppo
-    batch_size: 128
-    beta: 0.005
-    buffer_size: 2048
-    epsilon: 0.2
-    hidden_units: 256
-    lambd: 0.95
-    learning_rate: 0.0003
-    learning_rate_schedule: linear
-    max_steps: 5e6
-    memory_size: 128
-    normalize: false
-    num_epoch: 3
-    num_layers: 2
+    output_path: default
+    keep_checkpoints: 5
+    max_steps: 20000000
-    sequence_length: 64
-    use_recurrent: false
-    vis_encode_type: simple
+    threaded: true
+  SmallWallJump:
+    trainer_type: ppo
+    hyperparameters:
+      batch_size: 128
+      buffer_size: 2048
+      learning_rate: 0.0003
+      beta: 0.005
+      epsilon: 0.2
+      lambd: 0.95
+      num_epoch: 3
+      learning_rate_schedule: linear
+    network_settings:
+      normalize: false
+      hidden_units: 256
+      num_layers: 2
+      vis_encode_type: simple
-        gamma: 0.99
+        gamma: 0.99
-    curriculum:
-      measure: progress
-      thresholds: [0.1, 0.3, 0.5]
-      min_lesson_length: 100
-      signal_smoothing: true
-      parameters:
-        small_wall_height: [1.5, 2.0, 2.5, 4.0]
+    output_path: default
+    keep_checkpoints: 5
+    max_steps: 5000000
+    time_horizon: 128
+    summary_freq: 20000
+    threaded: true
+curriculum:
+  BigWallJump:
+    measure: progress
+    thresholds: [0.1, 0.3, 0.5]
+    min_lesson_length: 100
+    signal_smoothing: true
+    parameters:
+      big_wall_min_height: [0.0, 4.0, 6.0, 8.0]
+      big_wall_max_height: [4.0, 7.0, 8.0, 8.0]
+  SmallWallJump:
+    measure: progress
+    thresholds: [0.1, 0.3, 0.5]
+    min_lesson_length: 100
+    signal_smoothing: true
+    parameters:
+      small_wall_height: [1.5, 2.0, 2.5, 4.0]

config/ppo/WormDynamic.yaml (42 changes)

 behaviors:
   WormDynamic:
-    trainer: ppo
-    batch_size: 2024
-    beta: 0.005
-    buffer_size: 20240
-    epsilon: 0.2
-    hidden_units: 512
-    lambd: 0.95
-    learning_rate: 0.0003
-    learning_rate_schedule: linear
-    max_steps: 3.5e6
-    memory_size: 128
-    normalize: true
-    num_epoch: 3
-    num_layers: 3
-    time_horizon: 1000
-    sequence_length: 64
-    summary_freq: 30000
-    use_recurrent: false
-    vis_encode_type: simple
+    trainer_type: ppo
+    hyperparameters:
+      batch_size: 2024
+      buffer_size: 20240
+      learning_rate: 0.0003
+      beta: 0.005
+      epsilon: 0.2
+      lambd: 0.95
+      num_epoch: 3
+      learning_rate_schedule: linear
+    network_settings:
+      normalize: true
+      hidden_units: 512
+      num_layers: 3
+      vis_encode_type: simple
-        gamma: 0.995
+        gamma: 0.995
+    output_path: default
+    keep_checkpoints: 5
+    max_steps: 3500000
+    time_horizon: 1000
+    summary_freq: 30000
+    threaded: true

config/ppo/WormStatic.yaml (42 changes)