GitHub
5 年前
当前提交
e92b4f88
共有 98 个文件被更改,包括 3088 次插入 和 3176 次删除
-
2com.unity.ml-agents/CHANGELOG.md
-
46config/imitation/CrawlerStatic.yaml
-
46config/imitation/FoodCollector.yaml
-
48config/imitation/Hallway.yaml
-
43config/imitation/PushBlock.yaml
-
30config/imitation/Pyramids.yaml
-
42config/ppo/3DBall.yaml
-
42config/ppo/3DBallHard.yaml
-
76config/ppo/3DBall_randomize.yaml
-
42config/ppo/Basic.yaml
-
42config/ppo/Bouncer.yaml
-
42config/ppo/CrawlerDynamic.yaml
-
42config/ppo/CrawlerStatic.yaml
-
42config/ppo/FoodCollector.yaml
-
42config/ppo/GridWorld.yaml
-
45config/ppo/Hallway.yaml
-
42config/ppo/PushBlock.yaml
-
45config/ppo/Pyramids.yaml
-
42config/ppo/Reacher.yaml
-
56config/ppo/SoccerTwos.yaml
-
99config/ppo/StrikersVsGoalie.yaml
-
49config/ppo/Tennis.yaml
-
45config/ppo/VisualHallway.yaml
-
45config/ppo/VisualPushBlock.yaml
-
45config/ppo/VisualPyramids.yaml
-
42config/ppo/Walker.yaml
-
83config/ppo/WallJump.yaml
-
115config/ppo/WallJump_curriculum.yaml
-
42config/ppo/WormDynamic.yaml
-
42config/ppo/WormStatic.yaml
-
44config/sac/3DBall.yaml
-
44config/sac/3DBallHard.yaml
-
44config/sac/Basic.yaml
-
44config/sac/Bouncer.yaml
-
44config/sac/CrawlerDynamic.yaml
-
44config/sac/CrawlerStatic.yaml
-
44config/sac/FoodCollector.yaml
-
44config/sac/GridWorld.yaml
-
47config/sac/Hallway.yaml
-
44config/sac/PushBlock.yaml
-
48config/sac/Pyramids.yaml
-
44config/sac/Reacher.yaml
-
50config/sac/Tennis.yaml
-
48config/sac/VisualHallway.yaml
-
48config/sac/VisualPushBlock.yaml
-
48config/sac/VisualPyramids.yaml
-
44config/sac/Walker.yaml
-
87config/sac/WallJump.yaml
-
44config/sac/WormDynamic.yaml
-
44config/sac/WormStatic.yaml
-
13docs/Migrating.md
-
113docs/Training-Configuration-File.md
-
141docs/Training-ML-Agents.md
-
233ml-agents/mlagents/trainers/cli_utils.py
-
39ml-agents/mlagents/trainers/components/bc/module.py
-
29ml-agents/mlagents/trainers/components/reward_signals/__init__.py
-
36ml-agents/mlagents/trainers/components/reward_signals/curiosity/signal.py
-
12ml-agents/mlagents/trainers/components/reward_signals/extrinsic/signal.py
-
47ml-agents/mlagents/trainers/components/reward_signals/gail/signal.py
-
22ml-agents/mlagents/trainers/components/reward_signals/reward_signal_factory.py
-
66ml-agents/mlagents/trainers/curriculum.py
-
22ml-agents/mlagents/trainers/ghost/trainer.py
-
377ml-agents/mlagents/trainers/learn.py
-
7ml-agents/mlagents/trainers/meta_curriculum.py
-
29ml-agents/mlagents/trainers/optimizer/tf_optimizer.py
-
11ml-agents/mlagents/trainers/policy/nn_policy.py
-
41ml-agents/mlagents/trainers/policy/tf_policy.py
-
33ml-agents/mlagents/trainers/ppo/optimizer.py
-
42ml-agents/mlagents/trainers/ppo/trainer.py
-
6ml-agents/mlagents/trainers/run_experiment.py
-
38ml-agents/mlagents/trainers/sac/optimizer.py
-
100ml-agents/mlagents/trainers/sac/trainer.py
-
39ml-agents/mlagents/trainers/tests/test_barracuda_converter.py
-
100ml-agents/mlagents/trainers/tests/test_bcmodule.py
-
94ml-agents/mlagents/trainers/tests/test_curriculum.py
-
36ml-agents/mlagents/trainers/tests/test_distributions.py
-
40ml-agents/mlagents/trainers/tests/test_ghost.py
-
133ml-agents/mlagents/trainers/tests/test_learn.py
-
55ml-agents/mlagents/trainers/tests/test_meta_curriculum.py
-
72ml-agents/mlagents/trainers/tests/test_nn_policy.py
-
11ml-agents/mlagents/trainers/tests/test_policy.py
-
99ml-agents/mlagents/trainers/tests/test_ppo.py
-
141ml-agents/mlagents/trainers/tests/test_reward_signals.py
-
18ml-agents/mlagents/trainers/tests/test_rl_trainer.py
-
83ml-agents/mlagents/trainers/tests/test_sac.py
-
388ml-agents/mlagents/trainers/tests/test_simple_rl.py
-
4ml-agents/mlagents/trainers/tests/test_subprocess_env_manager.py
-
274ml-agents/mlagents/trainers/tests/test_trainer_util.py
-
10ml-agents/mlagents/trainers/trainer/rl_trainer.py
-
28ml-agents/mlagents/trainers/trainer/trainer.py
-
8ml-agents/mlagents/trainers/trainer_controller.py
-
106ml-agents/mlagents/trainers/trainer_util.py
-
2ml-agents/setup.py
-
8ml-agents/tests/yamato/training_int_tests.py
-
12ml-agents/tests/yamato/yamato_utils.py
-
110config/upgrade_config.py
-
373ml-agents/mlagents/trainers/settings.py
-
151ml-agents/mlagents/trainers/tests/test_settings.py
|
|||
behaviors: |
|||
CrawlerStatic: |
|||
trainer: ppo |
|||
batch_size: 2024 |
|||
beta: 0.005 |
|||
buffer_size: 20240 |
|||
epsilon: 0.2 |
|||
hidden_units: 512 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
max_steps: 1e7 |
|||
memory_size: 256 |
|||
normalize: true |
|||
num_epoch: 3 |
|||
num_layers: 3 |
|||
time_horizon: 1000 |
|||
sequence_length: 64 |
|||
summary_freq: 30000 |
|||
use_recurrent: false |
|||
trainer_type: ppo |
|||
hyperparameters: |
|||
batch_size: 2024 |
|||
buffer_size: 20240 |
|||
learning_rate: 0.0003 |
|||
beta: 0.005 |
|||
epsilon: 0.2 |
|||
lambd: 0.95 |
|||
num_epoch: 3 |
|||
learning_rate_schedule: linear |
|||
network_settings: |
|||
normalize: true |
|||
hidden_units: 512 |
|||
num_layers: 3 |
|||
vis_encode_type: simple |
|||
strength: 1.0 |
|||
strength: 1.0 |
|||
learning_rate: 0.0003 |
|||
use_actions: false |
|||
use_vail: false |
|||
output_path: default |
|||
keep_checkpoints: 5 |
|||
max_steps: 10000000 |
|||
time_horizon: 1000 |
|||
summary_freq: 30000 |
|||
threaded: true |
|||
steps: 50000 |
|||
steps: 50000 |
|||
samples_per_update: 0 |
|
|||
behaviors: |
|||
FoodCollector: |
|||
trainer: ppo |
|||
batch_size: 64 |
|||
beta: 0.005 |
|||
buffer_size: 10240 |
|||
epsilon: 0.2 |
|||
hidden_units: 128 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
max_steps: 2.0e6 |
|||
memory_size: 256 |
|||
normalize: false |
|||
num_epoch: 3 |
|||
num_layers: 2 |
|||
time_horizon: 64 |
|||
sequence_length: 32 |
|||
summary_freq: 10000 |
|||
use_recurrent: false |
|||
trainer_type: ppo |
|||
hyperparameters: |
|||
batch_size: 64 |
|||
buffer_size: 10240 |
|||
learning_rate: 0.0003 |
|||
beta: 0.005 |
|||
epsilon: 0.2 |
|||
lambd: 0.95 |
|||
num_epoch: 3 |
|||
learning_rate_schedule: linear |
|||
network_settings: |
|||
normalize: false |
|||
hidden_units: 128 |
|||
num_layers: 2 |
|||
vis_encode_type: simple |
|||
strength: 0.1 |
|||
strength: 0.1 |
|||
learning_rate: 0.0003 |
|||
use_actions: false |
|||
use_vail: false |
|||
output_path: default |
|||
keep_checkpoints: 5 |
|||
max_steps: 2000000 |
|||
time_horizon: 64 |
|||
summary_freq: 10000 |
|||
threaded: true |
|||
steps: 0 |
|||
steps: 0 |
|||
samples_per_update: 0 |
|
|||
behaviors: |
|||
Hallway: |
|||
trainer: ppo |
|||
batch_size: 128 |
|||
beta: 0.01 |
|||
buffer_size: 1024 |
|||
epsilon: 0.2 |
|||
hidden_units: 128 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
max_steps: 1.0e7 |
|||
memory_size: 256 |
|||
normalize: false |
|||
num_epoch: 3 |
|||
num_layers: 2 |
|||
time_horizon: 64 |
|||
sequence_length: 64 |
|||
summary_freq: 10000 |
|||
use_recurrent: true |
|||
trainer_type: ppo |
|||
hyperparameters: |
|||
batch_size: 128 |
|||
buffer_size: 1024 |
|||
learning_rate: 0.0003 |
|||
beta: 0.01 |
|||
epsilon: 0.2 |
|||
lambd: 0.95 |
|||
num_epoch: 3 |
|||
learning_rate_schedule: linear |
|||
network_settings: |
|||
normalize: false |
|||
hidden_units: 128 |
|||
num_layers: 2 |
|||
vis_encode_type: simple |
|||
memory: |
|||
sequence_length: 64 |
|||
memory_size: 256 |
|||
strength: 1.0 |
|||
strength: 1.0 |
|||
gamma: 0.99 |
|||
gamma: 0.99 |
|||
learning_rate: 0.0003 |
|||
use_actions: false |
|||
use_vail: false |
|||
output_path: default |
|||
keep_checkpoints: 5 |
|||
max_steps: 10000000 |
|||
time_horizon: 64 |
|||
summary_freq: 10000 |
|||
threaded: true |
|
|||
behaviors: |
|||
PushBlock: |
|||
trainer: ppo |
|||
batch_size: 128 |
|||
beta: 0.01 |
|||
buffer_size: 2048 |
|||
epsilon: 0.2 |
|||
hidden_units: 256 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
max_steps: 1.5e7 |
|||
memory_size: 256 |
|||
normalize: false |
|||
num_epoch: 3 |
|||
num_layers: 2 |
|||
time_horizon: 64 |
|||
sequence_length: 64 |
|||
summary_freq: 60000 |
|||
use_recurrent: false |
|||
trainer_type: ppo |
|||
hyperparameters: |
|||
batch_size: 128 |
|||
buffer_size: 2048 |
|||
learning_rate: 0.0003 |
|||
beta: 0.01 |
|||
epsilon: 0.2 |
|||
lambd: 0.95 |
|||
num_epoch: 3 |
|||
learning_rate_schedule: linear |
|||
network_settings: |
|||
normalize: false |
|||
hidden_units: 256 |
|||
num_layers: 2 |
|||
vis_encode_type: simple |
|||
gamma: 0.99 |
|||
gamma: 0.99 |
|||
learning_rate: 0.0003 |
|||
use_actions: false |
|||
use_vail: false |
|||
output_path: default |
|||
keep_checkpoints: 5 |
|||
max_steps: 15000000 |
|||
time_horizon: 64 |
|||
summary_freq: 60000 |
|||
threaded: true |
|
|||
behaviors: |
|||
3DBall: |
|||
trainer: ppo |
|||
batch_size: 64 |
|||
beta: 0.001 |
|||
buffer_size: 12000 |
|||
epsilon: 0.2 |
|||
hidden_units: 128 |
|||
lambd: 0.99 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: linear |
|||
max_steps: 5.0e5 |
|||
memory_size: 128 |
|||
normalize: true |
|||
num_epoch: 3 |
|||
num_layers: 2 |
|||
time_horizon: 1000 |
|||
sequence_length: 64 |
|||
summary_freq: 12000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
trainer_type: ppo |
|||
hyperparameters: |
|||
batch_size: 64 |
|||
buffer_size: 12000 |
|||
learning_rate: 0.0003 |
|||
beta: 0.001 |
|||
epsilon: 0.2 |
|||
lambd: 0.99 |
|||
num_epoch: 3 |
|||
learning_rate_schedule: linear |
|||
network_settings: |
|||
normalize: true |
|||
hidden_units: 128 |
|||
num_layers: 2 |
|||
vis_encode_type: simple |
|||
gamma: 0.99 |
|||
gamma: 0.99 |
|||
output_path: default |
|||
keep_checkpoints: 5 |
|||
max_steps: 500000 |
|||
time_horizon: 1000 |
|||
summary_freq: 12000 |
|||
threaded: true |
|
|||
behaviors: |
|||
3DBallHard: |
|||
trainer: ppo |
|||
batch_size: 1200 |
|||
beta: 0.001 |
|||
buffer_size: 12000 |
|||
epsilon: 0.2 |
|||
hidden_units: 128 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: linear |
|||
max_steps: 5.0e6 |
|||
memory_size: 128 |
|||
normalize: true |
|||
num_epoch: 3 |
|||
num_layers: 2 |
|||
time_horizon: 1000 |
|||
sequence_length: 64 |
|||
summary_freq: 12000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
trainer_type: ppo |
|||
hyperparameters: |
|||
batch_size: 1200 |
|||
buffer_size: 12000 |
|||
learning_rate: 0.0003 |
|||
beta: 0.001 |
|||
epsilon: 0.2 |
|||
lambd: 0.95 |
|||
num_epoch: 3 |
|||
learning_rate_schedule: linear |
|||
network_settings: |
|||
normalize: true |
|||
hidden_units: 128 |
|||
num_layers: 2 |
|||
vis_encode_type: simple |
|||
gamma: 0.995 |
|||
gamma: 0.995 |
|||
output_path: default |
|||
keep_checkpoints: 5 |
|||
max_steps: 5000000 |
|||
time_horizon: 1000 |
|||
summary_freq: 12000 |
|||
threaded: true |
|
|||
behaviors: |
|||
3DBall: |
|||
trainer: ppo |
|||
batch_size: 64 |
|||
beta: 0.001 |
|||
buffer_size: 12000 |
|||
epsilon: 0.2 |
|||
hidden_units: 128 |
|||
lambd: 0.99 |
|||
learning_rate: 3.0e-4 |
|||
learning_rate_schedule: linear |
|||
max_steps: 5.0e5 |
|||
memory_size: 128 |
|||
normalize: true |
|||
num_epoch: 3 |
|||
num_layers: 2 |
|||
time_horizon: 1000 |
|||
sequence_length: 64 |
|||
summary_freq: 12000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.99 |
|||
3DBall: |
|||
trainer_type: ppo |
|||
hyperparameters: |
|||
batch_size: 64 |
|||
buffer_size: 12000 |
|||
learning_rate: 0.0003 |
|||
beta: 0.001 |
|||
epsilon: 0.2 |
|||
lambd: 0.99 |
|||
num_epoch: 3 |
|||
learning_rate_schedule: linear |
|||
network_settings: |
|||
normalize: true |
|||
hidden_units: 128 |
|||
num_layers: 2 |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
gamma: 0.99 |
|||
strength: 1.0 |
|||
output_path: default |
|||
keep_checkpoints: 5 |
|||
max_steps: 500000 |
|||
time_horizon: 1000 |
|||
summary_freq: 12000 |
|||
threaded: true |
|||
resampling-interval: 5000 |
|||
mass: |
|||
sampler-type: "uniform" |
|||
min_value: 0.5 |
|||
max_value: 10 |
|||
gravity: |
|||
sampler-type: "uniform" |
|||
min_value: 7 |
|||
max_value: 12 |
|||
scale: |
|||
sampler-type: "uniform" |
|||
min_value: 0.75 |
|||
max_value: 3 |
|||
resampling-interval: 5000 |
|||
mass: |
|||
sampler-type: uniform |
|||
min_value: 0.5 |
|||
max_value: 10 |
|||
gravity: |
|||
sampler-type: uniform |
|||
min_value: 7 |
|||
max_value: 12 |
|||
scale: |
|||
sampler-type: uniform |
|||
min_value: 0.75 |
|||
max_value: 3 |
|
|||
behaviors: |
|||
Basic: |
|||
trainer: ppo |
|||
batch_size: 32 |
|||
beta: 0.005 |
|||
buffer_size: 256 |
|||
epsilon: 0.2 |
|||
hidden_units: 20 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: linear |
|||
max_steps: 5.0e5 |
|||
memory_size: 128 |
|||
normalize: false |
|||
num_epoch: 3 |
|||
num_layers: 1 |
|||
time_horizon: 3 |
|||
sequence_length: 64 |
|||
summary_freq: 2000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
trainer_type: ppo |
|||
hyperparameters: |
|||
batch_size: 32 |
|||
buffer_size: 256 |
|||
learning_rate: 0.0003 |
|||
beta: 0.005 |
|||
epsilon: 0.2 |
|||
lambd: 0.95 |
|||
num_epoch: 3 |
|||
learning_rate_schedule: linear |
|||
network_settings: |
|||
normalize: false |
|||
hidden_units: 20 |
|||
num_layers: 1 |
|||
vis_encode_type: simple |
|||
gamma: 0.9 |
|||
gamma: 0.9 |
|||
output_path: default |
|||
keep_checkpoints: 5 |
|||
max_steps: 500000 |
|||
time_horizon: 3 |
|||
summary_freq: 2000 |
|||
threaded: true |
|
|||
behaviors: |
|||
Bouncer: |
|||
trainer: ppo |
|||
batch_size: 1024 |
|||
beta: 0.005 |
|||
buffer_size: 10240 |
|||
epsilon: 0.2 |
|||
hidden_units: 64 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: linear |
|||
max_steps: 4.0e6 |
|||
memory_size: 128 |
|||
normalize: true |
|||
num_epoch: 3 |
|||
num_layers: 2 |
|||
time_horizon: 64 |
|||
sequence_length: 64 |
|||
summary_freq: 10000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
trainer_type: ppo |
|||
hyperparameters: |
|||
batch_size: 1024 |
|||
buffer_size: 10240 |
|||
learning_rate: 0.0003 |
|||
beta: 0.005 |
|||
epsilon: 0.2 |
|||
lambd: 0.95 |
|||
num_epoch: 3 |
|||
learning_rate_schedule: linear |
|||
network_settings: |
|||
normalize: true |
|||
hidden_units: 64 |
|||
num_layers: 2 |
|||
vis_encode_type: simple |
|||
gamma: 0.99 |
|||
gamma: 0.99 |
|||
output_path: default |
|||
keep_checkpoints: 5 |
|||
max_steps: 4000000 |
|||
time_horizon: 64 |
|||
summary_freq: 10000 |
|||
threaded: true |
|
|||
behaviors: |
|||
CrawlerDynamic: |
|||
trainer: ppo |
|||
batch_size: 2024 |
|||
beta: 0.005 |
|||
buffer_size: 20240 |
|||
epsilon: 0.2 |
|||
hidden_units: 512 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: linear |
|||
max_steps: 1e7 |
|||
memory_size: 128 |
|||
normalize: true |
|||
num_epoch: 3 |
|||
num_layers: 3 |
|||
time_horizon: 1000 |
|||
sequence_length: 64 |
|||
summary_freq: 30000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
trainer_type: ppo |
|||
hyperparameters: |
|||
batch_size: 2024 |
|||
buffer_size: 20240 |
|||
learning_rate: 0.0003 |
|||
beta: 0.005 |
|||
epsilon: 0.2 |
|||
lambd: 0.95 |
|||
num_epoch: 3 |
|||
learning_rate_schedule: linear |
|||
network_settings: |
|||
normalize: true |
|||
hidden_units: 512 |
|||
num_layers: 3 |
|||
vis_encode_type: simple |
|||
gamma: 0.995 |
|||
gamma: 0.995 |
|||
output_path: default |
|||
keep_checkpoints: 5 |
|||
max_steps: 10000000 |
|||
time_horizon: 1000 |
|||
summary_freq: 30000 |
|||
threaded: true |
|
|||
behaviors: |
|||
CrawlerStatic: |
|||
trainer: ppo |
|||
batch_size: 2024 |
|||
beta: 0.005 |
|||
buffer_size: 20240 |
|||
epsilon: 0.2 |
|||
hidden_units: 512 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: linear |
|||
max_steps: 1e7 |
|||
memory_size: 128 |
|||
normalize: true |
|||
num_epoch: 3 |
|||
num_layers: 3 |
|||
time_horizon: 1000 |
|||
sequence_length: 64 |
|||
summary_freq: 30000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
trainer_type: ppo |
|||
hyperparameters: |
|||
batch_size: 2024 |
|||
buffer_size: 20240 |
|||
learning_rate: 0.0003 |
|||
beta: 0.005 |
|||
epsilon: 0.2 |
|||
lambd: 0.95 |
|||
num_epoch: 3 |
|||
learning_rate_schedule: linear |
|||
network_settings: |
|||
normalize: true |
|||
hidden_units: 512 |
|||
num_layers: 3 |
|||
vis_encode_type: simple |
|||
gamma: 0.995 |
|||
gamma: 0.995 |
|||
output_path: default |
|||
keep_checkpoints: 5 |
|||
max_steps: 10000000 |
|||
time_horizon: 1000 |
|||
summary_freq: 30000 |
|||
threaded: true |
|
|||
behaviors: |
|||
FoodCollector: |
|||
trainer: ppo |
|||
batch_size: 1024 |
|||
beta: 0.005 |
|||
buffer_size: 10240 |
|||
epsilon: 0.2 |
|||
hidden_units: 128 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: linear |
|||
max_steps: 2.0e6 |
|||
memory_size: 128 |
|||
normalize: false |
|||
num_epoch: 3 |
|||
num_layers: 2 |
|||
time_horizon: 64 |
|||
sequence_length: 64 |
|||
summary_freq: 10000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
trainer_type: ppo |
|||
hyperparameters: |
|||
batch_size: 1024 |
|||
buffer_size: 10240 |
|||
learning_rate: 0.0003 |
|||
beta: 0.005 |
|||
epsilon: 0.2 |
|||
lambd: 0.95 |
|||
num_epoch: 3 |
|||
learning_rate_schedule: linear |
|||
network_settings: |
|||
normalize: false |
|||
hidden_units: 128 |
|||
num_layers: 2 |
|||
vis_encode_type: simple |
|||
gamma: 0.99 |
|||
gamma: 0.99 |
|||
output_path: default |
|||
keep_checkpoints: 5 |
|||
max_steps: 2000000 |
|||
time_horizon: 64 |
|||
summary_freq: 10000 |
|||
threaded: true |
|
|||
behaviors: |
|||
GridWorld: |
|||
trainer: ppo |
|||
batch_size: 32 |
|||
beta: 0.005 |
|||
buffer_size: 256 |
|||
epsilon: 0.2 |
|||
hidden_units: 256 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: linear |
|||
max_steps: 500000 |
|||
memory_size: 128 |
|||
normalize: false |
|||
num_epoch: 3 |
|||
num_layers: 1 |
|||
time_horizon: 5 |
|||
sequence_length: 64 |
|||
summary_freq: 20000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
trainer_type: ppo |
|||
hyperparameters: |
|||
batch_size: 32 |
|||
buffer_size: 256 |
|||
learning_rate: 0.0003 |
|||
beta: 0.005 |
|||
epsilon: 0.2 |
|||
lambd: 0.95 |
|||
num_epoch: 3 |
|||
learning_rate_schedule: linear |
|||
network_settings: |
|||
normalize: false |
|||
hidden_units: 256 |
|||
num_layers: 1 |
|||
vis_encode_type: simple |
|||
gamma: 0.9 |
|||
gamma: 0.9 |
|||
output_path: default |
|||
keep_checkpoints: 5 |
|||
max_steps: 500000 |
|||
time_horizon: 5 |
|||
summary_freq: 20000 |
|||
threaded: true |
|
|||
behaviors: |
|||
Hallway: |
|||
trainer: ppo |
|||
batch_size: 128 |
|||
beta: 0.01 |
|||
buffer_size: 1024 |
|||
epsilon: 0.2 |
|||
hidden_units: 128 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: linear |
|||
max_steps: 1.0e7 |
|||
memory_size: 128 |
|||
normalize: false |
|||
num_epoch: 3 |
|||
num_layers: 2 |
|||
time_horizon: 64 |
|||
sequence_length: 64 |
|||
summary_freq: 10000 |
|||
use_recurrent: true |
|||
vis_encode_type: simple |
|||
trainer_type: ppo |
|||
hyperparameters: |
|||
batch_size: 128 |
|||
buffer_size: 1024 |
|||
learning_rate: 0.0003 |
|||
beta: 0.01 |
|||
epsilon: 0.2 |
|||
lambd: 0.95 |
|||
num_epoch: 3 |
|||
learning_rate_schedule: linear |
|||
network_settings: |
|||
normalize: false |
|||
hidden_units: 128 |
|||
num_layers: 2 |
|||
vis_encode_type: simple |
|||
memory: |
|||
sequence_length: 64 |
|||
memory_size: 128 |
|||
gamma: 0.99 |
|||
gamma: 0.99 |
|||
output_path: default |
|||
keep_checkpoints: 5 |
|||
max_steps: 10000000 |
|||
time_horizon: 64 |
|||
summary_freq: 10000 |
|||
threaded: true |
|
|||
behaviors: |
|||
PushBlock: |
|||
trainer: ppo |
|||
batch_size: 128 |
|||
beta: 0.01 |
|||
buffer_size: 2048 |
|||
epsilon: 0.2 |
|||
hidden_units: 256 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: linear |
|||
max_steps: 2.0e6 |
|||
memory_size: 128 |
|||
normalize: false |
|||
num_epoch: 3 |
|||
num_layers: 2 |
|||
time_horizon: 64 |
|||
sequence_length: 64 |
|||
summary_freq: 60000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
trainer_type: ppo |
|||
hyperparameters: |
|||
batch_size: 128 |
|||
buffer_size: 2048 |
|||
learning_rate: 0.0003 |
|||
beta: 0.01 |
|||
epsilon: 0.2 |
|||
lambd: 0.95 |
|||
num_epoch: 3 |
|||
learning_rate_schedule: linear |
|||
network_settings: |
|||
normalize: false |
|||
hidden_units: 256 |
|||
num_layers: 2 |
|||
vis_encode_type: simple |
|||
gamma: 0.99 |
|||
gamma: 0.99 |
|||
output_path: default |
|||
keep_checkpoints: 5 |
|||
max_steps: 2000000 |
|||
time_horizon: 64 |
|||
summary_freq: 60000 |
|||
threaded: true |
|
|||
behaviors: |
|||
Pyramids: |
|||
trainer: ppo |
|||
batch_size: 128 |
|||
beta: 0.01 |
|||
buffer_size: 2048 |
|||
epsilon: 0.2 |
|||
hidden_units: 512 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: linear |
|||
max_steps: 1.0e7 |
|||
memory_size: 128 |
|||
normalize: false |
|||
num_epoch: 3 |
|||
num_layers: 2 |
|||
time_horizon: 128 |
|||
sequence_length: 64 |
|||
summary_freq: 30000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
trainer_type: ppo |
|||
hyperparameters: |
|||
batch_size: 128 |
|||
buffer_size: 2048 |
|||
learning_rate: 0.0003 |
|||
beta: 0.01 |
|||
epsilon: 0.2 |
|||
lambd: 0.95 |
|||
num_epoch: 3 |
|||
learning_rate_schedule: linear |
|||
network_settings: |
|||
normalize: false |
|||
hidden_units: 512 |
|||
num_layers: 2 |
|||
vis_encode_type: simple |
|||
strength: 1.0 |
|||
strength: 1.0 |
|||
strength: 0.02 |
|||
strength: 0.02 |
|||
learning_rate: 0.0003 |
|||
output_path: default |
|||
keep_checkpoints: 5 |
|||
max_steps: 10000000 |
|||
time_horizon: 128 |
|||
summary_freq: 30000 |
|||
threaded: true |
|
|||
behaviors: |
|||
Reacher: |
|||
trainer: ppo |
|||
batch_size: 2024 |
|||
beta: 0.005 |
|||
buffer_size: 20240 |
|||
epsilon: 0.2 |
|||
hidden_units: 128 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: linear |
|||
max_steps: 2e7 |
|||
memory_size: 128 |
|||
normalize: true |
|||
num_epoch: 3 |
|||
num_layers: 2 |
|||
time_horizon: 1000 |
|||
sequence_length: 64 |
|||
summary_freq: 60000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
trainer_type: ppo |
|||
hyperparameters: |
|||
batch_size: 2024 |
|||
buffer_size: 20240 |
|||
learning_rate: 0.0003 |
|||
beta: 0.005 |
|||
epsilon: 0.2 |
|||
lambd: 0.95 |
|||
num_epoch: 3 |
|||
learning_rate_schedule: linear |
|||
network_settings: |
|||
normalize: true |
|||
hidden_units: 128 |
|||
num_layers: 2 |
|||
vis_encode_type: simple |
|||
gamma: 0.995 |
|||
gamma: 0.995 |
|||
output_path: default |
|||
keep_checkpoints: 5 |
|||
max_steps: 20000000 |
|||
time_horizon: 1000 |
|||
summary_freq: 60000 |
|||
threaded: true |
|
|||
behaviors: |
|||
SoccerTwos: |
|||
trainer: ppo |
|||
batch_size: 2048 |
|||
beta: 0.005 |
|||
buffer_size: 20480 |
|||
epsilon: 0.2 |
|||
hidden_units: 512 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
max_steps: 5.0e7 |
|||
memory_size: 128 |
|||
normalize: false |
|||
num_epoch: 3 |
|||
num_layers: 2 |
|||
time_horizon: 1000 |
|||
sequence_length: 64 |
|||
summary_freq: 10000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
trainer_type: ppo |
|||
hyperparameters: |
|||
batch_size: 2048 |
|||
buffer_size: 20480 |
|||
learning_rate: 0.0003 |
|||
beta: 0.005 |
|||
epsilon: 0.2 |
|||
lambd: 0.95 |
|||
num_epoch: 3 |
|||
learning_rate_schedule: constant |
|||
network_settings: |
|||
normalize: false |
|||
hidden_units: 512 |
|||
num_layers: 2 |
|||
vis_encode_type: simple |
|||
strength: 1.0 |
|||
strength: 1.0 |
|||
output_path: default |
|||
keep_checkpoints: 5 |
|||
max_steps: 50000000 |
|||
time_horizon: 1000 |
|||
summary_freq: 10000 |
|||
threaded: true |
|||
window: 10 |
|||
play_against_latest_model_ratio: 0.5 |
|||
swap_steps: 50000 |
|||
curriculum: |
|||
measure: progress |
|||
thresholds: [0.05, 0.1] |
|||
min_lesson_length: 100 |
|||
signal_smoothing: true |
|||
parameters: |
|||
ball_touch: [1.0, 0.5, 0.0] |
|||
swap_steps: 50000 |
|||
window: 10 |
|||
play_against_latest_model_ratio: 0.5 |
|||
initial_elo: 1200.0 |
|
|||
behaviors: |
|||
Goalie: |
|||
trainer: ppo |
|||
batch_size: 2048 |
|||
beta: 0.005 |
|||
buffer_size: 20480 |
|||
epsilon: 0.2 |
|||
hidden_units: 512 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
max_steps: 5.0e7 |
|||
memory_size: 128 |
|||
normalize: false |
|||
num_epoch: 3 |
|||
num_layers: 2 |
|||
time_horizon: 1000 |
|||
sequence_length: 64 |
|||
summary_freq: 10000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
trainer_type: ppo |
|||
hyperparameters: |
|||
batch_size: 2048 |
|||
buffer_size: 20480 |
|||
learning_rate: 0.0003 |
|||
beta: 0.005 |
|||
epsilon: 0.2 |
|||
lambd: 0.95 |
|||
num_epoch: 3 |
|||
learning_rate_schedule: constant |
|||
network_settings: |
|||
normalize: false |
|||
hidden_units: 512 |
|||
num_layers: 2 |
|||
vis_encode_type: simple |
|||
gamma: 0.99 |
|||
gamma: 0.99 |
|||
output_path: default |
|||
keep_checkpoints: 5 |
|||
max_steps: 50000000 |
|||
time_horizon: 1000 |
|||
summary_freq: 10000 |
|||
threaded: true |
|||
window: 10 |
|||
play_against_latest_model_ratio: 0.5 |
|||
swap_steps: 25000 |
|||
|
|||
swap_steps: 25000 |
|||
window: 10 |
|||
play_against_latest_model_ratio: 0.5 |
|||
initial_elo: 1200.0 |
|||
trainer: ppo |
|||
batch_size: 2048 |
|||
beta: 0.005 |
|||
buffer_size: 20480 |
|||
epsilon: 0.2 |
|||
hidden_units: 512 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
max_steps: 5.0e7 |
|||
memory_size: 128 |
|||
normalize: false |
|||
num_epoch: 3 |
|||
num_layers: 2 |
|||
time_horizon: 1000 |
|||
sequence_length: 64 |
|||
summary_freq: 10000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
trainer_type: ppo |
|||
hyperparameters: |
|||
batch_size: 2048 |
|||
buffer_size: 20480 |
|||
learning_rate: 0.0003 |
|||
beta: 0.005 |
|||
epsilon: 0.2 |
|||
lambd: 0.95 |
|||
num_epoch: 3 |
|||
learning_rate_schedule: constant |
|||
network_settings: |
|||
normalize: false |
|||
hidden_units: 512 |
|||
num_layers: 2 |
|||
vis_encode_type: simple |
|||
gamma: 0.99 |
|||
gamma: 0.99 |
|||
output_path: default |
|||
keep_checkpoints: 5 |
|||
max_steps: 50000000 |
|||
time_horizon: 1000 |
|||
summary_freq: 10000 |
|||
threaded: true |
|||
window: 10 |
|||
play_against_latest_model_ratio: 0.5 |
|||
team_change: 200000 |
|||
team_change: 200000 |
|||
window: 10 |
|||
play_against_latest_model_ratio: 0.5 |
|||
initial_elo: 1200.0 |
|
|||
behaviors: |
|||
Tennis: |
|||
trainer: ppo |
|||
batch_size: 1024 |
|||
beta: 0.005 |
|||
buffer_size: 10240 |
|||
epsilon: 0.2 |
|||
hidden_units: 256 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
max_steps: 5.0e7 |
|||
memory_size: 128 |
|||
normalize: true |
|||
num_epoch: 3 |
|||
num_layers: 2 |
|||
time_horizon: 1000 |
|||
sequence_length: 64 |
|||
summary_freq: 10000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
trainer_type: ppo |
|||
hyperparameters: |
|||
batch_size: 1024 |
|||
buffer_size: 10240 |
|||
learning_rate: 0.0003 |
|||
beta: 0.005 |
|||
epsilon: 0.2 |
|||
lambd: 0.95 |
|||
num_epoch: 3 |
|||
learning_rate_schedule: constant |
|||
network_settings: |
|||
normalize: true |
|||
hidden_units: 256 |
|||
num_layers: 2 |
|||
vis_encode_type: simple |
|||
gamma: 0.99 |
|||
gamma: 0.99 |
|||
output_path: default |
|||
keep_checkpoints: 5 |
|||
max_steps: 50000000 |
|||
time_horizon: 1000 |
|||
summary_freq: 10000 |
|||
threaded: true |
|||
window: 10 |
|||
play_against_latest_model_ratio: 0.5 |
|||
team_change: 100000 |
|||
team_change: 100000 |
|||
window: 10 |
|||
play_against_latest_model_ratio: 0.5 |
|||
initial_elo: 1200.0 |
|
|||
behaviors: |
|||
VisualHallway: |
|||
trainer: ppo |
|||
batch_size: 64 |
|||
beta: 0.01 |
|||
buffer_size: 1024 |
|||
epsilon: 0.2 |
|||
hidden_units: 128 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: linear |
|||
max_steps: 1.0e7 |
|||
memory_size: 128 |
|||
normalize: false |
|||
num_epoch: 3 |
|||
num_layers: 1 |
|||
time_horizon: 64 |
|||
sequence_length: 64 |
|||
summary_freq: 10000 |
|||
use_recurrent: true |
|||
vis_encode_type: simple |
|||
trainer_type: ppo |
|||
hyperparameters: |
|||
batch_size: 64 |
|||
buffer_size: 1024 |
|||
learning_rate: 0.0003 |
|||
beta: 0.01 |
|||
epsilon: 0.2 |
|||
lambd: 0.95 |
|||
num_epoch: 3 |
|||
learning_rate_schedule: linear |
|||
network_settings: |
|||
normalize: false |
|||
hidden_units: 128 |
|||
num_layers: 1 |
|||
vis_encode_type: simple |
|||
memory: |
|||
sequence_length: 64 |
|||
memory_size: 128 |
|||
gamma: 0.99 |
|||
gamma: 0.99 |
|||
output_path: default |
|||
keep_checkpoints: 5 |
|||
max_steps: 10000000 |
|||
time_horizon: 64 |
|||
summary_freq: 10000 |
|||
threaded: true |
|
|||
behaviors: |
|||
VisualPushBlock: |
|||
trainer: ppo |
|||
batch_size: 64 |
|||
beta: 0.01 |
|||
buffer_size: 1024 |
|||
epsilon: 0.2 |
|||
hidden_units: 128 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: linear |
|||
max_steps: 3.0e6 |
|||
memory_size: 128 |
|||
normalize: false |
|||
num_epoch: 3 |
|||
num_layers: 1 |
|||
time_horizon: 64 |
|||
sequence_length: 32 |
|||
summary_freq: 60000 |
|||
use_recurrent: true |
|||
vis_encode_type: simple |
|||
trainer_type: ppo |
|||
hyperparameters: |
|||
batch_size: 64 |
|||
buffer_size: 1024 |
|||
learning_rate: 0.0003 |
|||
beta: 0.01 |
|||
epsilon: 0.2 |
|||
lambd: 0.95 |
|||
num_epoch: 3 |
|||
learning_rate_schedule: linear |
|||
network_settings: |
|||
normalize: false |
|||
hidden_units: 128 |
|||
num_layers: 1 |
|||
vis_encode_type: simple |
|||
memory: |
|||
sequence_length: 32 |
|||
memory_size: 128 |
|||
gamma: 0.99 |
|||
gamma: 0.99 |
|||
output_path: default |
|||
keep_checkpoints: 5 |
|||
max_steps: 3000000 |
|||
time_horizon: 64 |
|||
summary_freq: 60000 |
|||
threaded: true |
|
|||
behaviors: |
|||
VisualPyramids: |
|||
trainer: ppo |
|||
batch_size: 64 |
|||
beta: 0.01 |
|||
buffer_size: 2024 |
|||
epsilon: 0.2 |
|||
hidden_units: 256 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: linear |
|||
max_steps: 1.0e7 |
|||
memory_size: 128 |
|||
normalize: false |
|||
num_epoch: 3 |
|||
num_layers: 1 |
|||
time_horizon: 128 |
|||
sequence_length: 64 |
|||
summary_freq: 10000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
trainer_type: ppo |
|||
hyperparameters: |
|||
batch_size: 64 |
|||
buffer_size: 2024 |
|||
learning_rate: 0.0003 |
|||
beta: 0.01 |
|||
epsilon: 0.2 |
|||
lambd: 0.95 |
|||
num_epoch: 3 |
|||
learning_rate_schedule: linear |
|||
network_settings: |
|||
normalize: false |
|||
hidden_units: 256 |
|||
num_layers: 1 |
|||
vis_encode_type: simple |
|||
strength: 1.0 |
|||
strength: 1.0 |
|||
strength: 0.01 |
|||
strength: 0.01 |
|||
learning_rate: 0.0003 |
|||
output_path: default |
|||
keep_checkpoints: 5 |
|||
max_steps: 10000000 |
|||
time_horizon: 128 |
|||
summary_freq: 10000 |
|||
threaded: true |
|
|||
behaviors: |
|||
Walker: |
|||
trainer: ppo |
|||
batch_size: 2048 |
|||
beta: 0.005 |
|||
buffer_size: 20480 |
|||
epsilon: 0.2 |
|||
hidden_units: 512 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: linear |
|||
max_steps: 2e7 |
|||
memory_size: 128 |
|||
normalize: true |
|||
num_epoch: 3 |
|||
num_layers: 3 |
|||
time_horizon: 1000 |
|||
sequence_length: 64 |
|||
summary_freq: 30000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
trainer_type: ppo |
|||
hyperparameters: |
|||
batch_size: 2048 |
|||
buffer_size: 20480 |
|||
learning_rate: 0.0003 |
|||
beta: 0.005 |
|||
epsilon: 0.2 |
|||
lambd: 0.95 |
|||
num_epoch: 3 |
|||
learning_rate_schedule: linear |
|||
network_settings: |
|||
normalize: true |
|||
hidden_units: 512 |
|||
num_layers: 3 |
|||
vis_encode_type: simple |
|||
gamma: 0.995 |
|||
gamma: 0.995 |
|||
output_path: default |
|||
keep_checkpoints: 5 |
|||
max_steps: 20000000 |
|||
time_horizon: 1000 |
|||
summary_freq: 30000 |
|||
threaded: true |
|
|||
behaviors: |
|||
BigWallJump: |
|||
trainer: ppo |
|||
batch_size: 128 |
|||
beta: 0.005 |
|||
buffer_size: 2048 |
|||
epsilon: 0.2 |
|||
hidden_units: 256 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: linear |
|||
max_steps: 2e7 |
|||
memory_size: 128 |
|||
normalize: false |
|||
num_epoch: 3 |
|||
num_layers: 2 |
|||
time_horizon: 128 |
|||
sequence_length: 64 |
|||
summary_freq: 20000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
trainer_type: ppo |
|||
hyperparameters: |
|||
batch_size: 128 |
|||
buffer_size: 2048 |
|||
learning_rate: 0.0003 |
|||
beta: 0.005 |
|||
epsilon: 0.2 |
|||
lambd: 0.95 |
|||
num_epoch: 3 |
|||
learning_rate_schedule: linear |
|||
network_settings: |
|||
normalize: false |
|||
hidden_units: 256 |
|||
num_layers: 2 |
|||
vis_encode_type: simple |
|||
strength: 1.0 |
|||
|
|||
SmallWallJump: |
|||
trainer: ppo |
|||
batch_size: 128 |
|||
beta: 0.005 |
|||
buffer_size: 2048 |
|||
epsilon: 0.2 |
|||
hidden_units: 256 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: linear |
|||
max_steps: 5e6 |
|||
memory_size: 128 |
|||
normalize: false |
|||
num_epoch: 3 |
|||
num_layers: 2 |
|||
strength: 1.0 |
|||
output_path: default |
|||
keep_checkpoints: 5 |
|||
max_steps: 20000000 |
|||
sequence_length: 64 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
threaded: true |
|||
SmallWallJump: |
|||
trainer_type: ppo |
|||
hyperparameters: |
|||
batch_size: 128 |
|||
buffer_size: 2048 |
|||
learning_rate: 0.0003 |
|||
beta: 0.005 |
|||
epsilon: 0.2 |
|||
lambd: 0.95 |
|||
num_epoch: 3 |
|||
learning_rate_schedule: linear |
|||
network_settings: |
|||
normalize: false |
|||
hidden_units: 256 |
|||
num_layers: 2 |
|||
vis_encode_type: simple |
|||
gamma: 0.99 |
|||
gamma: 0.99 |
|||
output_path: default |
|||
keep_checkpoints: 5 |
|||
max_steps: 5000000 |
|||
time_horizon: 128 |
|||
summary_freq: 20000 |
|||
threaded: true |
|
|||
behaviors: |
|||
BigWallJump: |
|||
trainer: ppo |
|||
batch_size: 128 |
|||
beta: 0.005 |
|||
buffer_size: 2048 |
|||
epsilon: 0.2 |
|||
hidden_units: 256 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: linear |
|||
max_steps: 2e7 |
|||
memory_size: 128 |
|||
normalize: false |
|||
num_epoch: 3 |
|||
num_layers: 2 |
|||
time_horizon: 128 |
|||
sequence_length: 64 |
|||
summary_freq: 20000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
trainer_type: ppo |
|||
hyperparameters: |
|||
batch_size: 128 |
|||
buffer_size: 2048 |
|||
learning_rate: 0.0003 |
|||
beta: 0.005 |
|||
epsilon: 0.2 |
|||
lambd: 0.95 |
|||
num_epoch: 3 |
|||
learning_rate_schedule: linear |
|||
network_settings: |
|||
normalize: false |
|||
hidden_units: 256 |
|||
num_layers: 2 |
|||
vis_encode_type: simple |
|||
gamma: 0.99 |
|||
gamma: 0.99 |
|||
curriculum: |
|||
measure: progress |
|||
thresholds: [0.1, 0.3, 0.5] |
|||
min_lesson_length: 100 |
|||
signal_smoothing: true |
|||
parameters: |
|||
big_wall_min_height: [0.0, 4.0, 6.0, 8.0] |
|||
big_wall_max_height: [4.0, 7.0, 8.0, 8.0] |
|||
|
|||
SmallWallJump: |
|||
trainer: ppo |
|||
batch_size: 128 |
|||
beta: 0.005 |
|||
buffer_size: 2048 |
|||
epsilon: 0.2 |
|||
hidden_units: 256 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: linear |
|||
max_steps: 5e6 |
|||
memory_size: 128 |
|||
normalize: false |
|||
num_epoch: 3 |
|||
num_layers: 2 |
|||
output_path: default |
|||
keep_checkpoints: 5 |
|||
max_steps: 20000000 |
|||
sequence_length: 64 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
threaded: true |
|||
SmallWallJump: |
|||
trainer_type: ppo |
|||
hyperparameters: |
|||
batch_size: 128 |
|||
buffer_size: 2048 |
|||
learning_rate: 0.0003 |
|||
beta: 0.005 |
|||
epsilon: 0.2 |
|||
lambd: 0.95 |
|||
num_epoch: 3 |
|||
learning_rate_schedule: linear |
|||
network_settings: |
|||
normalize: false |
|||
hidden_units: 256 |
|||
num_layers: 2 |
|||
vis_encode_type: simple |
|||
gamma: 0.99 |
|||
gamma: 0.99 |
|||
curriculum: |
|||
measure: progress |
|||
thresholds: [0.1, 0.3, 0.5] |
|||
min_lesson_length: 100 |
|||
signal_smoothing: true |
|||
parameters: |
|||
small_wall_height: [1.5, 2.0, 2.5, 4.0] |
|||
output_path: default |
|||
keep_checkpoints: 5 |
|||
max_steps: 5000000 |
|||
time_horizon: 128 |
|||
summary_freq: 20000 |
|||
threaded: true |
|||
|
|||
curriculum: |
|||
BigWallJump: |
|||
measure: progress |
|||
thresholds: [0.1, 0.3, 0.5] |
|||
min_lesson_length: 100 |
|||
signal_smoothing: true |
|||
parameters: |
|||
big_wall_min_height: [0.0, 4.0, 6.0, 8.0] |
|||
big_wall_max_height: [4.0, 7.0, 8.0, 8.0] |
|||
SmallWallJump: |
|||
measure: progress |
|||
thresholds: [0.1, 0.3, 0.5] |
|||
min_lesson_length: 100 |
|||
signal_smoothing: true |
|||
parameters: |
|||
small_wall_height: [1.5, 2.0, 2.5, 4.0] |
|
|||
behaviors: |
|||
WormDynamic: |
|||
trainer: ppo |
|||
batch_size: 2024 |
|||
beta: 0.005 |
|||
buffer_size: 20240 |
|||
epsilon: 0.2 |
|||
hidden_units: 512 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: linear |
|||
max_steps: 3.5e6 |
|||
memory_size: 128 |
|||
normalize: true |
|||
num_epoch: 3 |
|||
num_layers: 3 |
|||
time_horizon: 1000 |
|||
sequence_length: 64 |
|||
summary_freq: 30000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
trainer_type: ppo |
|||
hyperparameters: |
|||
batch_size: 2024 |
|||
buffer_size: 20240 |
|||
learning_rate: 0.0003 |
|||
beta: 0.005 |
|||
epsilon: 0.2 |
|||
lambd: 0.95 |
|||
num_epoch: 3 |
|||
learning_rate_schedule: linear |
|||
network_settings: |
|||
normalize: true |
|||
hidden_units: 512 |
|||
num_layers: 3 |
|||
vis_encode_type: simple |
|||
gamma: 0.995 |
|||
gamma: 0.995 |
|||
output_path: default |
|||
keep_checkpoints: 5 |
|||
max_steps: 3500000 |
|||
time_horizon: 1000 |
|||
summary_freq: 30000 |
|||
threaded: true |
|
|||
behaviors: |
|||
WormStatic: |
|||
trainer: ppo |
|||
batch_size: 2024 |
|||
beta: 0.005 |
|||
buffer_size: 20240 |
|||
epsilon: 0.2 |
|||
hidden_units: 512 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: linear |
|||
max_steps: 3.5e6 |
|||
memory_size: 128 |
|||
normalize: true |
|||
num_epoch: 3 |
|||
num_layers: 3 |
|||
time_horizon: 1000 |
|||
sequence_length: 64 |
|||
summary_freq: 30000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
trainer_type: ppo |
|||
hyperparameters: |
|||
batch_size: 2024 |
|||
buffer_size: 20240 |
|||
learning_rate: 0.0003 |
|||
beta: 0.005 |
|||
epsilon: 0.2 |
|||
lambd: 0.95 |
|||
num_epoch: 3 |
|||
learning_rate_schedule: linear |
|||
network_settings: |
|||
normalize: true |
|||
hidden_units: 512 |
|||
num_layers: 3 |
|||
vis_encode_type: simple |
|||
gamma: 0.995 |
|||
gamma: 0.995 |
|||
output_path: default |
|||
keep_checkpoints: 5 |
|||
max_steps: 3500000 |
|||
time_horizon: 1000 |
|||
summary_freq: 30000 |
|||
threaded: true |
|
|||
behaviors: |
|||
3DBall: |
|||
trainer: sac |
|||
batch_size: 64 |
|||
buffer_size: 12000 |
|||
buffer_init_steps: 0 |
|||
hidden_units: 64 |
|||
init_entcoef: 0.5 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
max_steps: 5.0e5 |
|||
memory_size: 128 |
|||
normalize: true |
|||
steps_per_update: 10 |
|||
num_layers: 2 |
|||
time_horizon: 1000 |
|||
sequence_length: 64 |
|||
summary_freq: 12000 |
|||
tau: 0.005 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
trainer_type: sac |
|||
hyperparameters: |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
batch_size: 64 |
|||
buffer_size: 12000 |
|||
buffer_init_steps: 0 |
|||
tau: 0.005 |
|||
steps_per_update: 10.0 |
|||
save_replay_buffer: false |
|||
init_entcoef: 0.5 |
|||
reward_signal_steps_per_update: 10.0 |
|||
network_settings: |
|||
normalize: true |
|||
hidden_units: 64 |
|||
num_layers: 2 |
|||
vis_encode_type: simple |
|||
gamma: 0.99 |
|||
gamma: 0.99 |
|||
output_path: default |
|||
keep_checkpoints: 5 |
|||
max_steps: 500000 |
|||
time_horizon: 1000 |
|||
summary_freq: 12000 |
|||
threaded: true |
|
|||
behaviors: |
|||
3DBallHard: |
|||
trainer: sac |
|||
batch_size: 256 |
|||
buffer_size: 50000 |
|||
buffer_init_steps: 0 |
|||
hidden_units: 128 |
|||
init_entcoef: 1.0 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
max_steps: 5.0e5 |
|||
memory_size: 128 |
|||
normalize: true |
|||
steps_per_update: 10 |
|||
num_layers: 2 |
|||
time_horizon: 1000 |
|||
sequence_length: 64 |
|||
summary_freq: 12000 |
|||
tau: 0.005 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
trainer_type: sac |
|||
hyperparameters: |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
batch_size: 256 |
|||
buffer_size: 50000 |
|||
buffer_init_steps: 0 |
|||
tau: 0.005 |
|||
steps_per_update: 10.0 |
|||
save_replay_buffer: false |
|||
init_entcoef: 1.0 |
|||
reward_signal_steps_per_update: 10.0 |
|||
network_settings: |
|||
normalize: true |
|||
hidden_units: 128 |
|||
num_layers: 2 |
|||
vis_encode_type: simple |
|||
gamma: 0.99 |
|||
gamma: 0.99 |
|||
output_path: default |
|||
keep_checkpoints: 5 |
|||
max_steps: 500000 |
|||
time_horizon: 1000 |
|||
summary_freq: 12000 |
|||
threaded: true |
|
|||
behaviors: |
|||
Basic: |
|||
trainer: sac |
|||
batch_size: 64 |
|||
buffer_size: 50000 |
|||
buffer_init_steps: 0 |
|||
hidden_units: 20 |
|||
init_entcoef: 0.01 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
max_steps: 5.0e5 |
|||
memory_size: 128 |
|||
normalize: false |
|||
steps_per_update: 10 |
|||
num_layers: 2 |
|||
time_horizon: 10 |
|||
sequence_length: 64 |
|||
summary_freq: 2000 |
|||
tau: 0.005 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
trainer_type: sac |
|||
hyperparameters: |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
batch_size: 64 |
|||
buffer_size: 50000 |
|||
buffer_init_steps: 0 |
|||
tau: 0.005 |
|||
steps_per_update: 10.0 |
|||
save_replay_buffer: false |
|||
init_entcoef: 0.01 |
|||
reward_signal_steps_per_update: 10.0 |
|||
network_settings: |
|||
normalize: false |
|||
hidden_units: 20 |
|||
num_layers: 2 |
|||
vis_encode_type: simple |
|||
gamma: 0.99 |
|||
gamma: 0.99 |
|||
output_path: default |
|||
keep_checkpoints: 5 |
|||
max_steps: 500000 |
|||
time_horizon: 10 |
|||
summary_freq: 2000 |
|||
threaded: true |
|
|||
behaviors: |
|||
Bouncer: |
|||
trainer: sac |
|||
batch_size: 128 |
|||
buffer_size: 50000 |
|||
buffer_init_steps: 0 |
|||
hidden_units: 64 |
|||
init_entcoef: 1.0 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
max_steps: 1.0e6 |
|||
memory_size: 128 |
|||
normalize: true |
|||
steps_per_update: 10 |
|||
num_layers: 2 |
|||
time_horizon: 64 |
|||
sequence_length: 64 |
|||
summary_freq: 20000 |
|||
tau: 0.005 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
trainer_type: sac |
|||
hyperparameters: |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
batch_size: 128 |
|||
buffer_size: 50000 |
|||
buffer_init_steps: 0 |
|||
tau: 0.005 |
|||
steps_per_update: 10.0 |
|||
save_replay_buffer: false |
|||
init_entcoef: 1.0 |
|||
reward_signal_steps_per_update: 10.0 |
|||
network_settings: |
|||
normalize: true |
|||
hidden_units: 64 |
|||
num_layers: 2 |
|||
vis_encode_type: simple |
|||
gamma: 0.99 |
|||
gamma: 0.99 |
|||
output_path: default |
|||
keep_checkpoints: 5 |
|||
max_steps: 1000000 |
|||
time_horizon: 64 |
|||
summary_freq: 20000 |
|||
threaded: true |
|
|||
behaviors: |
|||
CrawlerDynamic: |
|||
trainer: sac |
|||
batch_size: 256 |
|||
buffer_size: 500000 |
|||
buffer_init_steps: 0 |
|||
hidden_units: 512 |
|||
init_entcoef: 1.0 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
max_steps: 5e6 |
|||
memory_size: 128 |
|||
normalize: true |
|||
steps_per_update: 20 |
|||
num_layers: 3 |
|||
time_horizon: 1000 |
|||
sequence_length: 64 |
|||
summary_freq: 30000 |
|||
tau: 0.005 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
trainer_type: sac |
|||
hyperparameters: |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
batch_size: 256 |
|||
buffer_size: 500000 |
|||
buffer_init_steps: 0 |
|||
tau: 0.005 |
|||
steps_per_update: 20.0 |
|||
save_replay_buffer: false |
|||
init_entcoef: 1.0 |
|||
reward_signal_steps_per_update: 20.0 |
|||
network_settings: |
|||
normalize: true |
|||
hidden_units: 512 |
|||
num_layers: 3 |
|||
vis_encode_type: simple |
|||
gamma: 0.995 |
|||
gamma: 0.995 |
|||
output_path: default |
|||
keep_checkpoints: 5 |
|||
max_steps: 5000000 |
|||
time_horizon: 1000 |
|||
summary_freq: 30000 |
|||
threaded: true |
|
|||
behaviors: |
|||
CrawlerStatic: |
|||
trainer: sac |
|||
batch_size: 256 |
|||
buffer_size: 500000 |
|||
buffer_init_steps: 2000 |
|||
hidden_units: 512 |
|||
init_entcoef: 1.0 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
max_steps: 3e6 |
|||
memory_size: 128 |
|||
normalize: true |
|||
steps_per_update: 20 |
|||
num_layers: 3 |
|||
time_horizon: 1000 |
|||
sequence_length: 64 |
|||
summary_freq: 30000 |
|||
tau: 0.005 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
trainer_type: sac |
|||
hyperparameters: |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
batch_size: 256 |
|||
buffer_size: 500000 |
|||
buffer_init_steps: 2000 |
|||
tau: 0.005 |
|||
steps_per_update: 20.0 |
|||
save_replay_buffer: false |
|||
init_entcoef: 1.0 |
|||
reward_signal_steps_per_update: 20.0 |
|||
network_settings: |
|||
normalize: true |
|||
hidden_units: 512 |
|||
num_layers: 3 |
|||
vis_encode_type: simple |
|||
gamma: 0.995 |
|||
gamma: 0.995 |
|||
output_path: default |
|||
keep_checkpoints: 5 |
|||
max_steps: 3000000 |
|||
time_horizon: 1000 |
|||
summary_freq: 30000 |
|||
threaded: true |
|
|||
behaviors: |
|||
FoodCollector: |
|||
trainer: sac |
|||
batch_size: 256 |
|||
buffer_size: 500000 |
|||
buffer_init_steps: 0 |
|||
hidden_units: 128 |
|||
init_entcoef: 0.05 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
max_steps: 2.0e6 |
|||
memory_size: 128 |
|||
normalize: false |
|||
steps_per_update: 10 |
|||
num_layers: 2 |
|||
time_horizon: 64 |
|||
sequence_length: 64 |
|||
summary_freq: 10000 |
|||
tau: 0.005 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
trainer_type: sac |
|||
hyperparameters: |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
batch_size: 256 |
|||
buffer_size: 500000 |
|||
buffer_init_steps: 0 |
|||
tau: 0.005 |
|||
steps_per_update: 10.0 |
|||
save_replay_buffer: false |
|||
init_entcoef: 0.05 |
|||
reward_signal_steps_per_update: 10.0 |
|||
network_settings: |
|||
normalize: false |
|||
hidden_units: 128 |
|||
num_layers: 2 |
|||
vis_encode_type: simple |
|||
gamma: 0.99 |
|||
gamma: 0.99 |
|||
output_path: default |
|||
keep_checkpoints: 5 |
|||
max_steps: 2000000 |
|||
time_horizon: 64 |
|||
summary_freq: 10000 |
|||
threaded: true |
|
|||
behaviors: |
|||
GridWorld: |
|||
trainer: sac |
|||
batch_size: 128 |
|||
buffer_size: 50000 |
|||
buffer_init_steps: 1000 |
|||
hidden_units: 128 |
|||
init_entcoef: 0.5 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
max_steps: 500000 |
|||
memory_size: 128 |
|||
normalize: false |
|||
steps_per_update: 10 |
|||
num_layers: 1 |
|||
time_horizon: 5 |
|||
sequence_length: 64 |
|||
summary_freq: 20000 |
|||
tau: 0.005 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
trainer_type: sac |
|||
hyperparameters: |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
batch_size: 128 |
|||
buffer_size: 50000 |
|||
buffer_init_steps: 1000 |
|||
tau: 0.005 |
|||
steps_per_update: 10.0 |
|||
save_replay_buffer: false |
|||
init_entcoef: 0.5 |
|||
reward_signal_steps_per_update: 10.0 |
|||
network_settings: |
|||
normalize: false |
|||
hidden_units: 128 |
|||
num_layers: 1 |
|||
vis_encode_type: simple |
|||
gamma: 0.9 |
|||
gamma: 0.9 |
|||
output_path: default |
|||
keep_checkpoints: 5 |
|||
max_steps: 500000 |
|||
time_horizon: 5 |
|||
summary_freq: 20000 |
|||
threaded: true |
|
|||
behaviors: |
|||
Hallway: |
|||
trainer: sac |
|||
batch_size: 128 |
|||
buffer_size: 50000 |
|||
buffer_init_steps: 0 |
|||
hidden_units: 128 |
|||
init_entcoef: 0.1 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
max_steps: 5.0e6 |
|||
memory_size: 128 |
|||
normalize: false |
|||
steps_per_update: 10 |
|||
num_layers: 2 |
|||
time_horizon: 64 |
|||
sequence_length: 32 |
|||
summary_freq: 10000 |
|||
tau: 0.005 |
|||
use_recurrent: true |
|||
vis_encode_type: simple |
|||
trainer_type: sac |
|||
hyperparameters: |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
batch_size: 128 |
|||
buffer_size: 50000 |
|||
buffer_init_steps: 0 |
|||
tau: 0.005 |
|||
steps_per_update: 10.0 |
|||
save_replay_buffer: false |
|||
init_entcoef: 0.1 |
|||
reward_signal_steps_per_update: 10.0 |
|||
network_settings: |
|||
normalize: false |
|||
hidden_units: 128 |
|||
num_layers: 2 |
|||
vis_encode_type: simple |
|||
memory: |
|||
sequence_length: 32 |
|||
memory_size: 128 |
|||
gamma: 0.99 |
|||
gamma: 0.99 |
|||
output_path: default |
|||
keep_checkpoints: 5 |
|||
max_steps: 5000000 |
|||
time_horizon: 64 |
|||
summary_freq: 10000 |
|||
threaded: true |
|
|||
behaviors: |
|||
PushBlock: |
|||
trainer: sac |
|||
batch_size: 128 |
|||
buffer_size: 50000 |
|||
buffer_init_steps: 0 |
|||
hidden_units: 256 |
|||
init_entcoef: 0.05 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
max_steps: 2e6 |
|||
memory_size: 128 |
|||
normalize: false |
|||
steps_per_update: 10 |
|||
num_layers: 2 |
|||
time_horizon: 64 |
|||
sequence_length: 64 |
|||
summary_freq: 100000 |
|||
tau: 0.005 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
trainer_type: sac |
|||
hyperparameters: |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
batch_size: 128 |
|||
buffer_size: 50000 |
|||
buffer_init_steps: 0 |
|||
tau: 0.005 |
|||
steps_per_update: 10.0 |
|||
save_replay_buffer: false |
|||
init_entcoef: 0.05 |
|||
reward_signal_steps_per_update: 10.0 |
|||
network_settings: |
|||
normalize: false |
|||
hidden_units: 256 |
|||
num_layers: 2 |
|||
vis_encode_type: simple |
|||
gamma: 0.99 |
|||
gamma: 0.99 |
|||
output_path: default |
|||
keep_checkpoints: 5 |
|||
max_steps: 2000000 |
|||
time_horizon: 64 |
|||
summary_freq: 100000 |
|||
threaded: true |
|
|||
behaviors: |
|||
Pyramids: |
|||
trainer: sac |
|||
batch_size: 128 |
|||
buffer_size: 500000 |
|||
buffer_init_steps: 10000 |
|||
hidden_units: 256 |
|||
init_entcoef: 0.01 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
max_steps: 1.0e7 |
|||
memory_size: 128 |
|||
normalize: false |
|||
steps_per_update: 10 |
|||
num_layers: 2 |
|||
time_horizon: 128 |
|||
sequence_length: 16 |
|||
summary_freq: 30000 |
|||
tau: 0.01 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
trainer_type: sac |
|||
hyperparameters: |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
batch_size: 128 |
|||
buffer_size: 500000 |
|||
buffer_init_steps: 10000 |
|||
tau: 0.01 |
|||
steps_per_update: 10.0 |
|||
save_replay_buffer: false |
|||
init_entcoef: 0.01 |
|||
reward_signal_steps_per_update: 10.0 |
|||
network_settings: |
|||
normalize: false |
|||
hidden_units: 256 |
|||
num_layers: 2 |
|||
vis_encode_type: simple |
|||
strength: 2.0 |
|||
strength: 2.0 |
|||
strength: 0.02 |
|||
strength: 0.02 |
|||
learning_rate: 0.0003 |
|||
use_vail: false |
|||
output_path: default |
|||
keep_checkpoints: 5 |
|||
max_steps: 10000000 |
|||
time_horizon: 128 |
|||
summary_freq: 30000 |
|||
threaded: true |
|
|||
behaviors: |
|||
Reacher: |
|||
trainer: sac |
|||
batch_size: 128 |
|||
buffer_size: 500000 |
|||
buffer_init_steps: 0 |
|||
hidden_units: 128 |
|||
init_entcoef: 1.0 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
max_steps: 2e7 |
|||
memory_size: 128 |
|||
normalize: true |
|||
steps_per_update: 20 |
|||
num_layers: 2 |
|||
time_horizon: 1000 |
|||
sequence_length: 64 |
|||
summary_freq: 60000 |
|||
tau: 0.005 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
trainer_type: sac |
|||
hyperparameters: |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
batch_size: 128 |
|||
buffer_size: 500000 |
|||
buffer_init_steps: 0 |
|||
tau: 0.005 |
|||
steps_per_update: 20.0 |
|||
save_replay_buffer: false |
|||
init_entcoef: 1.0 |
|||
reward_signal_steps_per_update: 20.0 |
|||
network_settings: |
|||
normalize: true |
|||
hidden_units: 128 |
|||
num_layers: 2 |
|||
vis_encode_type: simple |
|||
gamma: 0.99 |
|||
gamma: 0.99 |
|||
output_path: default |
|||
keep_checkpoints: 5 |
|||
max_steps: 20000000 |
|||
time_horizon: 1000 |
|||
summary_freq: 60000 |
|||
threaded: true |
|
|||
behaviors: |
|||
Tennis: |
|||
trainer: sac |
|||
batch_size: 128 |
|||
buffer_size: 50000 |
|||
buffer_init_steps: 0 |
|||
hidden_units: 256 |
|||
init_entcoef: 1.0 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
max_steps: 2e7 |
|||
memory_size: 128 |
|||
normalize: true |
|||
steps_per_update: 10 |
|||
num_layers: 2 |
|||
time_horizon: 64 |
|||
sequence_length: 64 |
|||
summary_freq: 10000 |
|||
tau: 0.005 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
trainer_type: sac |
|||
hyperparameters: |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
batch_size: 128 |
|||
buffer_size: 50000 |
|||
buffer_init_steps: 0 |
|||
tau: 0.005 |
|||
steps_per_update: 10.0 |
|||
save_replay_buffer: false |
|||
init_entcoef: 1.0 |
|||
reward_signal_steps_per_update: 10.0 |
|||
network_settings: |
|||
normalize: true |
|||
hidden_units: 256 |
|||
num_layers: 2 |
|||
vis_encode_type: simple |
|||
gamma: 0.99 |
|||
gamma: 0.99 |
|||
output_path: default |
|||
keep_checkpoints: 5 |
|||
max_steps: 20000000 |
|||
time_horizon: 64 |
|||
summary_freq: 10000 |
|||
threaded: true |
|||
window: 10 |
|||
play_against_current_self_ratio: 0.5 |
|||
team_change: 250000 |
|||
window: 10 |
|||
play_against_latest_model_ratio: 0.5 |
|||
initial_elo: 1200.0 |
|
|||
behaviors: |
|||
VisualHallway: |
|||
trainer: sac |
|||
batch_size: 64 |
|||
buffer_size: 50000 |
|||
buffer_init_steps: 0 |
|||
hidden_units: 128 |
|||
init_entcoef: 1.0 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
max_steps: 1.0e7 |
|||
memory_size: 128 |
|||
normalize: false |
|||
steps_per_update: 10 |
|||
num_layers: 1 |
|||
time_horizon: 64 |
|||
sequence_length: 32 |
|||
summary_freq: 10000 |
|||
tau: 0.005 |
|||
use_recurrent: true |
|||
vis_encode_type: simple |
|||
trainer_type: sac |
|||
hyperparameters: |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
batch_size: 64 |
|||
buffer_size: 50000 |
|||
buffer_init_steps: 0 |
|||
tau: 0.005 |
|||
steps_per_update: 10.0 |
|||
save_replay_buffer: false |
|||
init_entcoef: 1.0 |
|||
reward_signal_steps_per_update: 10.0 |
|||
network_settings: |
|||
normalize: false |
|||
hidden_units: 128 |
|||
num_layers: 1 |
|||
vis_encode_type: simple |
|||
memory: |
|||
sequence_length: 32 |
|||
memory_size: 128 |
|||
strength: 1.0 |
|||
gamma: 0.99 |
|||
strength: 1.0 |
|||
output_path: default |
|||
keep_checkpoints: 5 |
|||
max_steps: 10000000 |
|||
time_horizon: 64 |
|||
summary_freq: 10000 |
|||
threaded: true |
|
|||
behaviors: |
|||
VisualPushBlock: |
|||
trainer: sac |
|||
batch_size: 64 |
|||
buffer_size: 1024 |
|||
buffer_init_steps: 0 |
|||
hidden_units: 128 |
|||
init_entcoef: 1.0 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
max_steps: 3.0e6 |
|||
memory_size: 128 |
|||
normalize: false |
|||
steps_per_update: 10 |
|||
num_layers: 1 |
|||
time_horizon: 64 |
|||
sequence_length: 32 |
|||
summary_freq: 60000 |
|||
tau: 0.005 |
|||
use_recurrent: true |
|||
vis_encode_type: simple |
|||
trainer_type: sac |
|||
hyperparameters: |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
batch_size: 64 |
|||
buffer_size: 1024 |
|||
buffer_init_steps: 0 |
|||
tau: 0.005 |
|||
steps_per_update: 10.0 |
|||
save_replay_buffer: false |
|||
init_entcoef: 1.0 |
|||
reward_signal_steps_per_update: 10.0 |
|||
network_settings: |
|||
normalize: false |
|||
hidden_units: 128 |
|||
num_layers: 1 |
|||
vis_encode_type: simple |
|||
memory: |
|||
sequence_length: 32 |
|||
memory_size: 128 |
|||
strength: 1.0 |
|||
gamma: 0.99 |
|||
strength: 1.0 |
|||
output_path: default |
|||
keep_checkpoints: 5 |
|||
max_steps: 3000000 |
|||
time_horizon: 64 |
|||
summary_freq: 60000 |
|||
threaded: true |
|
|||
behaviors: |
|||
VisualPyramids: |
|||
trainer: sac |
|||
batch_size: 64 |
|||
buffer_size: 500000 |
|||
buffer_init_steps: 1000 |
|||
hidden_units: 256 |
|||
init_entcoef: 0.01 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
max_steps: 1.0e7 |
|||
memory_size: 128 |
|||
normalize: false |
|||
steps_per_update: 10 |
|||
num_layers: 1 |
|||
time_horizon: 128 |
|||
sequence_length: 64 |
|||
summary_freq: 10000 |
|||
tau: 0.01 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
trainer_type: sac |
|||
hyperparameters: |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
batch_size: 64 |
|||
buffer_size: 500000 |
|||
buffer_init_steps: 1000 |
|||
tau: 0.01 |
|||
steps_per_update: 10.0 |
|||
save_replay_buffer: false |
|||
init_entcoef: 0.01 |
|||
reward_signal_steps_per_update: 10.0 |
|||
network_settings: |
|||
normalize: false |
|||
hidden_units: 256 |
|||
num_layers: 1 |
|||
vis_encode_type: simple |
|||
strength: 2.0 |
|||
strength: 2.0 |
|||
strength: 0.02 |
|||
strength: 0.02 |
|||
learning_rate: 0.0003 |
|||
use_vail: false |
|||
output_path: default |
|||
keep_checkpoints: 5 |
|||
max_steps: 10000000 |
|||
time_horizon: 128 |
|||
summary_freq: 10000 |
|||
threaded: true |
|
|||
behaviors: |
|||
Walker: |
|||
trainer: sac |
|||
batch_size: 256 |
|||
buffer_size: 500000 |
|||
buffer_init_steps: 0 |
|||
hidden_units: 512 |
|||
init_entcoef: 1.0 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
max_steps: 2e7 |
|||
memory_size: 128 |
|||
normalize: true |
|||
steps_per_update: 30 |
|||
num_layers: 4 |
|||
time_horizon: 1000 |
|||
sequence_length: 64 |
|||
summary_freq: 30000 |
|||
tau: 0.005 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
trainer_type: sac |
|||
hyperparameters: |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
batch_size: 256 |
|||
buffer_size: 500000 |
|||
buffer_init_steps: 0 |
|||
tau: 0.005 |
|||
steps_per_update: 30.0 |
|||
save_replay_buffer: false |
|||
init_entcoef: 1.0 |
|||
reward_signal_steps_per_update: 30.0 |
|||
network_settings: |
|||
normalize: true |
|||
hidden_units: 512 |
|||
num_layers: 4 |
|||
vis_encode_type: simple |
|||
gamma: 0.995 |
|||
gamma: 0.995 |
|||
output_path: default |
|||
keep_checkpoints: 5 |
|||
max_steps: 20000000 |
|||
time_horizon: 1000 |
|||
summary_freq: 30000 |
|||
threaded: true |
|
|||
behaviors: |
|||
BigWallJump: |
|||
trainer: sac |
|||
batch_size: 128 |
|||
buffer_size: 50000 |
|||
buffer_init_steps: 0 |
|||
hidden_units: 256 |
|||
init_entcoef: 0.1 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
max_steps: 2e7 |
|||
memory_size: 128 |
|||
normalize: false |
|||
steps_per_update: 10 |
|||
num_layers: 2 |
|||
time_horizon: 128 |
|||
sequence_length: 64 |
|||
summary_freq: 20000 |
|||
tau: 0.005 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
trainer_type: sac |
|||
hyperparameters: |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
batch_size: 128 |
|||
buffer_size: 50000 |
|||
buffer_init_steps: 0 |
|||
tau: 0.005 |
|||
steps_per_update: 10.0 |
|||
save_replay_buffer: false |
|||
init_entcoef: 0.1 |
|||
reward_signal_steps_per_update: 10.0 |
|||
network_settings: |
|||
normalize: false |
|||
hidden_units: 256 |
|||
num_layers: 2 |
|||
vis_encode_type: simple |
|||
gamma: 0.99 |
|||
gamma: 0.99 |
|||
|
|||
SmallWallJump: |
|||
trainer: sac |
|||
batch_size: 128 |
|||
buffer_size: 50000 |
|||
buffer_init_steps: 0 |
|||
hidden_units: 256 |
|||
init_entcoef: 0.1 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
max_steps: 5e6 |
|||
memory_size: 128 |
|||
normalize: false |
|||
steps_per_update: 10 |
|||
num_layers: 2 |
|||
output_path: default |
|||
keep_checkpoints: 5 |
|||
max_steps: 20000000 |
|||
sequence_length: 64 |
|||
tau: 0.005 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
threaded: true |
|||
SmallWallJump: |
|||
trainer_type: sac |
|||
hyperparameters: |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
batch_size: 128 |
|||
buffer_size: 50000 |
|||
buffer_init_steps: 0 |
|||
tau: 0.005 |
|||
steps_per_update: 10.0 |
|||
save_replay_buffer: false |
|||
init_entcoef: 0.1 |
|||
reward_signal_steps_per_update: 10.0 |
|||
network_settings: |
|||
normalize: false |
|||
hidden_units: 256 |
|||
num_layers: 2 |
|||
vis_encode_type: simple |
|||
gamma: 0.99 |
|||
gamma: 0.99 |
|||
output_path: default |
|||
keep_checkpoints: 5 |
|||
max_steps: 5000000 |
|||
time_horizon: 128 |
|||
summary_freq: 20000 |
|||
threaded: true |
|
|||
behaviors: |
|||
WormDynamic: |
|||
trainer: sac |
|||
batch_size: 256 |
|||
buffer_size: 500000 |
|||
buffer_init_steps: 0 |
|||
hidden_units: 512 |
|||
init_entcoef: 1.0 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
max_steps: 5e6 |
|||
memory_size: 128 |
|||
normalize: true |
|||
steps_per_update: 20 |
|||
num_layers: 3 |
|||
time_horizon: 1000 |
|||
sequence_length: 64 |
|||
summary_freq: 30000 |
|||
tau: 0.005 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
trainer_type: sac |
|||
hyperparameters: |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
batch_size: 256 |
|||
buffer_size: 500000 |
|||
buffer_init_steps: 0 |
|||
tau: 0.005 |
|||
steps_per_update: 20.0 |
|||
save_replay_buffer: false |
|||
init_entcoef: 1.0 |
|||
reward_signal_steps_per_update: 20.0 |
|||
network_settings: |
|||
normalize: true |
|||
hidden_units: 512 |
|||
num_layers: 3 |
|||
vis_encode_type: simple |
|||
gamma: 0.995 |
|||
gamma: 0.995 |
|||
output_path: default |
|||
keep_checkpoints: 5 |
|||
max_steps: 5000000 |
|||
time_horizon: 1000 |
|||
summary_freq: 30000 |
|||
threaded: true |
|
|||
behaviors: |
|||
WormStatic: |
|||
trainer: sac |
|||
batch_size: 256 |
|||
buffer_size: 500000 |
|||
buffer_init_steps: 2000 |
|||
hidden_units: 512 |
|||
init_entcoef: 1.0 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
max_steps: 3e6 |
|||
memory_size: 128 |
|||
normalize: true |
|||
steps_per_update: 20 |
|||
num_layers: 3 |
|||
time_horizon: 1000 |
|||
sequence_length: 64 |
|||
summary_freq: 30000 |
|||
tau: 0.005 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
trainer_type: sac |
|||
hyperparameters: |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
batch_size: 256 |
|||
buffer_size: 500000 |
|||
buffer_init_steps: 2000 |
|||
tau: 0.005 |
|||
steps_per_update: 20.0 |
|||
save_replay_buffer: false |
|||
init_entcoef: 1.0 |
|||
reward_signal_steps_per_update: 20.0 |
|||
network_settings: |
|||
normalize: true |
|||
hidden_units: 512 |
|||
num_layers: 3 |
|||
vis_encode_type: simple |
|||
gamma: 0.995 |
|||
gamma: 0.995 |
|||
output_path: default |
|||
keep_checkpoints: 5 |
|||
max_steps: 3000000 |
|||
time_horizon: 1000 |
|||
summary_freq: 30000 |
|||
threaded: true |
|
|||
import attr |
|||
import cattr |
|||
import yaml |
|||
from typing import Dict, Any |
|||
import argparse |
|||
from mlagents.trainers.settings import TrainerSettings, NetworkSettings, TrainerType |
|||
from mlagents.trainers.cli_utils import load_config |
|||
|
|||
|
|||
# Take an existing trainer config (e.g. trainer_config.yaml) and turn it into the new format. |
|||
# Take an existing trainer config (e.g. trainer_config.yaml) and turn it into the new format.
def convert_behaviors(old_trainer_config: Dict[str, Any]) -> Dict[str, Any]:
    """
    Convert a dict of old-format (<=0.16.X) per-behavior trainer configs into a dict
    of new-format TrainerSettings objects keyed by behavior name.

    :param old_trainer_config: Parsed YAML of the old trainer config. May contain a
        "default" section whose values are inherited by every other behavior.
    :return: Dict mapping behavior name -> structured TrainerSettings.
    """
    all_behavior_config_dict = {}
    default_config = old_trainer_config.get("default", {})
    for behavior_name, config in old_trainer_config.items():
        if behavior_name != "default":
            # Layer the behavior's own settings on top of the shared defaults.
            config = default_config.copy()
            config.update(old_trainer_config[behavior_name])

            # Convert to split TrainerSettings, Hyperparameters, NetworkSettings
            # Set trainer_type and get appropriate hyperparameter settings
            # Use .get() with the old-format defaults so configs that omitted
            # these (previously optional) keys don't crash the conversion.
            trainer_type = config.get("trainer", "ppo")
            new_config = {}
            new_config["trainer_type"] = trainer_type
            hyperparam_cls = TrainerType(trainer_type).to_settings()
            # Try to absorb as much as possible into the hyperparam_cls
            new_config["hyperparameters"] = cattr.structure(config, hyperparam_cls)

            # Try to absorb as much as possible into the network settings
            new_config["network_settings"] = cattr.structure(config, NetworkSettings)
            # Deal with recurrent
            if config.get("use_recurrent", False):
                new_config["network_settings"].memory = NetworkSettings.MemorySettings(
                    sequence_length=config["sequence_length"],
                    memory_size=config["memory_size"],
                )

            # Absorb the rest into the base TrainerSettings
            for key, val in config.items():
                if key in attr.fields_dict(TrainerSettings):
                    new_config[key] = val

            # Structure the whole thing
            all_behavior_config_dict[behavior_name] = cattr.structure(
                new_config, TrainerSettings
            )
    return all_behavior_config_dict
|||
|
|||
|
|||
def write_to_yaml_file(config: Dict[str, Any], output_config: str):
    """Unstructure the converted config, drop None values, and dump it as YAML."""
    serializable = remove_nones(cattr.unstructure(config))
    with open(output_config, "w") as f:
        try:
            # Keep the keys in insertion order where pyyaml allows it.
            yaml.dump(serializable, f, sort_keys=False)
        except TypeError:  # Older versions of pyyaml don't support sort_keys
            yaml.dump(serializable, f)
|||
|
|||
|
|||
def remove_nones(config: Dict[Any, Any]):
    """
    Return a copy of ``config`` with all None-valued entries removed,
    recursing into nested dicts (which are always kept, cleaned).
    """
    return {
        key: remove_nones(value) if isinstance(value, dict) else value
        for key, value in config.items()
        if isinstance(value, dict) or value is not None
    }
|||
|
|||
|
|||
if __name__ == "__main__":
    # CLI entry point: convert an old-format trainer config (plus optional
    # curriculum and sampler configs) into a single new-format YAML file.
    argparser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    argparser.add_argument(
        "trainer_config_path",
        help="Path to old format (<=0.16.X) trainer configuration YAML.",
    )
    argparser.add_argument(
        "--curriculum",
        help="Path to old format (<=0.16.X) curriculum configuration YAML.",
        default=None,
    )
    argparser.add_argument(
        "--sampler",
        help="Path to old format (<=0.16.X) parameter randomization configuration YAML.",
        default=None,
    )
    argparser.add_argument(
        "output_config_path", help="Path to write converted YAML file."
    )
    args = argparser.parse_args()
    print(
        f"Converting {args.trainer_config_path} and saving to {args.output_config_path}."
    )

    old_config = load_config(args.trainer_config_path)
    behavior_config_dict = convert_behaviors(old_config)
    full_config = {"behaviors": behavior_config_dict}

    # Convert curriculum and sampler. note that we don't validate these; if it was correct
    # before it should be correct now.
    if args.curriculum is not None:
        curriculum_config_dict = load_config(args.curriculum)
        full_config["curriculum"] = curriculum_config_dict

    if args.sampler is not None:
        # BUGFIX: this previously loaded args.curriculum, so --sampler was
        # ignored (and crashed if --curriculum wasn't also given).
        sampler_config_dict = load_config(args.sampler)
        full_config["parameter_randomization"] = sampler_config_dict

    write_to_yaml_file(full_config, args.output_config_path)
|
|||
import attr |
|||
import cattr |
|||
from typing import Dict, Optional, List, Any, DefaultDict, Mapping |
|||
from enum import Enum |
|||
import collections |
|||
import argparse |
|||
|
|||
from mlagents.trainers.cli_utils import StoreConfigFile, DetectDefault, parser |
|||
from mlagents.trainers.cli_utils import load_config |
|||
from mlagents.trainers.exception import TrainerConfigError |
|||
from mlagents.trainers.models import ScheduleType, EncoderType |
|||
|
|||
|
|||
def check_and_structure(key: str, value: Any, class_type: type) -> Any:
    """
    Validate that ``key`` names a declared attr field of ``class_type``, then
    structure ``value`` into that field's declared type.
    :raises TrainerConfigError: if ``key`` is not a field of ``class_type``.
    """
    fields = attr.fields_dict(class_type)
    if key not in fields:
        raise TrainerConfigError(
            f"The option {key} was specified in your YAML file for {class_type.__name__}, but is invalid."
        )
    # Apply cattr structure to the values
    return cattr.structure(value, fields[key].type)
|||
|
|||
|
|||
def strict_to_cls(d: Mapping, t: type) -> Any:
    """
    Structure mapping ``d`` into attrs class ``t``, rejecting any key that is
    not a declared field of ``t``.
    :raises TrainerConfigError: if ``d`` is not a Mapping or has unknown keys.
    """
    if not isinstance(d, Mapping):
        raise TrainerConfigError(f"Unsupported config {d} for {t.__name__}.")
    structured = {key: check_and_structure(key, val, t) for key, val in d.items()}
    return t(**structured)
|||
|
|||
|
|||
def defaultdict_to_dict(d: DefaultDict) -> Dict:
    """Unstructure hook: flatten a defaultdict into a plain dict of unstructured values."""
    plain: Dict = {}
    for key, val in d.items():
        plain[key] = cattr.unstructure(val)
    return plain
|||
|
|||
|
|||
@attr.s(auto_attribs=True)
class ExportableSettings:
    """Mixin base for settings classes that can be exported as plain dicts."""

    def as_dict(self):
        # Recursively unstructure this attrs instance into builtin containers.
        return cattr.unstructure(self)
|||
|
|||
|
|||
@attr.s(auto_attribs=True)
class NetworkSettings:
    """Architecture settings for the policy/value neural network."""

    @attr.s(auto_attribs=True)
    class MemorySettings:
        """Recurrent memory settings; present only when recurrence is enabled."""

        sequence_length: int = 64  # steps per training sequence
        memory_size: int = 128  # size of the memory (hidden state) vector

    normalize: bool = False  # whether to normalize vector observations
    hidden_units: int = 128  # width of each hidden layer
    num_layers: int = 2  # number of hidden layers
    vis_encode_type: EncoderType = EncoderType.SIMPLE  # visual observation encoder
    memory: Optional[MemorySettings] = None  # None -> no recurrence
|||
|
|||
|
|||
@attr.s(auto_attribs=True)
class BehavioralCloningSettings:
    """Settings for training the policy with behavioral cloning from demonstrations."""

    demo_path: str  # required: path to the recorded demonstration(s)
    steps: int = 0  # number of steps to apply BC for (0 presumably means the whole run — confirm with trainer)
    strength: float = 1.0  # scale of the BC influence
    samples_per_update: int = 0  # demo samples per update (0 presumably means use all — confirm)
    # Setting either of these to None will allow the Optimizer
    # to decide these parameters, based on Trainer hyperparams
    num_epoch: Optional[int] = None
    batch_size: Optional[int] = None
|||
|
|||
|
|||
@attr.s(auto_attribs=True)
class HyperparamSettings:
    """Hyperparameters common to all trainer types (PPO/SAC override defaults)."""

    batch_size: int = 1024  # experiences per gradient update
    buffer_size: int = 10240  # experience buffer size
    learning_rate: float = 3.0e-4
    learning_rate_schedule: ScheduleType = ScheduleType.CONSTANT
|||
|
|||
|
|||
@attr.s(auto_attribs=True)
class PPOSettings(HyperparamSettings):
    """PPO-specific hyperparameters."""

    beta: float = 5.0e-3  # entropy regularization strength
    epsilon: float = 0.2  # PPO surrogate clip range
    lambd: float = 0.95  # GAE lambda
    num_epoch: int = 3  # passes over the buffer per update
    # PPO overrides the base default of CONSTANT with a decaying schedule.
    learning_rate_schedule: ScheduleType = ScheduleType.LINEAR
|||
|
|||
|
|||
@attr.s(auto_attribs=True)
class SACSettings(HyperparamSettings):
    """SAC-specific hyperparameters."""

    batch_size: int = 128
    buffer_size: int = 50000  # replay buffer capacity
    buffer_init_steps: int = 0  # steps collected before updates begin
    tau: float = 0.005  # target network soft-update rate
    steps_per_update: float = 1  # environment steps per policy update
    save_replay_buffer: bool = False  # persist the replay buffer across runs
    init_entcoef: float = 1.0  # initial entropy coefficient
    # Defaults to the same cadence as policy updates (see default method below).
    reward_signal_steps_per_update: float = attr.ib()

    @reward_signal_steps_per_update.default
    def _reward_signal_steps_per_update_default(self):
        # Mirror steps_per_update unless the user overrides it explicitly.
        return self.steps_per_update
|||
|
|||
|
|||
class RewardSignalType(Enum):
    """Identifier for each supported reward signal."""

    EXTRINSIC: str = "extrinsic"
    GAIL: str = "gail"
    CURIOSITY: str = "curiosity"

    def to_settings(self) -> type:
        """Return the settings class that configures this reward signal."""
        return {
            RewardSignalType.EXTRINSIC: RewardSignalSettings,
            RewardSignalType.GAIL: GAILSettings,
            RewardSignalType.CURIOSITY: CuriositySettings,
        }[self]
|||
|
|||
|
|||
@attr.s(auto_attribs=True)
class RewardSignalSettings:
    """Base settings shared by all reward signals (used directly for extrinsic)."""

    gamma: float = 0.99  # discount factor for this signal
    strength: float = 1.0  # multiplier applied to this signal's reward

    @staticmethod
    def structure(d: Mapping, t: type) -> Any:
        """
        Helper method to structure a Dict of RewardSignalSettings class. Meant to be registered with
        cattr.register_structure_hook() and called with cattr.structure(). This is needed to handle
        the special Enum selection of RewardSignalSettings classes.
        """
        if not isinstance(d, Mapping):
            raise TrainerConfigError(f"Unsupported reward signal configuration {d}.")
        d_final: Dict[RewardSignalType, RewardSignalSettings] = {}
        for key, val in d.items():
            enum_key = RewardSignalType(key)
            # Use a dedicated name rather than rebinding the parameter `t`,
            # which previously shadowed the hook's type argument inside the loop.
            settings_cls = enum_key.to_settings()
            d_final[enum_key] = strict_to_cls(val, settings_cls)
        return d_final
|||
|
|||
|
|||
@attr.s(auto_attribs=True)
class GAILSettings(RewardSignalSettings):
    """Settings for the GAIL (adversarial imitation) reward signal."""

    encoding_size: int = 64  # size of the discriminator's encoding layer
    learning_rate: float = 3e-4  # discriminator learning rate
    use_actions: bool = False  # presumably feeds actions to the discriminator too — confirm with signal impl
    use_vail: bool = False  # enable the variational (VAIL) variant
    demo_path: str = attr.ib(kw_only=True)  # required: path to expert demonstrations
|||
|
|||
|
|||
@attr.s(auto_attribs=True)
class CuriositySettings(RewardSignalSettings):
    """Settings for the curiosity (intrinsic) reward signal."""

    encoding_size: int = 64  # size of the curiosity model's encoding
    learning_rate: float = 3e-4  # learning rate of the curiosity model
|||
|
|||
|
|||
@attr.s(auto_attribs=True)
class SelfPlaySettings:
    """Settings controlling self-play (training against past policy snapshots)."""

    save_steps: int = 20000  # steps between policy snapshots
    team_change: int = attr.ib()

    @team_change.default
    def _team_change_default(self):
        # Assign team_change to 5x save_steps by default.
        # (The old comment said "about 4x" but the code multiplies by 5.)
        return self.save_steps * 5

    swap_steps: int = 10000  # steps between opponent swaps
    window: int = 10  # number of past snapshots available as opponents
    play_against_latest_model_ratio: float = 0.5
    initial_elo: float = 1200.0  # starting ELO rating
|||
|
|||
|
|||
class TrainerType(Enum):
    """Identifier for each supported trainer algorithm."""

    PPO: str = "ppo"
    SAC: str = "sac"

    def to_settings(self) -> type:
        """Return the hyperparameter settings class for this trainer type."""
        return {TrainerType.PPO: PPOSettings, TrainerType.SAC: SACSettings}[self]
|||
|
|||
|
|||
@attr.s(auto_attribs=True)
class TrainerSettings(ExportableSettings):
    """
    Top-level per-behavior training settings: algorithm choice, hyperparameters,
    network architecture, reward signals, and run lengths.
    """

    trainer_type: TrainerType = TrainerType.PPO  # which algorithm to use
    hyperparameters: HyperparamSettings = attr.ib()

    @hyperparameters.default
    def _set_default_hyperparameters(self):
        # Default hyperparameter class depends on the selected trainer_type.
        return self.trainer_type.to_settings()()

    network_settings: NetworkSettings = attr.ib(factory=NetworkSettings)
    # Default to a single extrinsic reward signal.
    reward_signals: Dict[RewardSignalType, RewardSignalSettings] = attr.ib(
        factory=lambda: {RewardSignalType.EXTRINSIC: RewardSignalSettings()}
    )
    init_path: Optional[str] = None  # optional model to initialize weights from
    output_path: str = "default"
    keep_checkpoints: int = 5
    max_steps: int = 500000
    time_horizon: int = 64
    summary_freq: int = 50000
    threaded: bool = True
    self_play: Optional[SelfPlaySettings] = None  # None disables self-play
    behavioral_cloning: Optional[BehavioralCloningSettings] = None  # None disables BC

    # Registered at class-body execution so reward_signals dicts structure correctly.
    cattr.register_structure_hook(
        Dict[RewardSignalType, RewardSignalSettings], RewardSignalSettings.structure
    )

    @network_settings.validator
    def _check_batch_size_seq_length(self, attribute, value):
        # With recurrence, batches are sliced into sequences, so the sequence
        # length cannot exceed the batch size.
        if self.network_settings.memory is not None:
            if (
                self.network_settings.memory.sequence_length
                > self.hyperparameters.batch_size
            ):
                raise TrainerConfigError(
                    "When using memory, sequence length must be less than or equal to batch size. "
                )

    @staticmethod
    def dict_to_defaultdict(d: Dict, t: type) -> DefaultDict:
        """Structure hook: yield a defaultdict so unknown behaviors get default settings."""
        return collections.defaultdict(
            TrainerSettings, cattr.structure(d, Dict[str, TrainerSettings])
        )

    @staticmethod
    def structure(d: Mapping, t: type) -> Any:
        """
        Helper method to structure a TrainerSettings class. Meant to be registered with
        cattr.register_structure_hook() and called with cattr.structure().
        """
        if not isinstance(d, Mapping):
            raise TrainerConfigError(f"Unsupported config {d} for {t.__name__}.")
        d_copy: Dict[str, Any] = {}
        d_copy.update(d)

        for key, val in d_copy.items():
            if attr.has(type(val)):
                # Don't convert already-converted attrs classes.
                continue
            if key == "hyperparameters":
                # Hyperparameters can only be interpreted relative to a trainer_type.
                if "trainer_type" not in d_copy:
                    raise TrainerConfigError(
                        "Hyperparameters were specified but no trainer_type was given."
                    )
                else:
                    d_copy[key] = strict_to_cls(
                        d_copy[key], TrainerType(d_copy["trainer_type"]).to_settings()
                    )
            elif key == "max_steps":
                d_copy[key] = int(float(val))
                # In some legacy configs, max steps was specified as a float
            else:
                d_copy[key] = check_and_structure(key, val, t)
        return t(**d_copy)
|||
|
|||
|
|||
@attr.s(auto_attribs=True)
class CurriculumSettings:
    """Settings for curriculum learning (lesson progression) on one behavior."""

    class MeasureType:
        # String constants naming the metric that drives lesson progression.
        PROGRESS: str = "progress"
        REWARD: str = "reward"

    measure: str = attr.ib(default=MeasureType.REWARD)  # which metric drives thresholds
    thresholds: List[int] = attr.ib(factory=list)  # measure values that advance lessons
    min_lesson_length: int = 0  # minimum length before a lesson may change — presumably episodes; confirm
    signal_smoothing: bool = True  # smooth the measure before threshold comparison
    parameters: Dict[str, List[float]] = attr.ib(kw_only=True)  # required: per-lesson env parameter values
|||
|
|||
|
|||
@attr.s(auto_attribs=True)
class CheckpointSettings:
    """Run/checkpoint options; defaults mirror the CLI parser's defaults."""

    save_freq: int = parser.get_default("save_freq")
    run_id: str = parser.get_default("run_id")
    initialize_from: str = parser.get_default("initialize_from")
    load_model: bool = parser.get_default("load_model")  # deprecated alias for resume (see from_argparse)
    resume: bool = parser.get_default("resume")
    force: bool = parser.get_default("force")
    train_model: bool = parser.get_default("train_model")
    inference: bool = parser.get_default("inference")
    lesson: int = parser.get_default("lesson")
|||
|
|||
|
|||
@attr.s(auto_attribs=True)
class EnvironmentSettings:
    """Unity environment launch options; defaults mirror the CLI parser's defaults."""

    env_path: Optional[str] = parser.get_default("env_path")
    env_args: Optional[List[str]] = parser.get_default("env_args")
    base_port: int = parser.get_default("base_port")
    num_envs: int = parser.get_default("num_envs")
    seed: int = parser.get_default("seed")
|||
|
|||
|
|||
@attr.s(auto_attribs=True)
class EngineSettings:
    """Unity engine display/timing options; defaults mirror the CLI parser's defaults."""

    width: int = parser.get_default("width")
    height: int = parser.get_default("height")
    quality_level: int = parser.get_default("quality_level")
    time_scale: float = parser.get_default("time_scale")
    target_frame_rate: int = parser.get_default("target_frame_rate")
    capture_frame_rate: int = parser.get_default("capture_frame_rate")
    no_graphics: bool = parser.get_default("no_graphics")
|||
|
|||
|
|||
@attr.s(auto_attribs=True)
class RunOptions(ExportableSettings):
    """
    The full set of options for a training run: per-behavior trainer settings
    plus environment, engine, checkpoint, and debug options.
    """

    # Per-behavior settings; a defaultdict so unseen behaviors get defaults.
    behaviors: DefaultDict[str, TrainerSettings] = attr.ib(
        factory=lambda: collections.defaultdict(TrainerSettings)
    )
    env_settings: EnvironmentSettings = attr.ib(factory=EnvironmentSettings)
    engine_settings: EngineSettings = attr.ib(factory=EngineSettings)
    parameter_randomization: Optional[Dict] = None  # passed through unvalidated
    curriculum: Optional[Dict[str, CurriculumSettings]] = None
    checkpoint_settings: CheckpointSettings = attr.ib(factory=CheckpointSettings)

    # These are options that are relevant to the run itself, and not the engine or environment.
    # They will be left here.
    debug: bool = parser.get_default("debug")
    # Strict conversion: unknown keys in these sections raise TrainerConfigError.
    cattr.register_structure_hook(EnvironmentSettings, strict_to_cls)
    cattr.register_structure_hook(EngineSettings, strict_to_cls)
    cattr.register_structure_hook(CheckpointSettings, strict_to_cls)
    cattr.register_structure_hook(CurriculumSettings, strict_to_cls)
    cattr.register_structure_hook(TrainerSettings, TrainerSettings.structure)
    cattr.register_structure_hook(
        DefaultDict[str, TrainerSettings], TrainerSettings.dict_to_defaultdict
    )
    cattr.register_unstructure_hook(collections.defaultdict, defaultdict_to_dict)

    @staticmethod
    def from_argparse(args: argparse.Namespace) -> "RunOptions":
        """
        Takes an argparse.Namespace as specified in `parse_command_line`, loads input configuration files
        from file paths, and converts to a RunOptions instance.
        :param args: collection of command-line parameters passed to mlagents-learn
        :return: RunOptions representing the passed in arguments, with trainer config, curriculum and sampler
        configs loaded from files.
        """
        argparse_args = vars(args)
        config_path = StoreConfigFile.trainer_config_path

        # Load YAML
        configured_dict: Dict[str, Any] = {
            "checkpoint_settings": {},
            "env_settings": {},
            "engine_settings": {},
        }
        if config_path is not None:
            configured_dict.update(load_config(config_path))

        # Use the YAML file values for all values not specified in the CLI.
        for key in configured_dict.keys():
            # Detect bad config options
            if key not in attr.fields_dict(RunOptions):
                raise TrainerConfigError(
                    "The option {} was specified in your YAML file, but is invalid.".format(
                        key
                    )
                )
        # Override with CLI args
        # Keep deprecated --load working, TODO: remove
        argparse_args["resume"] = argparse_args["resume"] or argparse_args["load_model"]
        for key, val in argparse_args.items():
            if key in DetectDefault.non_default_args:
                # Route each explicitly-passed CLI arg to the section that owns it.
                if key in attr.fields_dict(CheckpointSettings):
                    configured_dict["checkpoint_settings"][key] = val
                elif key in attr.fields_dict(EnvironmentSettings):
                    configured_dict["env_settings"][key] = val
                elif key in attr.fields_dict(EngineSettings):
                    configured_dict["engine_settings"][key] = val
                else:  # Base options
                    configured_dict[key] = val
        return RunOptions.from_dict(configured_dict)

    @staticmethod
    def from_dict(options_dict: Dict[str, Any]) -> "RunOptions":
        """Structure a plain dict (e.g. parsed YAML) into a RunOptions instance."""
        return cattr.structure(options_dict, RunOptions)
|
|||
import attr |
|||
import pytest |
|||
|
|||
from typing import Dict |
|||
|
|||
from mlagents.trainers.settings import ( |
|||
RunOptions, |
|||
TrainerSettings, |
|||
PPOSettings, |
|||
SACSettings, |
|||
RewardSignalType, |
|||
RewardSignalSettings, |
|||
CuriositySettings, |
|||
TrainerType, |
|||
strict_to_cls, |
|||
) |
|||
from mlagents.trainers.exception import TrainerConfigError |
|||
|
|||
|
|||
def check_if_different(testobj1: object, testobj2: object) -> None:
    """Recursively assert that two attrs instances share no mutable members."""
    assert testobj1 is not testobj2
    if attr.has(testobj1.__class__) and attr.has(testobj2.__class__):
        fields1 = attr.asdict(testobj1, recurse=False)
        fields2 = attr.asdict(testobj2, recurse=False)
        for key, val in fields1.items():
            if isinstance(val, (dict, list)) or attr.has(val):
                # Note: this check doesn't check the contents of mutables.
                check_if_different(val, fields2[key])
|||
|
|||
|
|||
def test_is_new_instance():
    """
    Verify that every instance of RunOptions() and its subclasses
    is a new instance (i.e. all factory methods are used properly.)
    """
    for settings_class in (RunOptions, TrainerSettings):
        check_if_different(settings_class(), settings_class())
|||
|
|||
|
|||
def test_no_configuration():
    """
    Verify that a new config will have a PPO trainer with extrinsic rewards.
    """
    blank_runoptions = RunOptions()
    default_behavior = blank_runoptions.behaviors["test"]
    assert isinstance(default_behavior, TrainerSettings)
    assert isinstance(default_behavior.hyperparameters, PPOSettings)
    assert RewardSignalType.EXTRINSIC in default_behavior.reward_signals
|||
|
|||
|
|||
def test_strict_to_cls():
    """
    Test strict structuring method.
    """

    @attr.s(auto_attribs=True)
    class TestAttrsClass:
        field1: int = 0
        field2: str = "test"

    correct_dict = {"field1": 1, "field2": "test2"}
    structured = strict_to_cls(correct_dict, TestAttrsClass)
    assert structured == TestAttrsClass(**correct_dict)

    # Unknown keys and non-mapping inputs should both be rejected.
    for bad_input in ({"field3": 1, "field2": "test2"}, "non_dict_input"):
        with pytest.raises(TrainerConfigError):
            strict_to_cls(bad_input, TestAttrsClass)
|||
|
|||
|
|||
def test_trainersettings_structure():
    """
    Test structuring method for TrainerSettings
    """
    valid_dict = {
        "trainer_type": "sac",
        "hyperparameters": {"batch_size": 1024},
        "max_steps": 1.0,
        "reward_signals": {"curiosity": {"encoding_size": 64}},
    }
    trainer_settings = TrainerSettings.structure(valid_dict, TrainerSettings)
    assert trainer_settings.trainer_type == TrainerType.SAC
    assert isinstance(trainer_settings.hyperparameters, SACSettings)
    assert isinstance(trainer_settings.max_steps, int)
    assert RewardSignalType.CURIOSITY in trainer_settings.reward_signals

    # An unknown trainer type raises ValueError from the Enum lookup.
    with pytest.raises(ValueError):
        TrainerSettings.structure(
            {
                "trainer_type": "puppo",
                "hyperparameters": {"batch_size": 1024},
                "max_steps": 1.0,
            },
            TrainerSettings,
        )

    # An unknown hyperparameter is rejected.
    with pytest.raises(TrainerConfigError):
        TrainerSettings.structure(
            {
                "trainer_type": "ppo",
                "hyperparameters": {"notahyperparam": 1024},
                "max_steps": 1.0,
            },
            TrainerSettings,
        )

    # Non-mapping input is rejected.
    with pytest.raises(TrainerConfigError):
        TrainerSettings.structure("notadict", TrainerSettings)

    # Check hyperparameters specified but trainer type left as default.
    # This shouldn't work as you could specify non-PPO hyperparameters.
    with pytest.raises(TrainerConfigError):
        TrainerSettings.structure(
            {"hyperparameters": {"batch_size": 1024}}, TrainerSettings
        )
|||
|
|||
|
|||
def test_reward_signal_structure():
    """
    Tests the RewardSignalSettings structure method. This one is special b/c
    it takes in a Dict[RewardSignalType, RewardSignalSettings].
    """
    target_type = Dict[RewardSignalType, RewardSignalSettings]

    reward_signals = RewardSignalSettings.structure(
        {"extrinsic": {"strength": 1.0}, "curiosity": {"strength": 1.0}}, target_type
    )
    assert isinstance(reward_signals[RewardSignalType.EXTRINSIC], RewardSignalSettings)
    assert isinstance(reward_signals[RewardSignalType.CURIOSITY], CuriositySettings)

    # An unknown reward signal name raises ValueError from the Enum lookup.
    with pytest.raises(ValueError):
        RewardSignalSettings.structure({"puppo": {"strength": 1.0}}, target_type)

    # GAIL without its mandatory demo_path raises TypeError.
    with pytest.raises(TypeError):
        RewardSignalSettings.structure({"gail": {"strength": 1.0}}, target_type)

    # Non-mapping input is rejected.
    with pytest.raises(TrainerConfigError):
        RewardSignalSettings.structure("notadict", target_type)
撰写
预览
正在加载...
取消
保存
Reference in new issue