当前提交
e92b4f88
共有 98 个文件被更改,包括 3088 次插入 和 3176 次删除
-
2com.unity.ml-agents/CHANGELOG.md
-
46config/imitation/CrawlerStatic.yaml
-
46config/imitation/FoodCollector.yaml
-
48config/imitation/Hallway.yaml
-
43config/imitation/PushBlock.yaml
-
30config/imitation/Pyramids.yaml
-
42config/ppo/3DBall.yaml
-
42config/ppo/3DBallHard.yaml
-
76config/ppo/3DBall_randomize.yaml
-
42config/ppo/Basic.yaml
-
42config/ppo/Bouncer.yaml
-
42config/ppo/CrawlerDynamic.yaml
-
42config/ppo/CrawlerStatic.yaml
-
42config/ppo/FoodCollector.yaml
-
42config/ppo/GridWorld.yaml
-
45config/ppo/Hallway.yaml
-
42config/ppo/PushBlock.yaml
-
45config/ppo/Pyramids.yaml
-
42config/ppo/Reacher.yaml
-
56config/ppo/SoccerTwos.yaml
-
99config/ppo/StrikersVsGoalie.yaml
-
49config/ppo/Tennis.yaml
-
45config/ppo/VisualHallway.yaml
-
45config/ppo/VisualPushBlock.yaml
-
45config/ppo/VisualPyramids.yaml
-
42config/ppo/Walker.yaml
-
83config/ppo/WallJump.yaml
-
115config/ppo/WallJump_curriculum.yaml
-
42config/ppo/WormDynamic.yaml
-
42config/ppo/WormStatic.yaml
-
44config/sac/3DBall.yaml
-
44config/sac/3DBallHard.yaml
-
44config/sac/Basic.yaml
-
44config/sac/Bouncer.yaml
-
44config/sac/CrawlerDynamic.yaml
-
44config/sac/CrawlerStatic.yaml
-
44config/sac/FoodCollector.yaml
-
44config/sac/GridWorld.yaml
-
47config/sac/Hallway.yaml
-
44config/sac/PushBlock.yaml
-
48config/sac/Pyramids.yaml
-
44config/sac/Reacher.yaml
-
50config/sac/Tennis.yaml
-
48config/sac/VisualHallway.yaml
-
48config/sac/VisualPushBlock.yaml
-
48config/sac/VisualPyramids.yaml
-
44config/sac/Walker.yaml
-
87config/sac/WallJump.yaml
-
44config/sac/WormDynamic.yaml
-
44config/sac/WormStatic.yaml
-
13docs/Migrating.md
-
113docs/Training-Configuration-File.md
-
141docs/Training-ML-Agents.md
-
233ml-agents/mlagents/trainers/cli_utils.py
-
39ml-agents/mlagents/trainers/components/bc/module.py
-
29ml-agents/mlagents/trainers/components/reward_signals/__init__.py
-
36ml-agents/mlagents/trainers/components/reward_signals/curiosity/signal.py
-
12ml-agents/mlagents/trainers/components/reward_signals/extrinsic/signal.py
-
47ml-agents/mlagents/trainers/components/reward_signals/gail/signal.py
-
22ml-agents/mlagents/trainers/components/reward_signals/reward_signal_factory.py
-
66ml-agents/mlagents/trainers/curriculum.py
-
22ml-agents/mlagents/trainers/ghost/trainer.py
-
377ml-agents/mlagents/trainers/learn.py
-
7ml-agents/mlagents/trainers/meta_curriculum.py
-
29ml-agents/mlagents/trainers/optimizer/tf_optimizer.py
-
11ml-agents/mlagents/trainers/policy/nn_policy.py
-
41ml-agents/mlagents/trainers/policy/tf_policy.py
-
33ml-agents/mlagents/trainers/ppo/optimizer.py
-
42ml-agents/mlagents/trainers/ppo/trainer.py
-
6ml-agents/mlagents/trainers/run_experiment.py
-
38ml-agents/mlagents/trainers/sac/optimizer.py
-
100ml-agents/mlagents/trainers/sac/trainer.py
-
39ml-agents/mlagents/trainers/tests/test_barracuda_converter.py
-
100ml-agents/mlagents/trainers/tests/test_bcmodule.py
-
94ml-agents/mlagents/trainers/tests/test_curriculum.py
-
36ml-agents/mlagents/trainers/tests/test_distributions.py
-
40ml-agents/mlagents/trainers/tests/test_ghost.py
-
133ml-agents/mlagents/trainers/tests/test_learn.py
-
55ml-agents/mlagents/trainers/tests/test_meta_curriculum.py
-
72ml-agents/mlagents/trainers/tests/test_nn_policy.py
-
11ml-agents/mlagents/trainers/tests/test_policy.py
-
99ml-agents/mlagents/trainers/tests/test_ppo.py
-
141ml-agents/mlagents/trainers/tests/test_reward_signals.py
-
18ml-agents/mlagents/trainers/tests/test_rl_trainer.py
-
83ml-agents/mlagents/trainers/tests/test_sac.py
-
388ml-agents/mlagents/trainers/tests/test_simple_rl.py
-
4ml-agents/mlagents/trainers/tests/test_subprocess_env_manager.py
-
274ml-agents/mlagents/trainers/tests/test_trainer_util.py
-
10ml-agents/mlagents/trainers/trainer/rl_trainer.py
-
28ml-agents/mlagents/trainers/trainer/trainer.py
-
8ml-agents/mlagents/trainers/trainer_controller.py
-
106ml-agents/mlagents/trainers/trainer_util.py
-
2ml-agents/setup.py
-
8ml-agents/tests/yamato/training_int_tests.py
-
12ml-agents/tests/yamato/yamato_utils.py
-
110config/upgrade_config.py
-
373ml-agents/mlagents/trainers/settings.py
-
151ml-agents/mlagents/trainers/tests/test_settings.py
|
|||
behaviors: |
|||
CrawlerStatic: |
|||
trainer: ppo |
|||
batch_size: 2024 |
|||
beta: 0.005 |
|||
buffer_size: 20240 |
|||
epsilon: 0.2 |
|||
hidden_units: 512 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
max_steps: 1e7 |
|||
memory_size: 256 |
|||
normalize: true |
|||
num_epoch: 3 |
|||
num_layers: 3 |
|||
time_horizon: 1000 |
|||
sequence_length: 64 |
|||
summary_freq: 30000 |
|||
use_recurrent: false |
|||
trainer_type: ppo |
|||
hyperparameters: |
|||
batch_size: 2024 |
|||
buffer_size: 20240 |
|||
learning_rate: 0.0003 |
|||
beta: 0.005 |
|||
epsilon: 0.2 |
|||
lambd: 0.95 |
|||
num_epoch: 3 |
|||
learning_rate_schedule: linear |
|||
network_settings: |
|||
normalize: true |
|||
hidden_units: 512 |
|||
num_layers: 3 |
|||
vis_encode_type: simple |
|||
strength: 1.0 |
|||
strength: 1.0 |
|||
learning_rate: 0.0003 |
|||
use_actions: false |
|||
use_vail: false |
|||
output_path: default |
|||
keep_checkpoints: 5 |
|||
max_steps: 10000000 |
|||
time_horizon: 1000 |
|||
summary_freq: 30000 |
|||
threaded: true |
|||
steps: 50000 |
|||
steps: 50000 |
|||
samples_per_update: 0 |
|
|||
behaviors: |
|||
FoodCollector: |
|||
trainer: ppo |
|||
batch_size: 64 |
|||
beta: 0.005 |
|||
buffer_size: 10240 |
|||
epsilon: 0.2 |
|||
hidden_units: 128 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
max_steps: 2.0e6 |
|||
memory_size: 256 |
|||
normalize: false |
|||
num_epoch: 3 |
|||
num_layers: 2 |
|||
time_horizon: 64 |
|||
sequence_length: 32 |
|||
summary_freq: 10000 |
|||
use_recurrent: false |
|||
trainer_type: ppo |
|||
hyperparameters: |
|||
batch_size: 64 |
|||
buffer_size: 10240 |
|||
learning_rate: 0.0003 |
|||
beta: 0.005 |
|||
epsilon: 0.2 |
|||
lambd: 0.95 |
|||
num_epoch: 3 |
|||
learning_rate_schedule: linear |
|||
network_settings: |
|||
normalize: false |
|||
hidden_units: 128 |
|||
num_layers: 2 |
|||
vis_encode_type: simple |
|||
strength: 0.1 |
|||
strength: 0.1 |
|||
learning_rate: 0.0003 |
|||
use_actions: false |
|||
use_vail: false |
|||
output_path: default |
|||
keep_checkpoints: 5 |
|||
max_steps: 2000000 |
|||
time_horizon: 64 |
|||
summary_freq: 10000 |
|||
threaded: true |
|||
steps: 0 |
|||
steps: 0 |
|||
samples_per_update: 0 |
|
|||
behaviors: |
|||
Hallway: |
|||
trainer: ppo |
|||
batch_size: 128 |
|||
beta: 0.01 |
|||
buffer_size: 1024 |
|||
epsilon: 0.2 |
|||
hidden_units: 128 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
max_steps: 1.0e7 |
|||
memory_size: 256 |
|||
normalize: false |
|||
num_epoch: 3 |
|||
num_layers: 2 |
|||
time_horizon: 64 |
|||
sequence_length: 64 |
|||
summary_freq: 10000 |
|||
use_recurrent: true |
|||
trainer_type: ppo |
|||
hyperparameters: |
|||
batch_size: 128 |
|||
buffer_size: 1024 |
|||
learning_rate: 0.0003 |
|||
beta: 0.01 |
|||
epsilon: 0.2 |
|||
lambd: 0.95 |
|||
num_epoch: 3 |
|||
learning_rate_schedule: linear |
|||
network_settings: |
|||
normalize: false |
|||
hidden_units: 128 |
|||
num_layers: 2 |
|||
vis_encode_type: simple |
|||
memory: |
|||
sequence_length: 64 |
|||
memory_size: 256 |
|||
strength: 1.0 |
|||
strength: 1.0 |
|||
gamma: 0.99 |
|||
gamma: 0.99 |
|||
learning_rate: 0.0003 |
|||
use_actions: false |
|||
use_vail: false |
|||
output_path: default |
|||
keep_checkpoints: 5 |
|||
max_steps: 10000000 |
|||
time_horizon: 64 |
|||
summary_freq: 10000 |
|||
threaded: true |
|
|||
behaviors: |
|||
PushBlock: |
|||
trainer: ppo |
|||
batch_size: 128 |
|||
beta: 0.01 |
|||
buffer_size: 2048 |
|||
epsilon: 0.2 |
|||
hidden_units: 256 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
max_steps: 1.5e7 |
|||
memory_size: 256 |
|||
normalize: false |
|||
num_epoch: 3 |
|||
num_layers: 2 |
|||
time_horizon: 64 |
|||
sequence_length: 64 |
|||
summary_freq: 60000 |
|||
use_recurrent: false |
|||
trainer_type: ppo |
|||
hyperparameters: |
|||
batch_size: 128 |
|||
buffer_size: 2048 |
|||
learning_rate: 0.0003 |
|||
beta: 0.01 |
|||
epsilon: 0.2 |
|||
lambd: 0.95 |
|||
num_epoch: 3 |
|||
learning_rate_schedule: linear |
|||
network_settings: |
|||
normalize: false |
|||
hidden_units: 256 |
|||
num_layers: 2 |
|||
vis_encode_type: simple |
|||
gamma: 0.99 |
|||
gamma: 0.99 |
|||
learning_rate: 0.0003 |
|||
use_actions: false |
|||
use_vail: false |
|||
output_path: default |
|||
keep_checkpoints: 5 |
|||
max_steps: 15000000 |
|||
time_horizon: 64 |
|||
summary_freq: 60000 |
|||
threaded: true |
|
|||
behaviors: |
|||
3DBall: |
|||
trainer: ppo |
|||
batch_size: 64 |
|||
beta: 0.001 |
|||
buffer_size: 12000 |
|||
epsilon: 0.2 |
|||
hidden_units: 128 |
|||
lambd: 0.99 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: linear |
|||
max_steps: 5.0e5 |
|||
memory_size: 128 |
|||
normalize: true |
|||
num_epoch: 3 |
|||
num_layers: 2 |
|||
time_horizon: 1000 |
|||
sequence_length: 64 |
|||
summary_freq: 12000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
trainer_type: ppo |
|||
hyperparameters: |
|||
batch_size: 64 |
|||
buffer_size: 12000 |
|||
learning_rate: 0.0003 |
|||
beta: 0.001 |
|||
epsilon: 0.2 |
|||
lambd: 0.99 |
|||
num_epoch: 3 |
|||
learning_rate_schedule: linear |
|||
network_settings: |
|||
normalize: true |
|||
hidden_units: 128 |
|||
num_layers: 2 |
|||
vis_encode_type: simple |
|||
gamma: 0.99 |
|||
gamma: 0.99 |
|||
output_path: default |
|||
keep_checkpoints: 5 |
|||
max_steps: 500000 |
|||
time_horizon: 1000 |
|||
summary_freq: 12000 |
|||
threaded: true |
|
|||
behaviors: |
|||
3DBallHard: |
|||
trainer: ppo |
|||
batch_size: 1200 |
|||
beta: 0.001 |
|||
buffer_size: 12000 |
|||
epsilon: 0.2 |
|||
hidden_units: 128 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: linear |
|||
max_steps: 5.0e6 |
|||
memory_size: 128 |
|||
normalize: true |
|||
num_epoch: 3 |
|||
num_layers: 2 |
|||
time_horizon: 1000 |
|||
sequence_length: 64 |
|||
summary_freq: 12000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
trainer_type: ppo |
|||
hyperparameters: |
|||
batch_size: 1200 |
|||
buffer_size: 12000 |
|||
learning_rate: 0.0003 |
|||
beta: 0.001 |
|||
epsilon: 0.2 |
|||
lambd: 0.95 |
|||
num_epoch: 3 |
|||
learning_rate_schedule: linear |
|||
network_settings: |
|||
normalize: true |
|||
hidden_units: 128 |
|||
num_layers: 2 |
|||
vis_encode_type: simple |
|||
gamma: 0.995 |
|||
gamma: 0.995 |
|||
output_path: default |
|||
keep_checkpoints: 5 |
|||
max_steps: 5000000 |
|||
time_horizon: 1000 |
|||
summary_freq: 12000 |
|||
threaded: true |
|
|||
behaviors: |
|||
3DBall: |
|||
trainer: ppo |
|||
batch_size: 64 |
|||
beta: 0.001 |
|||
buffer_size: 12000 |
|||
epsilon: 0.2 |
|||
hidden_units: 128 |
|||
lambd: 0.99 |
|||
learning_rate: 3.0e-4 |
|||
learning_rate_schedule: linear |
|||
max_steps: 5.0e5 |
|||
memory_size: 128 |
|||
normalize: true |
|||
num_epoch: 3 |
|||
num_layers: 2 |
|||
time_horizon: 1000 |
|||
sequence_length: 64 |
|||
summary_freq: 12000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.99 |
|||
3DBall: |
|||
trainer_type: ppo |
|||
hyperparameters: |
|||
batch_size: 64 |
|||
buffer_size: 12000 |
|||
learning_rate: 0.0003 |
|||
beta: 0.001 |
|||
epsilon: 0.2 |
|||
lambd: 0.99 |
|||
num_epoch: 3 |
|||
learning_rate_schedule: linear |
|||
network_settings: |
|||
normalize: true |
|||
hidden_units: 128 |
|||
num_layers: 2 |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
gamma: 0.99 |
|||
strength: 1.0 |
|||
output_path: default |
|||
keep_checkpoints: 5 |
|||
max_steps: 500000 |
|||
time_horizon: 1000 |
|||
summary_freq: 12000 |
|||
threaded: true |
|||
resampling-interval: 5000 |
|||
mass: |
|||
sampler-type: "uniform" |
|||
min_value: 0.5 |
|||
max_value: 10 |
|||
gravity: |
|||
sampler-type: "uniform" |
|||
min_value: 7 |
|||
max_value: 12 |
|||
scale: |
|||
sampler-type: "uniform" |
|||
min_value: 0.75 |
|||
max_value: 3 |
|||
resampling-interval: 5000 |
|||
mass: |
|||
sampler-type: uniform |
|||
min_value: 0.5 |
|||
max_value: 10 |
|||
gravity: |
|||
sampler-type: uniform |
|||
min_value: 7 |
|||
max_value: 12 |
|||
scale: |
|||
sampler-type: uniform |
|||
min_value: 0.75 |
|||
max_value: 3 |
|
|||
behaviors: |
|||
Basic: |
|||
trainer: ppo |
|||
batch_size: 32 |
|||
beta: 0.005 |
|||
buffer_size: 256 |
|||
epsilon: 0.2 |
|||
hidden_units: 20 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: linear |
|||
max_steps: 5.0e5 |
|||
memory_size: 128 |
|||
normalize: false |
|||
num_epoch: 3 |
|||
num_layers: 1 |
|||
time_horizon: 3 |
|||
sequence_length: 64 |
|||
summary_freq: 2000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
trainer_type: ppo |
|||
hyperparameters: |
|||
batch_size: 32 |
|||
buffer_size: 256 |
|||
learning_rate: 0.0003 |
|||
beta: 0.005 |
|||
epsilon: 0.2 |
|||
lambd: 0.95 |
|||
num_epoch: 3 |
|||
learning_rate_schedule: linear |
|||
network_settings: |
|||
normalize: false |
|||
hidden_units: 20 |
|||
num_layers: 1 |
|||
vis_encode_type: simple |
|||
gamma: 0.9 |
|||
gamma: 0.9 |
|||
output_path: default |
|||
keep_checkpoints: 5 |
|||
max_steps: 500000 |
|||
time_horizon: 3 |
|||
summary_freq: 2000 |
|||
threaded: true |
|
|||
behaviors: |
|||
Bouncer: |
|||
trainer: ppo |
|||
batch_size: 1024 |
|||
beta: 0.005 |
|||
buffer_size: 10240 |
|||
epsilon: 0.2 |
|||
hidden_units: 64 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: linear |
|||
max_steps: 4.0e6 |
|||
memory_size: 128 |
|||
normalize: true |
|||
num_epoch: 3 |
|||
num_layers: 2 |
|||
time_horizon: 64 |
|||
sequence_length: 64 |
|||
summary_freq: 10000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
trainer_type: ppo |
|||
hyperparameters: |
|||
batch_size: 1024 |
|||
buffer_size: 10240 |
|||
learning_rate: 0.0003 |
|||
beta: 0.005 |
|||
epsilon: 0.2 |
|||
lambd: 0.95 |
|||
num_epoch: 3 |
|||
learning_rate_schedule: linear |
|||
network_settings: |
|||
normalize: true |
|||
hidden_units: 64 |
|||
num_layers: 2 |
|||
vis_encode_type: simple |
|||
gamma: 0.99 |
|||
gamma: 0.99 |
|||
output_path: default |
|||
keep_checkpoints: 5 |
|||
max_steps: 4000000 |
|||
time_horizon: 64 |
|||
summary_freq: 10000 |
|||
threaded: true |
|
|||
behaviors: |
|||
CrawlerDynamic: |
|||
trainer: ppo |
|||
batch_size: 2024 |
|||
beta: 0.005 |
|||
buffer_size: 20240 |
|||
epsilon: 0.2 |
|||
hidden_units: 512 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: linear |
|||
max_steps: 1e7 |
|||
memory_size: 128 |
|||
normalize: true |
|||
num_epoch: 3 |
|||
num_layers: 3 |
|||
time_horizon: 1000 |
|||
sequence_length: 64 |
|||
summary_freq: 30000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
trainer_type: ppo |
|||
hyperparameters: |
|||
batch_size: 2024 |
|||
buffer_size: 20240 |
|||
learning_rate: 0.0003 |
|||
beta: 0.005 |
|||
epsilon: 0.2 |
|||
lambd: 0.95 |
|||
num_epoch: 3 |
|||
learning_rate_schedule: linear |
|||
network_settings: |
|||
normalize: true |
|||
hidden_units: 512 |
|||
num_layers: 3 |
|||
vis_encode_type: simple |
|||
gamma: 0.995 |
|||
gamma: 0.995 |
|||
output_path: default |
|||
keep_checkpoints: 5 |
|||
max_steps: 10000000 |
|||
time_horizon: 1000 |
|||
summary_freq: 30000 |
|||
threaded: true |
|
|||
behaviors: |
|||
CrawlerStatic: |
|||
trainer: ppo |
|||
batch_size: 2024 |
|||
beta: 0.005 |
|||
buffer_size: 20240 |
|||
epsilon: 0.2 |
|||
hidden_units: 512 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: linear |
|||
max_steps: 1e7 |
|||
memory_size: 128 |
|||
normalize: true |
|||
num_epoch: 3 |
|||
num_layers: 3 |
|||
time_horizon: 1000 |
|||
sequence_length: 64 |
|||
summary_freq: 30000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
trainer_type: ppo |
|||
hyperparameters: |
|||
batch_size: 2024 |
|||
buffer_size: 20240 |
|||
learning_rate: 0.0003 |
|||
beta: 0.005 |
|||
epsilon: 0.2 |
|||
lambd: 0.95 |
|||
num_epoch: 3 |
|||
learning_rate_schedule: linear |
|||
network_settings: |
|||
normalize: true |
|||
hidden_units: 512 |
|||
num_layers: 3 |
|||
vis_encode_type: simple |
|||
gamma: 0.995 |
|||
gamma: 0.995 |
|||
output_path: default |
|||
keep_checkpoints: 5 |
|||
max_steps: 10000000 |
|||
time_horizon: 1000 |
|||
summary_freq: 30000 |
|||
threaded: true |
|
|||
behaviors: |
|||
FoodCollector: |
|||
trainer: ppo |
|||
batch_size: 1024 |
|||
beta: 0.005 |
|||
buffer_size: 10240 |
|||
epsilon: 0.2 |
|||
hidden_units: 128 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: linear |
|||
max_steps: 2.0e6 |
|||
memory_size: 128 |
|||
normalize: false |
|||
num_epoch: 3 |
|||
num_layers: 2 |
|||
time_horizon: 64 |
|||
sequence_length: 64 |
|||
summary_freq: 10000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
trainer_type: ppo |
|||
hyperparameters: |
|||
batch_size: 1024 |
|||
buffer_size: 10240 |
|||
learning_rate: 0.0003 |
|||
beta: 0.005 |
|||
epsilon: 0.2 |
|||
lambd: 0.95 |
|||
num_epoch: 3 |
|||
learning_rate_schedule: linear |
|||
network_settings: |
|||
normalize: false |
|||
hidden_units: 128 |
|||
num_layers: 2 |
|||
vis_encode_type: simple |
|||
gamma: 0.99 |
|||
gamma: 0.99 |
|||
output_path: default |
|||
keep_checkpoints: 5 |
|||
max_steps: 2000000 |
|||
time_horizon: 64 |
|||
summary_freq: 10000 |
|||
threaded: true |
|
|||
behaviors: |
|||
GridWorld: |
|||
trainer: ppo |
|||
batch_size: 32 |
|||
beta: 0.005 |
|||
buffer_size: 256 |
|||
epsilon: 0.2 |
|||
hidden_units: 256 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: linear |
|||
max_steps: 500000 |
|||
memory_size: 128 |
|||
normalize: false |
|||
num_epoch: 3 |
|||
num_layers: 1 |
|||
time_horizon: 5 |
|||
sequence_length: 64 |
|||
summary_freq: 20000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
trainer_type: ppo |
|||
hyperparameters: |
|||
batch_size: 32 |
|||
buffer_size: 256 |
|||
learning_rate: 0.0003 |
|||
beta: 0.005 |
|||
epsilon: 0.2 |
|||
lambd: 0.95 |
|||
num_epoch: 3 |
|||
learning_rate_schedule: linear |
|||
network_settings: |
|||
normalize: false |
|||
hidden_units: 256 |
|||
num_layers: 1 |
|||
vis_encode_type: simple |
|||
gamma: 0.9 |
|||
gamma: 0.9 |
|||
output_path: default |
|||
keep_checkpoints: 5 |
|||
max_steps: 500000 |
|||
time_horizon: 5 |
|||
summary_freq: 20000 |
|||
threaded: true |
|
|||
behaviors: |
|||
Hallway: |
|||
trainer: ppo |
|||
batch_size: 128 |
|||
beta: 0.01 |
|||
buffer_size: 1024 |
|||
epsilon: 0.2 |
|||
hidden_units: 128 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: linear |
|||
max_steps: 1.0e7 |
|||
memory_size: 128 |
|||
normalize: false |
|||
num_epoch: 3 |
|||
num_layers: 2 |
|||
time_horizon: 64 |
|||
sequence_length: 64 |
|||
summary_freq: 10000 |
|||
use_recurrent: true |
|||
vis_encode_type: simple |
|||
trainer_type: ppo |
|||
hyperparameters: |
|||
batch_size: 128 |
|||
buffer_size: 1024 |
|||
learning_rate: 0.0003 |
|||
beta: 0.01 |
|||
epsilon: 0.2 |
|||
lambd: 0.95 |
|||
num_epoch: 3 |
|||
learning_rate_schedule: linear |
|||
network_settings: |
|||
normalize: false |
|||
hidden_units: 128 |
|||
num_layers: 2 |
|||
vis_encode_type: simple |
|||
memory: |
|||
sequence_length: 64 |
|||
memory_size: 128 |
|||
gamma: 0.99 |
|||
gamma: 0.99 |
|||
output_path: default |
|||
keep_checkpoints: 5 |
|||
max_steps: 10000000 |
|||
time_horizon: 64 |
|||
summary_freq: 10000 |
|||
threaded: true |
|
|||
behaviors: |
|||
PushBlock: |
|||
trainer: ppo |
|||
batch_size: 128 |
|||
beta: 0.01 |
|||
buffer_size: 2048 |
|||
epsilon: 0.2 |
|||
hidden_units: 256 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: linear |
|||
max_steps: 2.0e6 |
|||
memory_size: 128 |
|||
normalize: false |
|||
num_epoch: 3 |
|||
num_layers: 2 |
|||
time_horizon: 64 |
|||
sequence_length: 64 |
|||
summary_freq: 60000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
trainer_type: ppo |
|||
hyperparameters: |
|||
batch_size: 128 |
|||
buffer_size: 2048 |
|||
learning_rate: 0.0003 |
|||
beta: 0.01 |
|||
epsilon: 0.2 |
|||
lambd: 0.95 |
|||
num_epoch: 3 |
|||
learning_rate_schedule: linear |
|||
network_settings: |
|||
normalize: false |
|||
hidden_units: 256 |
|||
num_layers: 2 |
|||
vis_encode_type: simple |
|||
gamma: 0.99 |
|||
gamma: 0.99 |
|||
output_path: default |
|||
keep_checkpoints: 5 |
|||
max_steps: 2000000 |
|||
time_horizon: 64 |
|||
summary_freq: 60000 |
|||
threaded: true |
|
|||
behaviors: |
|||
Pyramids: |
|||
trainer: ppo |
|||
batch_size: 128 |
|||
beta: 0.01 |
|||
buffer_size: 2048 |
|||
epsilon: 0.2 |
|||
hidden_units: 512 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: linear |
|||
max_steps: 1.0e7 |
|||
memory_size: 128 |
|||
normalize: false |
|||
num_epoch: 3 |
|||
num_layers: 2 |
|||
time_horizon: 128 |
|||
sequence_length: 64 |
|||
summary_freq: 30000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
trainer_type: ppo |
|||
hyperparameters: |
|||
batch_size: 128 |
|||
buffer_size: 2048 |
|||
learning_rate: 0.0003 |
|||
beta: 0.01 |
|||
epsilon: 0.2 |
|||
lambd: 0.95 |
|||
num_epoch: 3 |
|||
learning_rate_schedule: linear |
|||
network_settings: |
|||
normalize: false |
|||
hidden_units: 512 |
|||
num_layers: 2 |
|||
vis_encode_type: simple |
|||
strength: 1.0 |
|||
strength: 1.0 |
|||
strength: 0.02 |
|||
strength: 0.02 |
|||
learning_rate: 0.0003 |
|||
output_path: default |
|||
keep_checkpoints: 5 |
|||
max_steps: 10000000 |
|||
time_horizon: 128 |
|||
summary_freq: 30000 |
|||
threaded: true |
|
|||
behaviors: |
|||
Reacher: |
|||
trainer: ppo |
|||
batch_size: 2024 |
|||
beta: 0.005 |
|||
buffer_size: 20240 |
|||
epsilon: 0.2 |
|||
hidden_units: 128 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: linear |
|||
max_steps: 2e7 |
|||
memory_size: 128 |
|||
normalize: true |
|||
num_epoch: 3 |
|||
num_layers: 2 |
|||
time_horizon: 1000 |
|||
sequence_length: 64 |
|||
summary_freq: 60000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
trainer_type: ppo |
|||
hyperparameters: |
|||
batch_size: 2024 |
|||
buffer_size: 20240 |
|||
learning_rate: 0.0003 |
|||
beta: 0.005 |
|||
epsilon: 0.2 |
|||
lambd: 0.95 |
|||
num_epoch: 3 |
|||
learning_rate_schedule: linear |
|||
network_settings: |
|||
normalize: true |
|||
hidden_units: 128 |
|||
num_layers: 2 |
|||
vis_encode_type: simple |
|||
gamma: 0.995 |
|||
gamma: 0.995 |
|||
output_path: default |
|||
keep_checkpoints: 5 |
|||
max_steps: 20000000 |
|||
time_horizon: 1000 |
|||
summary_freq: 60000 |
|||
threaded: true |
|
|||
behaviors: |
|||
SoccerTwos: |
|||
trainer: ppo |
|||
batch_size: 2048 |
|||
beta: 0.005 |
|||
buffer_size: 20480 |
|||
epsilon: 0.2 |
|||
hidden_units: 512 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
max_steps: 5.0e7 |
|||
memory_size: 128 |
|||
normalize: false |
|||
num_epoch: 3 |
|||
num_layers: 2 |
|||
time_horizon: 1000 |
|||
sequence_length: 64 |
|||
summary_freq: 10000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
trainer_type: ppo |
|||
hyperparameters: |
|||
batch_size: 2048 |
|||
buffer_size: 20480 |
|||
learning_rate: 0.0003 |
|||
beta: 0.005 |
|||
epsilon: 0.2 |
|||
lambd: 0.95 |
|||
num_epoch: 3 |
|||
learning_rate_schedule: constant |
|||
network_settings: |
|||
normalize: false |
|||
hidden_units: 512 |
|||
num_layers: 2 |
|||
vis_encode_type: simple |
|||
strength: 1.0 |
|||
strength: 1.0 |
|||
output_path: default |
|||
keep_checkpoints: 5 |
|||
max_steps: 50000000 |
|||
time_horizon: 1000 |
|||
summary_freq: 10000 |
|||
threaded: true |
|||
window: 10 |
|||
play_against_latest_model_ratio: 0.5 |
|||
swap_steps: 50000 |
|||
curriculum: |
|||
measure: progress |
|||
thresholds: [0.05, 0.1] |
|||
min_lesson_length: 100 |
|||
signal_smoothing: true |
|||
parameters: |
|||
ball_touch: [1.0, 0.5, 0.0] |
|||
swap_steps: 50000 |
|||
window: 10 |
|||
play_against_latest_model_ratio: 0.5 |
|||
initial_elo: 1200.0 |
|
|||
behaviors: |
|||
Goalie: |
|||
trainer: ppo |
|||
batch_size: 2048 |
|||
beta: 0.005 |
|||
buffer_size: 20480 |
|||
epsilon: 0.2 |
|||
hidden_units: 512 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
max_steps: 5.0e7 |
|||
memory_size: 128 |
|||
normalize: false |
|||
num_epoch: 3 |
|||
num_layers: 2 |
|||
time_horizon: 1000 |
|||
sequence_length: 64 |
|||
summary_freq: 10000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
trainer_type: ppo |
|||
hyperparameters: |
|||
batch_size: 2048 |
|||
buffer_size: 20480 |
|||
learning_rate: 0.0003 |
|||
beta: 0.005 |
|||
epsilon: 0.2 |
|||
lambd: 0.95 |
|||
num_epoch: 3 |
|||
learning_rate_schedule: constant |
|||
network_settings: |
|||
normalize: false |
|||
hidden_units: 512 |
|||
num_layers: 2 |
|||
vis_encode_type: simple |
|||
gamma: 0.99 |
|||
gamma: 0.99 |
|||
output_path: default |
|||
keep_checkpoints: 5 |
|||
max_steps: 50000000 |
|||
time_horizon: 1000 |
|||
summary_freq: 10000 |
|||
threaded: true |
|||
window: 10 |
|||
play_against_latest_model_ratio: 0.5 |
|||
swap_steps: 25000 |
|||
|
|||
swap_steps: 25000 |
|||
window: 10 |
|||
play_against_latest_model_ratio: 0.5 |
|||
initial_elo: 1200.0 |
|||
trainer: ppo |
|||
batch_size: 2048 |
|||
beta: 0.005 |
|||
buffer_size: 20480 |
|||
epsilon: 0.2 |
|||
hidden_units: 512 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
max_steps: 5.0e7 |
|||
memory_size: 128 |
|||
normalize: false |
|||
num_epoch: 3 |
|||
num_layers: 2 |
|||
time_horizon: 1000 |
|||
sequence_length: 64 |
|||
summary_freq: 10000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
trainer_type: ppo |
|||
hyperparameters: |
|||
batch_size: 2048 |
|||
buffer_size: 20480 |
|||
learning_rate: 0.0003 |
|||
beta: 0.005 |
|||
epsilon: 0.2 |
|||
lambd: 0.95 |
|||
num_epoch: 3 |
|||
learning_rate_schedule: constant |
|||
network_settings: |
|||
normalize: false |
|||
hidden_units: 512 |
|||
num_layers: 2 |
|||
vis_encode_type: simple |
|||
gamma: 0.99 |
|||
gamma: 0.99 |
|||
output_path: default |
|||
keep_checkpoints: 5 |
|||
max_steps: 50000000 |
|||
time_horizon: 1000 |
|||
summary_freq: 10000 |
|||
threaded: true |
|||
window: 10 |
|||
play_against_latest_model_ratio: 0.5 |
|||
team_change: 200000 |
|||
team_change: 200000 |
|||
window: 10 |
|||
play_against_latest_model_ratio: 0.5 |
|||
initial_elo: 1200.0 |
|
|||
behaviors: |
|||
Tennis: |
|||
trainer: ppo |
|||
batch_size: 1024 |
|||
beta: 0.005 |
|||
buffer_size: 10240 |
|||
epsilon: 0.2 |
|||
hidden_units: 256 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
max_steps: 5.0e7 |
|||
memory_size: 128 |
|||
normalize: true |
|||
num_epoch: 3 |
|||
num_layers: 2 |
|||
time_horizon: 1000 |
|||
sequence_length: 64 |
|||
summary_freq: 10000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
trainer_type: ppo |
|||
hyperparameters: |
|||
batch_size: 1024 |
|||
buffer_size: 10240 |
|||
learning_rate: 0.0003 |
|||
beta: 0.005 |
|||
epsilon: 0.2 |
|||
lambd: 0.95 |
|||
num_epoch: 3 |
|||
learning_rate_schedule: constant |
|||
network_settings: |
|||
normalize: true |
|||
hidden_units: 256 |
|||
num_layers: 2 |
|||
vis_encode_type: simple |
|||
gamma: 0.99 |
|||
gamma: 0.99 |
|||
output_path: default |
|||
keep_checkpoints: 5 |
|||
max_steps: 50000000 |
|||
time_horizon: 1000 |
|||
summary_freq: 10000 |
|||
threaded: true |
|||
window: 10 |
|||
play_against_latest_model_ratio: 0.5 |
|||
team_change: 100000 |
|||
team_change: 100000 |
|||
window: 10 |
|||
play_against_latest_model_ratio: 0.5 |
|||
initial_elo: 1200.0 |
|
|||
behaviors: |
|||
VisualHallway: |
|||
trainer: ppo |
|||
batch_size: 64 |
|||
beta: 0.01 |
|||
buffer_size: 1024 |
|||
epsilon: 0.2 |
|||
hidden_units: 128 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: linear |
|||
max_steps: 1.0e7 |
|||
memory_size: 128 |
|||
normalize: false |
|||
num_epoch: 3 |
|||
num_layers: 1 |
|||
time_horizon: 64 |
|||
sequence_length: 64 |
|||
summary_freq: 10000 |
|||
use_recurrent: true |
|||
vis_encode_type: simple |
|||
trainer_type: ppo |
|||
hyperparameters: |
|||
batch_size: 64 |
|||
buffer_size: 1024 |
|||
learning_rate: 0.0003 |
|||
beta: 0.01 |
|||
epsilon: 0.2 |
|||
lambd: 0.95 |
|||
num_epoch: 3 |
|||
learning_rate_schedule: linear |
|||
network_settings: |
|||
normalize: false |
|||
hidden_units: 128 |
|||
num_layers: 1 |
|||
vis_encode_type: simple |
|||
memory: |
|||
sequence_length: 64 |
|||
memory_size: 128 |
|||
gamma: 0.99 |
|||
gamma: 0.99 |
|||
output_path: default |
|||
keep_checkpoints: 5 |
|||
max_steps: 10000000 |
|||
time_horizon: 64 |
|||
summary_freq: 10000 |
|||
threaded: true |
|
|||
behaviors: |
|||
VisualPushBlock: |
|||
trainer: ppo |
|||
batch_size: 64 |
|||
beta: 0.01 |
|||
buffer_size: 1024 |
|||
epsilon: 0.2 |
|||
hidden_units: 128 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: linear |
|||
max_steps: 3.0e6 |
|||
memory_size: 128 |
|||
normalize: false |
|||
num_epoch: 3 |
|||
num_layers: 1 |
|||
time_horizon: 64 |
|||
sequence_length: 32 |
|||
summary_freq: 60000 |
|||
use_recurrent: true |
|||
vis_encode_type: simple |
|||
trainer_type: ppo |
|||
hyperparameters: |
|||
batch_size: 64 |
|||
buffer_size: 1024 |
|||
learning_rate: 0.0003 |
|||
beta: 0.01 |
|||
epsilon: 0.2 |
|||
lambd: 0.95 |
|||
num_epoch: 3 |
|||
learning_rate_schedule: linear |
|||
network_settings: |
|||
normalize: false |
|||
hidden_units: 128 |
|||
num_layers: 1 |
|||
vis_encode_type: simple |
|||
memory: |
|||
sequence_length: 32 |
|||
memory_size: 128 |
|||
gamma: 0.99 |
|||
gamma: 0.99 |
|||
output_path: default |
|||
keep_checkpoints: 5 |
|||
max_steps: 3000000 |
|||
time_horizon: 64 |
|||
summary_freq: 60000 |
|||
threaded: true |
|
|||
behaviors: |
|||
VisualPyramids: |
|||
trainer: ppo |
|||
batch_size: 64 |
|||
beta: 0.01 |
|||
buffer_size: 2024 |
|||
epsilon: 0.2 |
|||
hidden_units: 256 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: linear |
|||
max_steps: 1.0e7 |
|||
memory_size: 128 |
|||
normalize: false |
|||
num_epoch: 3 |
|||
num_layers: 1 |
|||
time_horizon: 128 |
|||
sequence_length: 64 |
|||
summary_freq: 10000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
trainer_type: ppo |
|||
hyperparameters: |
|||
batch_size: 64 |
|||
buffer_size: 2024 |
|||
learning_rate: 0.0003 |
|||
beta: 0.01 |
|||
epsilon: 0.2 |
|||
lambd: 0.95 |
|||
num_epoch: 3 |
|||
learning_rate_schedule: linear |
|||
network_settings: |
|||
normalize: false |
|||
hidden_units: 256 |
|||
num_layers: 1 |
|||
vis_encode_type: simple |
|||
strength: 1.0 |
|||
strength: 1.0 |
|||
strength: 0.01 |
|||
strength: 0.01 |
|||
learning_rate: 0.0003 |
|||
output_path: default |
|||
keep_checkpoints: 5 |
|||
max_steps: 10000000 |
|||
time_horizon: 128 |
|||
summary_freq: 10000 |
|||
threaded: true |
|
|||
behaviors: |
|||
Walker: |
|||
trainer: ppo |
|||
batch_size: 2048 |
|||
beta: 0.005 |
|||
buffer_size: 20480 |
|||
epsilon: 0.2 |
|||
hidden_units: 512 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: linear |
|||
max_steps: 2e7 |
|||
memory_size: 128 |
|||
normalize: true |
|||
num_epoch: 3 |
|||
num_layers: 3 |
|||
time_horizon: 1000 |
|||
sequence_length: 64 |
|||
summary_freq: 30000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
trainer_type: ppo |
|||
hyperparameters: |
|||
batch_size: 2048 |
|||
buffer_size: 20480 |
|||
learning_rate: 0.0003 |
|||
beta: 0.005 |
|||
epsilon: 0.2 |
|||
lambd: 0.95 |
|||
num_epoch: 3 |
|||
learning_rate_schedule: linear |
|||
network_settings: |
|||
normalize: true |
|||
hidden_units: 512 |
|||
num_layers: 3 |
|||
vis_encode_type: simple |
|||
gamma: 0.995 |
|||
gamma: 0.995 |
|||
output_path: default |
|||
keep_checkpoints: 5 |
|||
max_steps: 20000000 |
|||
time_horizon: 1000 |
|||
summary_freq: 30000 |
|||
threaded: true |
|
|||
behaviors: |
|||
BigWallJump: |
|||
trainer: ppo |
|||
batch_size: 128 |
|||
beta: 0.005 |
|||
buffer_size: 2048 |
|||
epsilon: 0.2 |
|||
hidden_units: 256 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: linear |
|||
max_steps: 2e7 |
|||
memory_size: 128 |
|||
normalize: false |
|||
num_epoch: 3 |
|||
num_layers: 2 |
|||
time_horizon: 128 |
|||
sequence_length: 64 |
|||
summary_freq: 20000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
trainer_type: ppo |
|||
hyperparameters: |
|||
batch_size: 128 |
|||
buffer_size: 2048 |
|||
learning_rate: 0.0003 |
|||
beta: 0.005 |
|||
epsilon: 0.2 |
|||
lambd: 0.95 |
|||
num_epoch: 3 |
|||
learning_rate_schedule: linear |
|||
network_settings: |
|||
normalize: false |
|||
hidden_units: 256 |
|||
num_layers: 2 |
|||
vis_encode_type: simple |
|||
strength: 1.0 |
|||
|
|||
SmallWallJump: |
|||
trainer: ppo |
|||
batch_size: 128 |
|||
beta: 0.005 |
|||
buffer_size: 2048 |
|||
epsilon: 0.2 |
|||
hidden_units: 256 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: linear |
|||
max_steps: 5e6 |
|||
memory_size: 128 |
|||
normalize: false |
|||
num_epoch: 3 |
|||
num_layers: 2 |
|||
strength: 1.0 |
|||
output_path: default |
|||
keep_checkpoints: 5 |
|||
max_steps: 20000000 |
|||
sequence_length: 64 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
threaded: true |
|||
SmallWallJump: |
|||
trainer_type: ppo |
|||
hyperparameters: |
|||
batch_size: 128 |
|||
buffer_size: 2048 |
|||
learning_rate: 0.0003 |
|||
beta: 0.005 |
|||
epsilon: 0.2 |
|||
lambd: 0.95 |
|||
num_epoch: 3 |
|||
learning_rate_schedule: linear |
|||
network_settings: |
|||
normalize: false |
|||
hidden_units: 256 |
|||
num_layers: 2 |
|||
vis_encode_type: simple |
|||
gamma: 0.99 |
|||
gamma: 0.99 |
|||
output_path: default |
|||
keep_checkpoints: 5 |
|||
max_steps: 5000000 |
|||
time_horizon: 128 |
|||
summary_freq: 20000 |
|||
threaded: true |
|
|||
behaviors: |
|||
BigWallJump: |
|||
trainer: ppo |
|||
batch_size: 128 |
|||
beta: 0.005 |
|||
buffer_size: 2048 |
|||
epsilon: 0.2 |
|||
hidden_units: 256 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: linear |
|||
max_steps: 2e7 |
|||
memory_size: 128 |
|||
normalize: false |
|||
num_epoch: 3 |
|||
num_layers: 2 |
|||
time_horizon: 128 |
|||
sequence_length: 64 |
|||
summary_freq: 20000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
trainer_type: ppo |
|||
hyperparameters: |
|||
batch_size: 128 |
|||
buffer_size: 2048 |
|||
learning_rate: 0.0003 |
|||
beta: 0.005 |
|||
epsilon: 0.2 |
|||
lambd: 0.95 |
|||
num_epoch: 3 |
|||
learning_rate_schedule: linear |
|||
network_settings: |
|||
normalize: false |
|||
hidden_units: 256 |
|||
num_layers: 2 |
|||
vis_encode_type: simple |
|||
gamma: 0.99 |
|||
gamma: 0.99 |
|||
curriculum: |
|||
measure: progress |
|||
thresholds: [0.1, 0.3, 0.5] |
|||
min_lesson_length: 100 |
|||
signal_smoothing: true |
|||
parameters: |
|||
big_wall_min_height: [0.0, 4.0, 6.0, 8.0] |
|||
big_wall_max_height: [4.0, 7.0, 8.0, 8.0] |
|||
|
|||
SmallWallJump: |
|||
trainer: ppo |
|||
batch_size: 128 |
|||
beta: 0.005 |
|||
buffer_size: 2048 |
|||
epsilon: 0.2 |
|||
hidden_units: 256 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: linear |
|||
max_steps: 5e6 |
|||
memory_size: 128 |
|||
normalize: false |
|||
num_epoch: 3 |
|||
num_layers: 2 |
|||
output_path: default |
|||
keep_checkpoints: 5 |
|||
max_steps: 20000000 |
|||
sequence_length: 64 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
threaded: true |
|||
SmallWallJump: |
|||
trainer_type: ppo |
|||
hyperparameters: |
|||
batch_size: 128 |
|||
buffer_size: 2048 |
|||
learning_rate: 0.0003 |
|||
beta: 0.005 |
|||
epsilon: 0.2 |
|||
lambd: 0.95 |
|||
num_epoch: 3 |
|||
learning_rate_schedule: linear |
|||
network_settings: |
|||
normalize: false |
|||
hidden_units: 256 |
|||
num_layers: 2 |
|||
vis_encode_type: simple |
|||
gamma: 0.99 |
|||
gamma: 0.99 |
|||
curriculum: |
|||
measure: progress |
|||
thresholds: [0.1, 0.3, 0.5] |
|||
min_lesson_length: 100 |
|||
signal_smoothing: true |
|||
parameters: |
|||
small_wall_height: [1.5, 2.0, 2.5, 4.0] |
|||
output_path: default |
|||
keep_checkpoints: 5 |
|||
max_steps: 5000000 |
|||
time_horizon: 128 |
|||
summary_freq: 20000 |
|||
threaded: true |
|||
|
|||
curriculum: |
|||
BigWallJump: |
|||
measure: progress |
|||
thresholds: [0.1, 0.3, 0.5] |
|||
min_lesson_length: 100 |
|||
signal_smoothing: true |
|||
parameters: |
|||
big_wall_min_height: [0.0, 4.0, 6.0, 8.0] |
|||
big_wall_max_height: [4.0, 7.0, 8.0, 8.0] |
|||
SmallWallJump: |
|||
measure: progress |
|||
thresholds: [0.1, 0.3, 0.5] |
|||
min_lesson_length: 100 |
|||
signal_smoothing: true |
|||
parameters: |
|||
small_wall_height: [1.5, 2.0, 2.5, 4.0] |
|
|||
behaviors: |
|||
WormDynamic: |
|||
trainer: ppo |
|||
batch_size: 2024 |
|||
beta: 0.005 |
|||
buffer_size: 20240 |
|||
epsilon: 0.2 |
|||
hidden_units: 512 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: linear |
|||
max_steps: 3.5e6 |
|||
memory_size: 128 |
|||
normalize: true |
|||
num_epoch: 3 |
|||
num_layers: 3 |
|||
time_horizon: 1000 |
|||
sequence_length: 64 |
|||
summary_freq: 30000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
trainer_type: ppo |
|||
hyperparameters: |
|||
batch_size: 2024 |
|||
buffer_size: 20240 |
|||
learning_rate: 0.0003 |
|||
beta: 0.005 |
|||
epsilon: 0.2 |
|||
lambd: 0.95 |
|||
num_epoch: 3 |
|||
learning_rate_schedule: linear |
|||
network_settings: |
|||
normalize: true |
|||
hidden_units: 512 |
|||
num_layers: 3 |
|||
vis_encode_type: simple |
|||
gamma: 0.995 |
|||
gamma: 0.995 |
|||
output_path: default |
|||
keep_checkpoints: 5 |
|||
max_steps: 3500000 |
|||
time_horizon: 1000 |
|||
summary_freq: 30000 |
|||
threaded: true |