GitHub
5 年前
当前提交
f86fc81d
共有 76 个文件被更改,包括 1650 次插入 和 641 次删除
-
3com.unity.ml-agents/CHANGELOG.md
-
2docs/Feature-Memory.md
-
10docs/Getting-Started.md
-
43docs/Learning-Environment-Create-New.md
-
4docs/Learning-Environment-Examples.md
-
6docs/Learning-Environment-Executable.md
-
15docs/Migrating.md
-
4docs/Reward-Signals.md
-
61docs/Training-Curriculum-Learning.md
-
52docs/Training-Environment-Parameter-Randomization.md
-
3docs/Training-Imitation-Learning.md
-
36docs/Training-ML-Agents.md
-
2docs/Training-Using-Concurrent-Unity-Instances.md
-
2gym-unity/README.md
-
51ml-agents/mlagents/trainers/learn.py
-
45ml-agents/mlagents/trainers/tests/test_learn.py
-
41ml-agents/mlagents/trainers/tests/test_trainer_util.py
-
25ml-agents/mlagents/trainers/trainer_util.py
-
2ml-agents/tests/yamato/training_int_tests.py
-
3ml-agents/tests/yamato/yamato_utils.py
-
29config/imitation/CrawlerStatic.yaml
-
29config/imitation/FoodCollector.yaml
-
28config/imitation/Hallway.yaml
-
25config/imitation/PushBlock.yaml
-
36config/imitation/Pyramids.yaml
-
25config/ppo/3DBall.yaml
-
25config/ppo/3DBallHard.yaml
-
40config/ppo/3DBall_randomize.yaml
-
25config/ppo/Basic.yaml
-
25config/ppo/Bouncer.yaml
-
25config/ppo/CrawlerDynamic.yaml
-
25config/ppo/CrawlerStatic.yaml
-
25config/ppo/FoodCollector.yaml
-
25config/ppo/GridWorld.yaml
-
25config/ppo/Hallway.yaml
-
25config/ppo/PushBlock.yaml
-
29config/ppo/Pyramids.yaml
-
25config/ppo/Reacher.yaml
-
38config/ppo/SoccerTwos.yaml
-
62config/ppo/StrikersVsGoalie.yaml
-
31config/ppo/Tennis.yaml
-
25config/ppo/VisualHallway.yaml
-
25config/ppo/VisualPushBlock.yaml
-
29config/ppo/VisualPyramids.yaml
-
25config/ppo/Walker.yaml
-
50config/ppo/WallJump.yaml
-
65config/ppo/WallJump_curriculum.yaml
-
25config/ppo/WormDynamic.yaml
-
25config/ppo/WormStatic.yaml
-
25config/sac/3DBall.yaml
-
25config/sac/3DBallHard.yaml
-
25config/sac/Basic.yaml
-
25config/sac/Bouncer.yaml
-
25config/sac/CrawlerDynamic.yaml
-
25config/sac/CrawlerStatic.yaml
-
25config/sac/FoodCollector.yaml
-
25config/sac/GridWorld.yaml
-
25config/sac/Hallway.yaml
-
25config/sac/PushBlock.yaml
-
31config/sac/Pyramids.yaml
-
25config/sac/Reacher.yaml
-
30config/sac/Tennis.yaml
-
26config/sac/VisualHallway.yaml
-
26config/sac/VisualPushBlock.yaml
-
31config/sac/VisualPyramids.yaml
-
25config/sac/Walker.yaml
-
50config/sac/WallJump.yaml
-
129config/gail_config.yaml
-
16config/3dball_randomize.yaml
-
351config/trainer_config.yaml
|
|||
behaviors: |
|||
CrawlerStatic: |
|||
trainer: ppo |
|||
batch_size: 2024 |
|||
beta: 0.005 |
|||
buffer_size: 20240 |
|||
epsilon: 0.2 |
|||
hidden_units: 512 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
max_steps: 1e7 |
|||
memory_size: 256 |
|||
normalize: true |
|||
num_epoch: 3 |
|||
num_layers: 3 |
|||
time_horizon: 1000 |
|||
sequence_length: 64 |
|||
summary_freq: 30000 |
|||
use_recurrent: false |
|||
reward_signals: |
|||
gail: |
|||
strength: 1.0 |
|||
gamma: 0.99 |
|||
encoding_size: 128 |
|||
demo_path: Project/Assets/ML-Agents/Examples/Crawler/Demos/ExpertCrawlerSta.demo |
|||
behavioral_cloning: |
|||
demo_path: Project/Assets/ML-Agents/Examples/Crawler/Demos/ExpertCrawlerSta.demo |
|||
strength: 0.5 |
|||
steps: 50000 |
|
|||
behaviors: |
|||
FoodCollector: |
|||
trainer: ppo |
|||
batch_size: 64 |
|||
beta: 0.005 |
|||
buffer_size: 10240 |
|||
epsilon: 0.2 |
|||
hidden_units: 128 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
max_steps: 2.0e6 |
|||
memory_size: 256 |
|||
normalize: false |
|||
num_epoch: 3 |
|||
num_layers: 2 |
|||
time_horizon: 64 |
|||
sequence_length: 32 |
|||
summary_freq: 10000 |
|||
use_recurrent: false |
|||
reward_signals: |
|||
gail: |
|||
strength: 0.1 |
|||
gamma: 0.99 |
|||
encoding_size: 128 |
|||
demo_path: Project/Assets/ML-Agents/Examples/FoodCollector/Demos/ExpertFood.demo |
|||
behavioral_cloning: |
|||
demo_path: Project/Assets/ML-Agents/Examples/FoodCollector/Demos/ExpertFood.demo |
|||
strength: 1.0 |
|||
steps: 0 |
|
|||
behaviors: |
|||
Hallway: |
|||
trainer: ppo |
|||
batch_size: 128 |
|||
beta: 0.01 |
|||
buffer_size: 1024 |
|||
epsilon: 0.2 |
|||
hidden_units: 128 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
max_steps: 1.0e7 |
|||
memory_size: 256 |
|||
normalize: false |
|||
num_epoch: 3 |
|||
num_layers: 2 |
|||
time_horizon: 64 |
|||
sequence_length: 64 |
|||
summary_freq: 10000 |
|||
use_recurrent: true |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.99 |
|||
gail: |
|||
strength: 0.1 |
|||
gamma: 0.99 |
|||
encoding_size: 128 |
|||
demo_path: Project/Assets/ML-Agents/Examples/Hallway/Demos/ExpertHallway.demo |
|
|||
behaviors: |
|||
PushBlock: |
|||
trainer: ppo |
|||
batch_size: 128 |
|||
beta: 0.01 |
|||
buffer_size: 2048 |
|||
epsilon: 0.2 |
|||
hidden_units: 256 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
max_steps: 1.5e7 |
|||
memory_size: 256 |
|||
normalize: false |
|||
num_epoch: 3 |
|||
num_layers: 2 |
|||
time_horizon: 64 |
|||
sequence_length: 64 |
|||
summary_freq: 60000 |
|||
use_recurrent: false |
|||
reward_signals: |
|||
gail: |
|||
strength: 1.0 |
|||
gamma: 0.99 |
|||
encoding_size: 128 |
|||
demo_path: Project/Assets/ML-Agents/Examples/PushBlock/Demos/ExpertPush.demo |
|
|||
behaviors: |
|||
Pyramids: |
|||
trainer: ppo |
|||
batch_size: 128 |
|||
beta: 0.01 |
|||
buffer_size: 2048 |
|||
epsilon: 0.2 |
|||
hidden_units: 512 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
max_steps: 1.0e7 |
|||
memory_size: 256 |
|||
normalize: false |
|||
num_epoch: 3 |
|||
num_layers: 2 |
|||
time_horizon: 128 |
|||
sequence_length: 64 |
|||
summary_freq: 30000 |
|||
use_recurrent: false |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.99 |
|||
curiosity: |
|||
strength: 0.02 |
|||
gamma: 0.99 |
|||
encoding_size: 256 |
|||
gail: |
|||
strength: 0.01 |
|||
gamma: 0.99 |
|||
encoding_size: 128 |
|||
demo_path: Project/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo |
|||
behavioral_cloning: |
|||
demo_path: Project/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo |
|||
strength: 0.5 |
|||
steps: 150000 |
|
|||
behaviors: |
|||
3DBall: |
|||
trainer: ppo |
|||
batch_size: 64 |
|||
beta: 0.001 |
|||
buffer_size: 12000 |
|||
epsilon: 0.2 |
|||
hidden_units: 128 |
|||
lambd: 0.99 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: linear |
|||
max_steps: 5.0e5 |
|||
memory_size: 128 |
|||
normalize: true |
|||
num_epoch: 3 |
|||
num_layers: 2 |
|||
time_horizon: 1000 |
|||
sequence_length: 64 |
|||
summary_freq: 12000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.99 |
|
|||
behaviors: |
|||
3DBallHard: |
|||
trainer: ppo |
|||
batch_size: 1200 |
|||
beta: 0.001 |
|||
buffer_size: 12000 |
|||
epsilon: 0.2 |
|||
hidden_units: 128 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: linear |
|||
max_steps: 5.0e6 |
|||
memory_size: 128 |
|||
normalize: true |
|||
num_epoch: 3 |
|||
num_layers: 2 |
|||
time_horizon: 1000 |
|||
sequence_length: 64 |
|||
summary_freq: 12000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.995 |
|
|||
behaviors: |
|||
3DBall: |
|||
trainer: ppo |
|||
batch_size: 64 |
|||
beta: 0.001 |
|||
buffer_size: 12000 |
|||
epsilon: 0.2 |
|||
hidden_units: 128 |
|||
lambd: 0.99 |
|||
learning_rate: 3.0e-4 |
|||
learning_rate_schedule: linear |
|||
max_steps: 5.0e5 |
|||
memory_size: 128 |
|||
normalize: true |
|||
num_epoch: 3 |
|||
num_layers: 2 |
|||
time_horizon: 1000 |
|||
sequence_length: 64 |
|||
summary_freq: 12000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.99 |
|||
|
|||
parameter_randomization: |
|||
resampling-interval: 500 |
|||
mass: |
|||
sampler-type: "uniform" |
|||
min_value: 0.5 |
|||
max_value: 10 |
|||
gravity: |
|||
sampler-type: "uniform" |
|||
min_value: 7 |
|||
max_value: 12 |
|||
scale: |
|||
sampler-type: "uniform" |
|||
min_value: 0.75 |
|||
max_value: 3 |
|
|||
behaviors: |
|||
Basic: |
|||
trainer: ppo |
|||
batch_size: 32 |
|||
beta: 0.005 |
|||
buffer_size: 256 |
|||
epsilon: 0.2 |
|||
hidden_units: 20 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: linear |
|||
max_steps: 5.0e5 |
|||
memory_size: 128 |
|||
normalize: false |
|||
num_epoch: 3 |
|||
num_layers: 1 |
|||
time_horizon: 3 |
|||
sequence_length: 64 |
|||
summary_freq: 2000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.9 |
|
|||
behaviors: |
|||
Bouncer: |
|||
trainer: ppo |
|||
batch_size: 1024 |
|||
beta: 0.005 |
|||
buffer_size: 10240 |
|||
epsilon: 0.2 |
|||
hidden_units: 64 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: linear |
|||
max_steps: 4.0e6 |
|||
memory_size: 128 |
|||
normalize: true |
|||
num_epoch: 3 |
|||
num_layers: 2 |
|||
time_horizon: 64 |
|||
sequence_length: 64 |
|||
summary_freq: 10000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.99 |
|
|||
behaviors: |
|||
CrawlerDynamic: |
|||
trainer: ppo |
|||
batch_size: 2024 |
|||
beta: 0.005 |
|||
buffer_size: 20240 |
|||
epsilon: 0.2 |
|||
hidden_units: 512 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: linear |
|||
max_steps: 1e7 |
|||
memory_size: 128 |
|||
normalize: true |
|||
num_epoch: 3 |
|||
num_layers: 3 |
|||
time_horizon: 1000 |
|||
sequence_length: 64 |
|||
summary_freq: 30000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.995 |
|
|||
behaviors: |
|||
CrawlerStatic: |
|||
trainer: ppo |
|||
batch_size: 2024 |
|||
beta: 0.005 |
|||
buffer_size: 20240 |
|||
epsilon: 0.2 |
|||
hidden_units: 512 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: linear |
|||
max_steps: 1e7 |
|||
memory_size: 128 |
|||
normalize: true |
|||
num_epoch: 3 |
|||
num_layers: 3 |
|||
time_horizon: 1000 |
|||
sequence_length: 64 |
|||
summary_freq: 30000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.995 |
|
|||
behaviors: |
|||
FoodCollector: |
|||
trainer: ppo |
|||
batch_size: 1024 |
|||
beta: 0.005 |
|||
buffer_size: 10240 |
|||
epsilon: 0.2 |
|||
hidden_units: 128 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: linear |
|||
max_steps: 2.0e6 |
|||
memory_size: 128 |
|||
normalize: false |
|||
num_epoch: 3 |
|||
num_layers: 2 |
|||
time_horizon: 64 |
|||
sequence_length: 64 |
|||
summary_freq: 10000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.99 |
|
|||
behaviors: |
|||
GridWorld: |
|||
trainer: ppo |
|||
batch_size: 32 |
|||
beta: 0.005 |
|||
buffer_size: 256 |
|||
epsilon: 0.2 |
|||
hidden_units: 256 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: linear |
|||
max_steps: 500000 |
|||
memory_size: 128 |
|||
normalize: false |
|||
num_epoch: 3 |
|||
num_layers: 1 |
|||
time_horizon: 5 |
|||
sequence_length: 64 |
|||
summary_freq: 20000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.9 |
|
|||
behaviors: |
|||
Hallway: |
|||
trainer: ppo |
|||
batch_size: 128 |
|||
beta: 0.01 |
|||
buffer_size: 1024 |
|||
epsilon: 0.2 |
|||
hidden_units: 128 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: linear |
|||
max_steps: 1.0e7 |
|||
memory_size: 128 |
|||
normalize: false |
|||
num_epoch: 3 |
|||
num_layers: 2 |
|||
time_horizon: 64 |
|||
sequence_length: 64 |
|||
summary_freq: 10000 |
|||
use_recurrent: true |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.99 |
|
|||
behaviors: |
|||
PushBlock: |
|||
trainer: ppo |
|||
batch_size: 128 |
|||
beta: 0.01 |
|||
buffer_size: 2048 |
|||
epsilon: 0.2 |
|||
hidden_units: 256 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: linear |
|||
max_steps: 2.0e6 |
|||
memory_size: 128 |
|||
normalize: false |
|||
num_epoch: 3 |
|||
num_layers: 2 |
|||
time_horizon: 64 |
|||
sequence_length: 64 |
|||
summary_freq: 60000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.99 |
|
|||
behaviors: |
|||
Pyramids: |
|||
trainer: ppo |
|||
batch_size: 128 |
|||
beta: 0.01 |
|||
buffer_size: 2048 |
|||
epsilon: 0.2 |
|||
hidden_units: 512 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: linear |
|||
max_steps: 1.0e7 |
|||
memory_size: 128 |
|||
normalize: false |
|||
num_epoch: 3 |
|||
num_layers: 2 |
|||
time_horizon: 128 |
|||
sequence_length: 64 |
|||
summary_freq: 30000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.99 |
|||
curiosity: |
|||
strength: 0.02 |
|||
gamma: 0.99 |
|||
encoding_size: 256 |
|
|||
behaviors: |
|||
Reacher: |
|||
trainer: ppo |
|||
batch_size: 2024 |
|||
beta: 0.005 |
|||
buffer_size: 20240 |
|||
epsilon: 0.2 |
|||
hidden_units: 128 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: linear |
|||
max_steps: 2e7 |
|||
memory_size: 128 |
|||
normalize: true |
|||
num_epoch: 3 |
|||
num_layers: 2 |
|||
time_horizon: 1000 |
|||
sequence_length: 64 |
|||
summary_freq: 60000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.995 |
|
|||
behaviors: |
|||
SoccerTwos: |
|||
trainer: ppo |
|||
batch_size: 2048 |
|||
beta: 0.005 |
|||
buffer_size: 20480 |
|||
epsilon: 0.2 |
|||
hidden_units: 512 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
max_steps: 5.0e7 |
|||
memory_size: 128 |
|||
normalize: false |
|||
num_epoch: 3 |
|||
num_layers: 2 |
|||
time_horizon: 1000 |
|||
sequence_length: 64 |
|||
summary_freq: 10000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.99 |
|||
self_play: |
|||
window: 10 |
|||
play_against_latest_model_ratio: 0.5 |
|||
save_steps: 50000 |
|||
swap_steps: 50000 |
|||
team_change: 200000 |
|||
curriculum: |
|||
measure: progress |
|||
thresholds: [0.05, 0.1] |
|||
min_lesson_length: 100 |
|||
signal_smoothing: true |
|||
parameters: |
|||
ball_touch: [1.0, 0.5, 0.0] |
|
|||
behaviors: |
|||
Goalie: |
|||
trainer: ppo |
|||
batch_size: 2048 |
|||
beta: 0.005 |
|||
buffer_size: 20480 |
|||
epsilon: 0.2 |
|||
hidden_units: 512 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
max_steps: 5.0e7 |
|||
memory_size: 128 |
|||
normalize: false |
|||
num_epoch: 3 |
|||
num_layers: 2 |
|||
time_horizon: 1000 |
|||
sequence_length: 64 |
|||
summary_freq: 10000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.99 |
|||
self_play: |
|||
window: 10 |
|||
play_against_latest_model_ratio: 0.5 |
|||
save_steps: 50000 |
|||
swap_steps: 25000 |
|||
team_change: 200000 |
|||
|
|||
Striker: |
|||
trainer: ppo |
|||
batch_size: 2048 |
|||
beta: 0.005 |
|||
buffer_size: 20480 |
|||
epsilon: 0.2 |
|||
hidden_units: 512 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
max_steps: 5.0e7 |
|||
memory_size: 128 |
|||
normalize: false |
|||
num_epoch: 3 |
|||
num_layers: 2 |
|||
time_horizon: 1000 |
|||
sequence_length: 64 |
|||
summary_freq: 10000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.99 |
|||
self_play: |
|||
window: 10 |
|||
play_against_latest_model_ratio: 0.5 |
|||
save_steps: 50000 |
|||
swap_steps: 100000 |
|||
team_change: 200000 |
|
|||
behaviors: |
|||
Tennis: |
|||
trainer: ppo |
|||
batch_size: 1024 |
|||
beta: 0.005 |
|||
buffer_size: 10240 |
|||
epsilon: 0.2 |
|||
hidden_units: 256 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
max_steps: 5.0e7 |
|||
memory_size: 128 |
|||
normalize: true |
|||
num_epoch: 3 |
|||
num_layers: 2 |
|||
time_horizon: 1000 |
|||
sequence_length: 64 |
|||
summary_freq: 10000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.99 |
|||
self_play: |
|||
window: 10 |
|||
play_against_latest_model_ratio: 0.5 |
|||
save_steps: 50000 |
|||
swap_steps: 50000 |
|||
team_change: 100000 |
|
|||
behaviors: |
|||
VisualHallway: |
|||
trainer: ppo |
|||
batch_size: 64 |
|||
beta: 0.01 |
|||
buffer_size: 1024 |
|||
epsilon: 0.2 |
|||
hidden_units: 128 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: linear |
|||
max_steps: 1.0e7 |
|||
memory_size: 128 |
|||
normalize: false |
|||
num_epoch: 3 |
|||
num_layers: 1 |
|||
time_horizon: 64 |
|||
sequence_length: 64 |
|||
summary_freq: 10000 |
|||
use_recurrent: true |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.99 |
|
|||
behaviors: |
|||
VisualPushBlock: |
|||
trainer: ppo |
|||
batch_size: 64 |
|||
beta: 0.01 |
|||
buffer_size: 1024 |
|||
epsilon: 0.2 |
|||
hidden_units: 128 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: linear |
|||
max_steps: 3.0e6 |
|||
memory_size: 128 |
|||
normalize: false |
|||
num_epoch: 3 |
|||
num_layers: 1 |
|||
time_horizon: 64 |
|||
sequence_length: 32 |
|||
summary_freq: 60000 |
|||
use_recurrent: true |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.99 |
|
|||
behaviors: |
|||
VisualPyramids: |
|||
trainer: ppo |
|||
batch_size: 64 |
|||
beta: 0.01 |
|||
buffer_size: 2024 |
|||
epsilon: 0.2 |
|||
hidden_units: 256 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: linear |
|||
max_steps: 1.0e7 |
|||
memory_size: 128 |
|||
normalize: false |
|||
num_epoch: 3 |
|||
num_layers: 1 |
|||
time_horizon: 128 |
|||
sequence_length: 64 |
|||
summary_freq: 10000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.99 |
|||
curiosity: |
|||
strength: 0.01 |
|||
gamma: 0.99 |
|||
encoding_size: 256 |
|
|||
behaviors: |
|||
Walker: |
|||
trainer: ppo |
|||
batch_size: 2048 |
|||
beta: 0.005 |
|||
buffer_size: 20480 |
|||
epsilon: 0.2 |
|||
hidden_units: 512 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: linear |
|||
max_steps: 2e7 |
|||
memory_size: 128 |
|||
normalize: true |
|||
num_epoch: 3 |
|||
num_layers: 3 |
|||
time_horizon: 1000 |
|||
sequence_length: 64 |
|||
summary_freq: 30000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.995 |
|
|||
behaviors: |
|||
BigWallJump: |
|||
trainer: ppo |
|||
batch_size: 128 |
|||
beta: 0.005 |
|||
buffer_size: 2048 |
|||
epsilon: 0.2 |
|||
hidden_units: 256 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: linear |
|||
max_steps: 2e7 |
|||
memory_size: 128 |
|||
normalize: false |
|||
num_epoch: 3 |
|||
num_layers: 2 |
|||
time_horizon: 128 |
|||
sequence_length: 64 |
|||
summary_freq: 20000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.99 |
|||
|
|||
SmallWallJump: |
|||
trainer: ppo |
|||
batch_size: 128 |
|||
beta: 0.005 |
|||
buffer_size: 2048 |
|||
epsilon: 0.2 |
|||
hidden_units: 256 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: linear |
|||
max_steps: 5e6 |
|||
memory_size: 128 |
|||
normalize: false |
|||
num_epoch: 3 |
|||
num_layers: 2 |
|||
time_horizon: 128 |
|||
sequence_length: 64 |
|||
summary_freq: 20000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.99 |
|
|||
behaviors: |
|||
BigWallJump: |
|||
trainer: ppo |
|||
batch_size: 128 |
|||
beta: 0.005 |
|||
buffer_size: 2048 |
|||
epsilon: 0.2 |
|||
hidden_units: 256 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: linear |
|||
max_steps: 2e7 |
|||
memory_size: 128 |
|||
normalize: false |
|||
num_epoch: 3 |
|||
num_layers: 2 |
|||
time_horizon: 128 |
|||
sequence_length: 64 |
|||
summary_freq: 20000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.99 |
|||
curriculum: |
|||
measure: progress |
|||
thresholds: [0.1, 0.3, 0.5] |
|||
min_lesson_length: 100 |
|||
signal_smoothing: true |
|||
parameters: |
|||
big_wall_min_height: [0.0, 4.0, 6.0, 8.0] |
|||
big_wall_max_height: [4.0, 7.0, 8.0, 8.0] |
|||
|
|||
SmallWallJump: |
|||
trainer: ppo |
|||
batch_size: 128 |
|||
beta: 0.005 |
|||
buffer_size: 2048 |
|||
epsilon: 0.2 |
|||
hidden_units: 256 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: linear |
|||
max_steps: 5e6 |
|||
memory_size: 128 |
|||
normalize: false |
|||
num_epoch: 3 |
|||
num_layers: 2 |
|||
time_horizon: 128 |
|||
sequence_length: 64 |
|||
summary_freq: 20000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.99 |
|||
curriculum: |
|||
measure: progress |
|||
thresholds: [0.1, 0.3, 0.5] |
|||
min_lesson_length: 100 |
|||
signal_smoothing: true |
|||
parameters: |
|||
small_wall_height: [1.5, 2.0, 2.5, 4.0] |
|
|||
behaviors: |
|||
WormDynamic: |
|||
trainer: ppo |
|||
batch_size: 2024 |
|||
beta: 0.005 |
|||
buffer_size: 20240 |
|||
epsilon: 0.2 |
|||
hidden_units: 512 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: linear |
|||
max_steps: 3.5e6 |
|||
memory_size: 128 |
|||
normalize: true |
|||
num_epoch: 3 |
|||
num_layers: 3 |
|||
time_horizon: 1000 |
|||
sequence_length: 64 |
|||
summary_freq: 30000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.995 |
|
|||
behaviors: |
|||
WormStatic: |
|||
trainer: ppo |
|||
batch_size: 2024 |
|||
beta: 0.005 |
|||
buffer_size: 20240 |
|||
epsilon: 0.2 |
|||
hidden_units: 512 |
|||
lambd: 0.95 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: linear |
|||
max_steps: 3.5e6 |
|||
memory_size: 128 |
|||
normalize: true |
|||
num_epoch: 3 |
|||
num_layers: 3 |
|||
time_horizon: 1000 |
|||
sequence_length: 64 |
|||
summary_freq: 30000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.995 |
|
|||
behaviors: |
|||
3DBall: |
|||
trainer: sac |
|||
batch_size: 64 |
|||
buffer_size: 12000 |
|||
buffer_init_steps: 0 |
|||
hidden_units: 64 |
|||
init_entcoef: 0.5 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
max_steps: 5.0e5 |
|||
memory_size: 128 |
|||
normalize: true |
|||
steps_per_update: 10 |
|||
num_layers: 2 |
|||
time_horizon: 1000 |
|||
sequence_length: 64 |
|||
summary_freq: 12000 |
|||
tau: 0.005 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.99 |
|
|||
behaviors: |
|||
3DBallHard: |
|||
trainer: sac |
|||
batch_size: 256 |
|||
buffer_size: 50000 |
|||
buffer_init_steps: 0 |
|||
hidden_units: 128 |
|||
init_entcoef: 1.0 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
max_steps: 5.0e5 |
|||
memory_size: 128 |
|||
normalize: true |
|||
steps_per_update: 10 |
|||
num_layers: 2 |
|||
time_horizon: 1000 |
|||
sequence_length: 64 |
|||
summary_freq: 12000 |
|||
tau: 0.005 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.99 |
|
|||
behaviors: |
|||
Basic: |
|||
trainer: sac |
|||
batch_size: 64 |
|||
buffer_size: 50000 |
|||
buffer_init_steps: 0 |
|||
hidden_units: 20 |
|||
init_entcoef: 0.01 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
max_steps: 5.0e5 |
|||
memory_size: 128 |
|||
normalize: false |
|||
steps_per_update: 10 |
|||
num_layers: 2 |
|||
time_horizon: 10 |
|||
sequence_length: 64 |
|||
summary_freq: 2000 |
|||
tau: 0.005 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.99 |
|
|||
behaviors: |
|||
Bouncer: |
|||
trainer: sac |
|||
batch_size: 128 |
|||
buffer_size: 50000 |
|||
buffer_init_steps: 0 |
|||
hidden_units: 64 |
|||
init_entcoef: 1.0 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
max_steps: 1.0e6 |
|||
memory_size: 128 |
|||
normalize: true |
|||
steps_per_update: 10 |
|||
num_layers: 2 |
|||
time_horizon: 64 |
|||
sequence_length: 64 |
|||
summary_freq: 20000 |
|||
tau: 0.005 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.99 |
|
|||
behaviors: |
|||
CrawlerDynamic: |
|||
trainer: sac |
|||
batch_size: 256 |
|||
buffer_size: 500000 |
|||
buffer_init_steps: 0 |
|||
hidden_units: 512 |
|||
init_entcoef: 1.0 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
max_steps: 5e6 |
|||
memory_size: 128 |
|||
normalize: true |
|||
steps_per_update: 20 |
|||
num_layers: 3 |
|||
time_horizon: 1000 |
|||
sequence_length: 64 |
|||
summary_freq: 30000 |
|||
tau: 0.005 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.995 |
|
|||
behaviors: |
|||
CrawlerStatic: |
|||
trainer: sac |
|||
batch_size: 256 |
|||
buffer_size: 500000 |
|||
buffer_init_steps: 2000 |
|||
hidden_units: 512 |
|||
init_entcoef: 1.0 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
max_steps: 3e6 |
|||
memory_size: 128 |
|||
normalize: true |
|||
steps_per_update: 20 |
|||
num_layers: 3 |
|||
time_horizon: 1000 |
|||
sequence_length: 64 |
|||
summary_freq: 30000 |
|||
tau: 0.005 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.995 |
|
|||
behaviors: |
|||
FoodCollector: |
|||
trainer: sac |
|||
batch_size: 256 |
|||
buffer_size: 500000 |
|||
buffer_init_steps: 0 |
|||
hidden_units: 128 |
|||
init_entcoef: 0.05 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
max_steps: 2.0e6 |
|||
memory_size: 128 |
|||
normalize: false |
|||
steps_per_update: 10 |
|||
num_layers: 2 |
|||
time_horizon: 64 |
|||
sequence_length: 64 |
|||
summary_freq: 10000 |
|||
tau: 0.005 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.99 |
|
|||
behaviors: |
|||
GridWorld: |
|||
trainer: sac |
|||
batch_size: 128 |
|||
buffer_size: 50000 |
|||
buffer_init_steps: 1000 |
|||
hidden_units: 128 |
|||
init_entcoef: 0.5 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
max_steps: 500000 |
|||
memory_size: 128 |
|||
normalize: false |
|||
steps_per_update: 10 |
|||
num_layers: 1 |
|||
time_horizon: 5 |
|||
sequence_length: 64 |
|||
summary_freq: 20000 |
|||
tau: 0.005 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.9 |
|
|||
behaviors: |
|||
Hallway: |
|||
trainer: sac |
|||
batch_size: 128 |
|||
buffer_size: 50000 |
|||
buffer_init_steps: 0 |
|||
hidden_units: 128 |
|||
init_entcoef: 0.1 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
max_steps: 5.0e6 |
|||
memory_size: 128 |
|||
normalize: false |
|||
steps_per_update: 10 |
|||
num_layers: 2 |
|||
time_horizon: 64 |
|||
sequence_length: 32 |
|||
summary_freq: 10000 |
|||
tau: 0.005 |
|||
use_recurrent: true |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.99 |
|
|||
behaviors: |
|||
PushBlock: |
|||
trainer: sac |
|||
batch_size: 128 |
|||
buffer_size: 50000 |
|||
buffer_init_steps: 0 |
|||
hidden_units: 256 |
|||
init_entcoef: 0.05 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
max_steps: 2e6 |
|||
memory_size: 128 |
|||
normalize: false |
|||
steps_per_update: 10 |
|||
num_layers: 2 |
|||
time_horizon: 64 |
|||
sequence_length: 64 |
|||
summary_freq: 100000 |
|||
tau: 0.005 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.99 |
|
|||
behaviors: |
|||
Pyramids: |
|||
trainer: sac |
|||
batch_size: 128 |
|||
buffer_size: 500000 |
|||
buffer_init_steps: 10000 |
|||
hidden_units: 256 |
|||
init_entcoef: 0.01 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
max_steps: 1.0e7 |
|||
memory_size: 128 |
|||
normalize: false |
|||
steps_per_update: 10 |
|||
num_layers: 2 |
|||
time_horizon: 128 |
|||
sequence_length: 16 |
|||
summary_freq: 30000 |
|||
tau: 0.01 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 2.0 |
|||
gamma: 0.99 |
|||
gail: |
|||
strength: 0.02 |
|||
gamma: 0.99 |
|||
encoding_size: 128 |
|||
use_actions: true |
|||
demo_path: Project/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo |
|
|||
behaviors: |
|||
Reacher: |
|||
trainer: sac |
|||
batch_size: 128 |
|||
buffer_size: 500000 |
|||
buffer_init_steps: 0 |
|||
hidden_units: 128 |
|||
init_entcoef: 1.0 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
max_steps: 2e7 |
|||
memory_size: 128 |
|||
normalize: true |
|||
steps_per_update: 20 |
|||
num_layers: 2 |
|||
time_horizon: 1000 |
|||
sequence_length: 64 |
|||
summary_freq: 60000 |
|||
tau: 0.005 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.99 |
|
|||
behaviors: |
|||
Tennis: |
|||
trainer: sac |
|||
batch_size: 128 |
|||
buffer_size: 50000 |
|||
buffer_init_steps: 0 |
|||
hidden_units: 256 |
|||
init_entcoef: 1.0 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
max_steps: 2e7 |
|||
memory_size: 128 |
|||
normalize: true |
|||
steps_per_update: 10 |
|||
num_layers: 2 |
|||
time_horizon: 64 |
|||
sequence_length: 64 |
|||
summary_freq: 10000 |
|||
tau: 0.005 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.99 |
|||
self_play: |
|||
window: 10 |
|||
play_against_current_self_ratio: 0.5 |
|||
save_steps: 50000 |
|||
swap_steps: 50000 |
|
|||
behaviors: |
|||
VisualHallway: |
|||
trainer: sac |
|||
batch_size: 64 |
|||
buffer_size: 50000 |
|||
buffer_init_steps: 0 |
|||
hidden_units: 128 |
|||
init_entcoef: 1.0 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
max_steps: 1.0e7 |
|||
memory_size: 128 |
|||
normalize: false |
|||
steps_per_update: 10 |
|||
num_layers: 1 |
|||
time_horizon: 64 |
|||
sequence_length: 32 |
|||
summary_freq: 10000 |
|||
tau: 0.005 |
|||
use_recurrent: true |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.99 |
|||
gamma: 0.99 |
|
|||
behaviors: |
|||
VisualPushBlock: |
|||
trainer: sac |
|||
batch_size: 64 |
|||
buffer_size: 1024 |
|||
buffer_init_steps: 0 |
|||
hidden_units: 128 |
|||
init_entcoef: 1.0 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
max_steps: 3.0e6 |
|||
memory_size: 128 |
|||
normalize: false |
|||
steps_per_update: 10 |
|||
num_layers: 1 |
|||
time_horizon: 64 |
|||
sequence_length: 32 |
|||
summary_freq: 60000 |
|||
tau: 0.005 |
|||
use_recurrent: true |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.99 |
|||
gamma: 0.99 |
|
|||
behaviors: |
|||
VisualPyramids: |
|||
trainer: sac |
|||
batch_size: 64 |
|||
buffer_size: 500000 |
|||
buffer_init_steps: 1000 |
|||
hidden_units: 256 |
|||
init_entcoef: 0.01 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
max_steps: 1.0e7 |
|||
memory_size: 128 |
|||
normalize: false |
|||
steps_per_update: 10 |
|||
num_layers: 1 |
|||
time_horizon: 128 |
|||
sequence_length: 64 |
|||
summary_freq: 10000 |
|||
tau: 0.01 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 2.0 |
|||
gamma: 0.99 |
|||
gail: |
|||
strength: 0.02 |
|||
gamma: 0.99 |
|||
encoding_size: 128 |
|||
use_actions: true |
|||
demo_path: Project/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo |
|
|||
behaviors: |
|||
Walker: |
|||
trainer: sac |
|||
batch_size: 256 |
|||
buffer_size: 500000 |
|||
buffer_init_steps: 0 |
|||
hidden_units: 512 |
|||
init_entcoef: 1.0 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
max_steps: 2e7 |
|||
memory_size: 128 |
|||
normalize: true |
|||
steps_per_update: 30 |
|||
num_layers: 4 |
|||
time_horizon: 1000 |
|||
sequence_length: 64 |
|||
summary_freq: 30000 |
|||
tau: 0.005 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.995 |
|
|||
behaviors: |
|||
BigWallJump: |
|||
trainer: sac |
|||
batch_size: 128 |
|||
buffer_size: 50000 |
|||
buffer_init_steps: 0 |
|||
hidden_units: 256 |
|||
init_entcoef: 0.1 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
max_steps: 2e7 |
|||
memory_size: 128 |
|||
normalize: false |
|||
steps_per_update: 10 |
|||
num_layers: 2 |
|||
time_horizon: 128 |
|||
sequence_length: 64 |
|||
summary_freq: 20000 |
|||
tau: 0.005 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.99 |
|||
|
|||
SmallWallJump: |
|||
trainer: sac |
|||
batch_size: 128 |
|||
buffer_size: 50000 |
|||
buffer_init_steps: 0 |
|||
hidden_units: 256 |
|||
init_entcoef: 0.1 |
|||
learning_rate: 0.0003 |
|||
learning_rate_schedule: constant |
|||
max_steps: 5e6 |
|||
memory_size: 128 |
|||
normalize: false |
|||
steps_per_update: 10 |
|||
num_layers: 2 |
|||
time_horizon: 128 |
|||
sequence_length: 64 |
|||
summary_freq: 20000 |
|||
tau: 0.005 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.99 |
|
|||
default: |
|||
trainer: ppo |
|||
batch_size: 1024 |
|||
beta: 5.0e-3 |
|||
buffer_size: 10240 |
|||
epsilon: 0.2 |
|||
hidden_units: 128 |
|||
lambd: 0.95 |
|||
learning_rate: 3.0e-4 |
|||
max_steps: 5.0e5 |
|||
memory_size: 256 |
|||
normalize: false |
|||
num_epoch: 3 |
|||
num_layers: 2 |
|||
time_horizon: 64 |
|||
sequence_length: 64 |
|||
summary_freq: 10000 |
|||
use_recurrent: false |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.99 |
|||
|
|||
Pyramids: |
|||
summary_freq: 30000 |
|||
time_horizon: 128 |
|||
batch_size: 128 |
|||
buffer_size: 2048 |
|||
hidden_units: 512 |
|||
num_layers: 2 |
|||
beta: 1.0e-2 |
|||
max_steps: 1.0e7 |
|||
num_epoch: 3 |
|||
behavioral_cloning: |
|||
demo_path: Project/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo |
|||
strength: 0.5 |
|||
steps: 150000 |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.99 |
|||
curiosity: |
|||
strength: 0.02 |
|||
gamma: 0.99 |
|||
encoding_size: 256 |
|||
gail: |
|||
strength: 0.01 |
|||
gamma: 0.99 |
|||
encoding_size: 128 |
|||
demo_path: Project/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo |
|||
|
|||
CrawlerStatic: |
|||
normalize: true |
|||
num_epoch: 3 |
|||
time_horizon: 1000 |
|||
batch_size: 2024 |
|||
buffer_size: 20240 |
|||
max_steps: 1e7 |
|||
summary_freq: 30000 |
|||
num_layers: 3 |
|||
hidden_units: 512 |
|||
behavioral_cloning: |
|||
demo_path: Project/Assets/ML-Agents/Examples/Crawler/Demos/ExpertCrawlerSta.demo |
|||
strength: 0.5 |
|||
steps: 50000 |
|||
reward_signals: |
|||
gail: |
|||
strength: 1.0 |
|||
gamma: 0.99 |
|||
encoding_size: 128 |
|||
demo_path: Project/Assets/ML-Agents/Examples/Crawler/Demos/ExpertCrawlerSta.demo |
|||
|
|||
PushBlock: |
|||
max_steps: 1.5e7 |
|||
batch_size: 128 |
|||
buffer_size: 2048 |
|||
beta: 1.0e-2 |
|||
hidden_units: 256 |
|||
summary_freq: 60000 |
|||
time_horizon: 64 |
|||
num_layers: 2 |
|||
reward_signals: |
|||
gail: |
|||
strength: 1.0 |
|||
gamma: 0.99 |
|||
encoding_size: 128 |
|||
demo_path: Project/Assets/ML-Agents/Examples/PushBlock/Demos/ExpertPush.demo |
|||
|
|||
Hallway: |
|||
use_recurrent: true |
|||
sequence_length: 64 |
|||
num_layers: 2 |
|||
hidden_units: 128 |
|||
memory_size: 256 |
|||
beta: 1.0e-2 |
|||
num_epoch: 3 |
|||
buffer_size: 1024 |
|||
batch_size: 128 |
|||
max_steps: 1.0e7 |
|||
summary_freq: 10000 |
|||
time_horizon: 64 |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.99 |
|||
gail: |
|||
strength: 0.1 |
|||
gamma: 0.99 |
|||
encoding_size: 128 |
|||
demo_path: Project/Assets/ML-Agents/Examples/Hallway/Demos/ExpertHallway.demo |
|||
|
|||
FoodCollector: |
|||
batch_size: 64 |
|||
max_steps: 2.0e6 |
|||
use_recurrent: false |
|||
hidden_units: 128 |
|||
learning_rate: 3.0e-4 |
|||
num_layers: 2 |
|||
sequence_length: 32 |
|||
reward_signals: |
|||
gail: |
|||
strength: 0.1 |
|||
gamma: 0.99 |
|||
encoding_size: 128 |
|||
demo_path: Project/Assets/ML-Agents/Examples/FoodCollector/Demos/ExpertFood.demo |
|||
behavioral_cloning: |
|||
demo_path: Project/Assets/ML-Agents/Examples/FoodCollector/Demos/ExpertFood.demo |
|||
strength: 1.0 |
|||
steps: 0 |
|
|||
resampling-interval: 5000 |
|||
|
|||
mass: |
|||
sampler-type: "uniform" |
|||
min_value: 0.5 |
|||
max_value: 10 |
|||
|
|||
gravity: |
|||
sampler-type: "uniform" |
|||
min_value: 7 |
|||
max_value: 12 |
|||
|
|||
scale: |
|||
sampler-type: "uniform" |
|||
min_value: 0.75 |
|||
max_value: 3 |
|
|||
default: |
|||
trainer: ppo |
|||
batch_size: 1024 |
|||
beta: 5.0e-3 |
|||
buffer_size: 10240 |
|||
epsilon: 0.2 |
|||
hidden_units: 128 |
|||
lambd: 0.95 |
|||
learning_rate: 3.0e-4 |
|||
learning_rate_schedule: linear |
|||
max_steps: 5.0e5 |
|||
memory_size: 128 |
|||
normalize: false |
|||
num_epoch: 3 |
|||
num_layers: 2 |
|||
time_horizon: 64 |
|||
sequence_length: 64 |
|||
summary_freq: 10000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.99 |
|||
|
|||
FoodCollector: |
|||
normalize: false |
|||
beta: 5.0e-3 |
|||
batch_size: 1024 |
|||
buffer_size: 10240 |
|||
max_steps: 2.0e6 |
|||
|
|||
Bouncer: |
|||
normalize: true |
|||
max_steps: 4.0e6 |
|||
num_layers: 2 |
|||
hidden_units: 64 |
|||
|
|||
PushBlock: |
|||
max_steps: 2.0e6 |
|||
batch_size: 128 |
|||
buffer_size: 2048 |
|||
beta: 1.0e-2 |
|||
hidden_units: 256 |
|||
summary_freq: 60000 |
|||
time_horizon: 64 |
|||
num_layers: 2 |
|||
|
|||
SmallWallJump: |
|||
max_steps: 5e6 |
|||
batch_size: 128 |
|||
buffer_size: 2048 |
|||
beta: 5.0e-3 |
|||
hidden_units: 256 |
|||
summary_freq: 20000 |
|||
time_horizon: 128 |
|||
num_layers: 2 |
|||
normalize: false |
|||
|
|||
BigWallJump: |
|||
max_steps: 2e7 |
|||
batch_size: 128 |
|||
buffer_size: 2048 |
|||
beta: 5.0e-3 |
|||
hidden_units: 256 |
|||
summary_freq: 20000 |
|||
time_horizon: 128 |
|||
num_layers: 2 |
|||
normalize: false |
|||
|
|||
Pyramids: |
|||
summary_freq: 30000 |
|||
time_horizon: 128 |
|||
batch_size: 128 |
|||
buffer_size: 2048 |
|||
hidden_units: 512 |
|||
num_layers: 2 |
|||
beta: 1.0e-2 |
|||
max_steps: 1.0e7 |
|||
num_epoch: 3 |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.99 |
|||
curiosity: |
|||
strength: 0.02 |
|||
gamma: 0.99 |
|||
encoding_size: 256 |
|||
|
|||
VisualPyramids: |
|||
time_horizon: 128 |
|||
batch_size: 64 |
|||
buffer_size: 2024 |
|||
hidden_units: 256 |
|||
num_layers: 1 |
|||
beta: 1.0e-2 |
|||
max_steps: 1.0e7 |
|||
num_epoch: 3 |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.99 |
|||
curiosity: |
|||
strength: 0.01 |
|||
gamma: 0.99 |
|||
encoding_size: 256 |
|||
|
|||
3DBall: |
|||
normalize: true |
|||
batch_size: 64 |
|||
buffer_size: 12000 |
|||
summary_freq: 12000 |
|||
time_horizon: 1000 |
|||
lambd: 0.99 |
|||
beta: 0.001 |
|||
|
|||
3DBallHard: |
|||
normalize: true |
|||
batch_size: 1200 |
|||
buffer_size: 12000 |
|||
summary_freq: 12000 |
|||
time_horizon: 1000 |
|||
max_steps: 5.0e6 |
|||
beta: 0.001 |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.995 |
|||
|
|||
Tennis: |
|||
normalize: true |
|||
max_steps: 5.0e7 |
|||
learning_rate_schedule: constant |
|||
batch_size: 1024 |
|||
buffer_size: 10240 |
|||
hidden_units: 256 |
|||
time_horizon: 1000 |
|||
self_play: |
|||
window: 10 |
|||
play_against_latest_model_ratio: 0.5 |
|||
save_steps: 50000 |
|||
swap_steps: 50000 |
|||
team_change: 100000 |
|||
|
|||
Goalie: |
|||
normalize: false |
|||
max_steps: 5.0e7 |
|||
learning_rate_schedule: constant |
|||
batch_size: 2048 |
|||
buffer_size: 20480 |
|||
hidden_units: 512 |
|||
time_horizon: 1000 |
|||
num_layers: 2 |
|||
self_play: |
|||
window: 10 |
|||
play_against_latest_model_ratio: 0.5 |
|||
save_steps: 50000 |
|||
swap_steps: 25000 |
|||
team_change: 200000 |
|||
|
|||
Striker: |
|||
normalize: false |
|||
max_steps: 5.0e7 |
|||
learning_rate_schedule: constant |
|||
batch_size: 2048 |
|||
buffer_size: 20480 |
|||
hidden_units: 512 |
|||
time_horizon: 1000 |
|||
num_layers: 2 |
|||
self_play: |
|||
window: 10 |
|||
play_against_latest_model_ratio: 0.5 |
|||
save_steps: 50000 |
|||
swap_steps: 100000 |
|||
team_change: 200000 |
|||
|
|||
SoccerTwos: |
|||
normalize: false |
|||
max_steps: 5.0e7 |
|||
learning_rate_schedule: constant |
|||
batch_size: 2048 |
|||
buffer_size: 20480 |
|||
hidden_units: 512 |
|||
time_horizon: 1000 |
|||
num_layers: 2 |
|||
self_play: |
|||
window: 10 |
|||
play_against_latest_model_ratio: 0.5 |
|||
save_steps: 50000 |
|||
swap_steps: 50000 |
|||
team_change: 200000 |
|||
|
|||
CrawlerStatic: |
|||
normalize: true |
|||
num_epoch: 3 |
|||
time_horizon: 1000 |
|||
batch_size: 2024 |
|||
buffer_size: 20240 |
|||
max_steps: 1e7 |
|||
summary_freq: 30000 |
|||
num_layers: 3 |
|||
hidden_units: 512 |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.995 |
|||
|
|||
CrawlerDynamic: |
|||
normalize: true |
|||
num_epoch: 3 |
|||
time_horizon: 1000 |
|||
batch_size: 2024 |
|||
buffer_size: 20240 |
|||
max_steps: 1e7 |
|||
summary_freq: 30000 |
|||
num_layers: 3 |
|||
hidden_units: 512 |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.995 |
|||
|
|||
WormDynamic: |
|||
normalize: true |
|||
num_epoch: 3 |
|||
time_horizon: 1000 |
|||
batch_size: 2024 |
|||
buffer_size: 20240 |
|||
max_steps: 3.5e6 |
|||
summary_freq: 30000 |
|||
num_layers: 3 |
|||
hidden_units: 512 |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.995 |
|||
|
|||
WormStatic: |
|||
normalize: true |
|||
num_epoch: 3 |
|||
time_horizon: 1000 |
|||
batch_size: 2024 |
|||
buffer_size: 20240 |
|||
max_steps: 3.5e6 |
|||
summary_freq: 30000 |
|||
num_layers: 3 |
|||
hidden_units: 512 |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.995 |
|||
|
|||
Walker: |
|||
normalize: true |
|||
num_epoch: 3 |
|||
time_horizon: 1000 |
|||
batch_size: 2048 |
|||
buffer_size: 20480 |
|||
max_steps: 2e7 |
|||
summary_freq: 30000 |
|||
num_layers: 3 |
|||
hidden_units: 512 |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.995 |
|||
|
|||
Reacher: |
|||
normalize: true |
|||
num_epoch: 3 |
|||
time_horizon: 1000 |
|||
batch_size: 2024 |
|||
buffer_size: 20240 |
|||
max_steps: 2e7 |
|||
summary_freq: 60000 |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.995 |
|||
|
|||
Hallway: |
|||
use_recurrent: true |
|||
sequence_length: 64 |
|||
num_layers: 2 |
|||
hidden_units: 128 |
|||
memory_size: 128 |
|||
beta: 1.0e-2 |
|||
num_epoch: 3 |
|||
buffer_size: 1024 |
|||
batch_size: 128 |
|||
max_steps: 1.0e7 |
|||
summary_freq: 10000 |
|||
time_horizon: 64 |
|||
|
|||
VisualHallway: |
|||
use_recurrent: true |
|||
sequence_length: 64 |
|||
num_layers: 1 |
|||
hidden_units: 128 |
|||
memory_size: 128 |
|||
beta: 1.0e-2 |
|||
num_epoch: 3 |
|||
buffer_size: 1024 |
|||
batch_size: 64 |
|||
max_steps: 1.0e7 |
|||
summary_freq: 10000 |
|||
time_horizon: 64 |
|||
|
|||
VisualPushBlock: |
|||
use_recurrent: true |
|||
sequence_length: 32 |
|||
num_layers: 1 |
|||
hidden_units: 128 |
|||
memory_size: 128 |
|||
beta: 1.0e-2 |
|||
num_epoch: 3 |
|||
buffer_size: 1024 |
|||
batch_size: 64 |
|||
max_steps: 3.0e6 |
|||
summary_freq: 60000 |
|||
time_horizon: 64 |
|||
|
|||
GridWorld: |
|||
batch_size: 32 |
|||
normalize: false |
|||
num_layers: 1 |
|||
hidden_units: 256 |
|||
beta: 5.0e-3 |
|||
buffer_size: 256 |
|||
max_steps: 500000 |
|||
summary_freq: 20000 |
|||
time_horizon: 5 |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.9 |
|||
|
|||
Basic: |
|||
batch_size: 32 |
|||
normalize: false |
|||
num_layers: 1 |
|||
hidden_units: 20 |
|||
beta: 5.0e-3 |
|||
buffer_size: 256 |
|||
max_steps: 5.0e5 |
|||
summary_freq: 2000 |
|||
time_horizon: 3 |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.9 |
部分文件因为文件数量过多而无法显示
撰写
预览
正在加载...
取消
保存
Reference in new issue