浏览代码

Worm SAC configs (#3912)

/whitepaper-experiments
GitHub 5 年前
当前提交
2f80dd02
共有 3 个文件被更改,包括 50 次插入和 272 次删除
  1. 25
      config/sac/WormDynamic.yaml
  2. 25
      config/sac/WormStatic.yaml
  3. 272
      config/sac_trainer_config.yaml

25
config/sac/WormDynamic.yaml


# SAC trainer configuration for the WormDynamic behavior (Unity ML-Agents).
behaviors:
  WormDynamic:
    trainer: sac
    batch_size: 256
    buffer_size: 500000
    buffer_init_steps: 0
    hidden_units: 512
    init_entcoef: 1.0
    learning_rate: 0.0003
    learning_rate_schedule: constant
    max_steps: 5e6
    memory_size: 128
    normalize: true
    steps_per_update: 20
    num_layers: 3
    time_horizon: 1000
    sequence_length: 64
    summary_freq: 30000
    tau: 0.005
    use_recurrent: false
    vis_encode_type: simple
    reward_signals:
      # Environment (extrinsic) reward only.
      extrinsic:
        strength: 1.0
        gamma: 0.995

25
config/sac/WormStatic.yaml


# SAC trainer configuration for the WormStatic behavior (Unity ML-Agents).
behaviors:
  WormStatic:
    trainer: sac
    batch_size: 256
    buffer_size: 500000
    # Pre-fill the replay buffer with 2000 random steps before training starts.
    buffer_init_steps: 2000
    hidden_units: 512
    init_entcoef: 1.0
    learning_rate: 0.0003
    learning_rate_schedule: constant
    max_steps: 3e6
    memory_size: 128
    normalize: true
    steps_per_update: 20
    num_layers: 3
    time_horizon: 1000
    sequence_length: 64
    summary_freq: 30000
    tau: 0.005
    use_recurrent: false
    vis_encode_type: simple
    reward_signals:
      # Environment (extrinsic) reward only.
      extrinsic:
        strength: 1.0
        gamma: 0.995

272
config/sac_trainer_config.yaml


# SAC trainer configuration for the ML-Agents example environments.
# The `default` section applies to every behavior; each named section
# below overrides only the keys it lists.
default:
  trainer: sac
  batch_size: 128
  buffer_size: 50000
  buffer_init_steps: 0
  hidden_units: 128
  init_entcoef: 1.0
  learning_rate: 3.0e-4
  learning_rate_schedule: constant
  max_steps: 5.0e5
  memory_size: 128
  normalize: false
  steps_per_update: 10
  num_layers: 2
  time_horizon: 64
  sequence_length: 64
  summary_freq: 10000
  tau: 0.005
  use_recurrent: false
  vis_encode_type: simple
  reward_signals:
    extrinsic:
      strength: 1.0
      gamma: 0.99

FoodCollector:
  normalize: false
  batch_size: 256
  buffer_size: 500000
  max_steps: 2.0e6
  init_entcoef: 0.05

Bouncer:
  normalize: true
  max_steps: 1.0e6
  num_layers: 2
  hidden_units: 64
  summary_freq: 20000

PushBlock:
  max_steps: 2e6
  init_entcoef: 0.05
  hidden_units: 256
  summary_freq: 100000
  time_horizon: 64
  num_layers: 2

SmallWallJump:
  max_steps: 5e6
  hidden_units: 256
  summary_freq: 20000
  time_horizon: 128
  init_entcoef: 0.1
  num_layers: 2
  normalize: false

BigWallJump:
  max_steps: 2e7
  hidden_units: 256
  summary_freq: 20000
  time_horizon: 128
  num_layers: 2
  init_entcoef: 0.1
  normalize: false

Striker:
  max_steps: 5.0e6
  learning_rate: 1e-3
  hidden_units: 256
  summary_freq: 20000
  time_horizon: 128
  init_entcoef: 0.1
  num_layers: 2
  normalize: false

Goalie:
  max_steps: 5.0e6
  learning_rate: 1e-3
  hidden_units: 256
  summary_freq: 20000
  time_horizon: 128
  init_entcoef: 0.1
  num_layers: 2
  normalize: false

Pyramids:
  summary_freq: 30000
  time_horizon: 128
  batch_size: 128
  buffer_init_steps: 10000
  buffer_size: 500000
  hidden_units: 256
  num_layers: 2
  init_entcoef: 0.01
  max_steps: 1.0e7
  sequence_length: 16
  tau: 0.01
  use_recurrent: false
  reward_signals:
    extrinsic:
      strength: 2.0
      gamma: 0.99
    # GAIL imitation reward learned from the expert demonstration below.
    gail:
      strength: 0.02
      gamma: 0.99
      encoding_size: 128
      use_actions: true
      demo_path: Project/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo

VisualPyramids:
  time_horizon: 128
  batch_size: 64
  hidden_units: 256
  buffer_init_steps: 1000
  num_layers: 1
  max_steps: 1.0e7
  buffer_size: 500000
  init_entcoef: 0.01
  tau: 0.01
  reward_signals:
    extrinsic:
      strength: 2.0
      gamma: 0.99
    gail:
      strength: 0.02
      gamma: 0.99
      encoding_size: 128
      use_actions: true
      demo_path: Project/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo

3DBall:
  normalize: true
  batch_size: 64
  buffer_size: 12000
  summary_freq: 12000
  time_horizon: 1000
  hidden_units: 64
  init_entcoef: 0.5

3DBallHard:
  normalize: true
  batch_size: 256
  summary_freq: 12000
  time_horizon: 1000

Tennis:
  normalize: true
  max_steps: 2e7
  hidden_units: 256
  self_play:
    window: 10
    play_against_current_self_ratio: 0.5
    save_steps: 50000
    swap_steps: 50000

CrawlerStatic:
  normalize: true
  time_horizon: 1000
  batch_size: 256
  steps_per_update: 20
  buffer_size: 500000
  buffer_init_steps: 2000
  max_steps: 3e6
  summary_freq: 30000
  init_entcoef: 1.0
  num_layers: 3
  hidden_units: 512
  reward_signals:
    extrinsic:
      strength: 1.0
      gamma: 0.995

CrawlerDynamic:
  normalize: true
  time_horizon: 1000
  batch_size: 256
  buffer_size: 500000
  summary_freq: 30000
  steps_per_update: 20
  num_layers: 3
  max_steps: 5e6
  hidden_units: 512
  reward_signals:
    extrinsic:
      strength: 1.0
      gamma: 0.995

Walker:
  normalize: true
  time_horizon: 1000
  batch_size: 256
  buffer_size: 500000
  max_steps: 2e7
  summary_freq: 30000
  num_layers: 4
  steps_per_update: 30
  hidden_units: 512
  reward_signals:
    extrinsic:
      strength: 1.0
      gamma: 0.995

Reacher:
  normalize: true
  time_horizon: 1000
  batch_size: 128
  buffer_size: 500000
  max_steps: 2e7
  steps_per_update: 20
  summary_freq: 60000

Hallway:
  sequence_length: 32
  num_layers: 2
  hidden_units: 128
  memory_size: 128
  init_entcoef: 0.1
  max_steps: 5.0e6
  summary_freq: 10000
  time_horizon: 64
  use_recurrent: true

VisualHallway:
  sequence_length: 32
  num_layers: 1
  hidden_units: 128
  memory_size: 128
  gamma: 0.99
  batch_size: 64
  max_steps: 1.0e7
  summary_freq: 10000
  time_horizon: 64
  use_recurrent: true

VisualPushBlock:
  use_recurrent: true
  sequence_length: 32
  num_layers: 1
  hidden_units: 128
  memory_size: 128
  gamma: 0.99
  buffer_size: 1024
  batch_size: 64
  max_steps: 3.0e6
  summary_freq: 60000
  time_horizon: 64

GridWorld:
  batch_size: 128
  normalize: false
  num_layers: 1
  hidden_units: 128
  init_entcoef: 0.5
  buffer_init_steps: 1000
  buffer_size: 50000
  max_steps: 500000
  summary_freq: 20000
  time_horizon: 5
  reward_signals:
    extrinsic:
      strength: 1.0
      gamma: 0.9

Basic:
  batch_size: 64
  normalize: false
  num_layers: 2
  init_entcoef: 0.01
  hidden_units: 20
  max_steps: 5.0e5
  summary_freq: 2000
  time_horizon: 10
正在加载...
取消
保存