HH
5 年前
当前提交
9570a5fe
共有 1 个文件被更改,包括 0 次插入 和 357 次删除
|
|||
default: |
|||
trainer: ppo |
|||
batch_size: 1024 |
|||
beta: 5.0e-3 |
|||
buffer_size: 10240 |
|||
epsilon: 0.2 |
|||
hidden_units: 128 |
|||
lambd: 0.95 |
|||
learning_rate: 3.0e-4 |
|||
learning_rate_schedule: linear |
|||
max_steps: 5.0e5 |
|||
memory_size: 128 |
|||
normalize: false |
|||
num_epoch: 3 |
|||
num_layers: 2 |
|||
time_horizon: 64 |
|||
sequence_length: 64 |
|||
summary_freq: 10000 |
|||
use_recurrent: false |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.99 |
|||
|
|||
FoodCollector: |
|||
normalize: false |
|||
beta: 5.0e-3 |
|||
batch_size: 1024 |
|||
buffer_size: 10240 |
|||
max_steps: 2.0e6 |
|||
|
|||
Bouncer: |
|||
normalize: true |
|||
max_steps: 4.0e6 |
|||
num_layers: 2 |
|||
hidden_units: 64 |
|||
|
|||
PushBlock: |
|||
max_steps: 2.0e6 |
|||
batch_size: 128 |
|||
buffer_size: 2048 |
|||
beta: 1.0e-2 |
|||
hidden_units: 256 |
|||
summary_freq: 60000 |
|||
time_horizon: 64 |
|||
num_layers: 2 |
|||
|
|||
SmallWallJump: |
|||
max_steps: 5e6 |
|||
batch_size: 128 |
|||
buffer_size: 2048 |
|||
beta: 5.0e-3 |
|||
hidden_units: 256 |
|||
summary_freq: 20000 |
|||
time_horizon: 128 |
|||
num_layers: 2 |
|||
normalize: false |
|||
|
|||
BigWallJump: |
|||
max_steps: 2e7 |
|||
batch_size: 128 |
|||
buffer_size: 2048 |
|||
beta: 5.0e-3 |
|||
hidden_units: 256 |
|||
summary_freq: 20000 |
|||
time_horizon: 128 |
|||
num_layers: 2 |
|||
normalize: false |
|||
|
|||
Pyramids: |
|||
summary_freq: 30000 |
|||
time_horizon: 128 |
|||
batch_size: 128 |
|||
buffer_size: 2048 |
|||
hidden_units: 512 |
|||
num_layers: 2 |
|||
beta: 1.0e-2 |
|||
max_steps: 1.0e7 |
|||
num_epoch: 3 |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.99 |
|||
curiosity: |
|||
strength: 0.02 |
|||
gamma: 0.99 |
|||
encoding_size: 256 |
|||
|
|||
VisualPyramids: |
|||
time_horizon: 128 |
|||
batch_size: 64 |
|||
buffer_size: 2024 |
|||
hidden_units: 256 |
|||
num_layers: 1 |
|||
beta: 1.0e-2 |
|||
max_steps: 1.0e7 |
|||
num_epoch: 3 |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.99 |
|||
curiosity: |
|||
strength: 0.01 |
|||
gamma: 0.99 |
|||
encoding_size: 256 |
|||
|
|||
3DBall: |
|||
normalize: true |
|||
batch_size: 64 |
|||
buffer_size: 12000 |
|||
summary_freq: 12000 |
|||
time_horizon: 1000 |
|||
lambd: 0.99 |
|||
beta: 0.001 |
|||
|
|||
3DBallHard: |
|||
normalize: true |
|||
batch_size: 1200 |
|||
buffer_size: 12000 |
|||
summary_freq: 12000 |
|||
time_horizon: 1000 |
|||
max_steps: 5.0e6 |
|||
beta: 0.001 |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.995 |
|||
|
|||
Tennis: |
|||
normalize: true |
|||
max_steps: 5.0e7 |
|||
learning_rate_schedule: constant |
|||
batch_size: 1024 |
|||
buffer_size: 10240 |
|||
hidden_units: 256 |
|||
time_horizon: 1000 |
|||
self_play: |
|||
window: 10 |
|||
play_against_latest_model_ratio: 0.5 |
|||
save_steps: 50000 |
|||
swap_steps: 50000 |
|||
team_change: 100000 |
|||
|
|||
Soccer: |
|||
normalize: false |
|||
max_steps: 5.0e7 |
|||
learning_rate_schedule: constant |
|||
batch_size: 2048 |
|||
buffer_size: 20480 |
|||
hidden_units: 512 |
|||
time_horizon: 1000 |
|||
num_layers: 2 |
|||
self_play: |
|||
window: 10 |
|||
play_against_latest_model_ratio: 0.5 |
|||
save_steps: 50000 |
|||
swap_steps: 50000 |
|||
team_change: 100000 |
|||
|
|||
CrawlerStatic: |
|||
normalize: true |
|||
num_epoch: 3 |
|||
time_horizon: 1000 |
|||
batch_size: 2024 |
|||
buffer_size: 20240 |
|||
max_steps: 1e7 |
|||
summary_freq: 30000 |
|||
num_layers: 3 |
|||
hidden_units: 512 |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.995 |
|||
|
|||
CrawlerDynamic: |
|||
normalize: true |
|||
num_epoch: 3 |
|||
time_horizon: 1000 |
|||
batch_size: 2024 |
|||
buffer_size: 20240 |
|||
max_steps: 3.5e6 |
|||
# max_steps: 1e7 |
|||
summary_freq: 30000 |
|||
num_layers: 3 |
|||
hidden_units: 512 |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.995 |
|||
|
|||
WormDynamic: |
|||
normalize: true |
|||
num_epoch: 3 |
|||
time_horizon: 1000 |
|||
batch_size: 2024 |
|||
buffer_size: 20240 |
|||
max_steps: 3.5e6 |
|||
summary_freq: 30000 |
|||
num_layers: 3 |
|||
hidden_units: 512 |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.995 |
|||
|
|||
WormStatic: |
|||
normalize: true |
|||
num_epoch: 3 |
|||
time_horizon: 1000 |
|||
batch_size: 2024 |
|||
buffer_size: 20240 |
|||
max_steps: 3.5e6 |
|||
summary_freq: 30000 |
|||
num_layers: 3 |
|||
hidden_units: 512 |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.995 |
|||
|
|||
WalkerDynamic: |
|||
normalize: true |
|||
# learning_rate_schedule: constant |
|||
num_epoch: 10 |
|||
time_horizon: 1000 |
|||
batch_size: 2048 |
|||
buffer_size: 20480 |
|||
# batch_size: 24576 #2048 |
|||
# buffer_size: 245760 #20480 |
|||
max_steps: 2e7 |
|||
beta: 9.0e-3 |
|||
epsilon: 0.3 |
|||
# max_steps: 1e8 |
|||
summary_freq: 30000 |
|||
# learning_rate: 1.0e-3 |
|||
num_layers: 3 |
|||
hidden_units: 512 |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.995 |
|||
|
|||
# WalkerDynamic: |
|||
# normalize: true |
|||
# num_epoch: 3 |
|||
# time_horizon: 1000 |
|||
# batch_size: 2048 |
|||
# buffer_size: 20480 |
|||
# max_steps: 2e7 |
|||
# summary_freq: 30000 |
|||
# num_layers: 3 |
|||
# hidden_units: 512 |
|||
# reward_signals: |
|||
# extrinsic: |
|||
# strength: 1.0 |
|||
# gamma: 0.995 |
|||
|
|||
WalkerStatic: |
|||
normalize: true |
|||
num_epoch: 3 |
|||
time_horizon: 1000 |
|||
batch_size: 2048 |
|||
buffer_size: 20480 |
|||
max_steps: 2e7 |
|||
summary_freq: 30000 |
|||
num_layers: 3 |
|||
hidden_units: 512 |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.995 |
|||
|
|||
Reacher: |
|||
normalize: true |
|||
num_epoch: 3 |
|||
time_horizon: 1000 |
|||
batch_size: 2024 |
|||
buffer_size: 20240 |
|||
max_steps: 2e7 |
|||
summary_freq: 60000 |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.995 |
|||
|
|||
Hallway: |
|||
use_recurrent: true |
|||
sequence_length: 64 |
|||
num_layers: 2 |
|||
hidden_units: 128 |
|||
memory_size: 128 |
|||
beta: 1.0e-2 |
|||
num_epoch: 3 |
|||
buffer_size: 1024 |
|||
batch_size: 128 |
|||
max_steps: 1.0e7 |
|||
summary_freq: 10000 |
|||
time_horizon: 64 |
|||
|
|||
VisualHallway: |
|||
use_recurrent: true |
|||
sequence_length: 64 |
|||
num_layers: 1 |
|||
hidden_units: 128 |
|||
memory_size: 128 |
|||
beta: 1.0e-2 |
|||
num_epoch: 3 |
|||
buffer_size: 1024 |
|||
batch_size: 64 |
|||
max_steps: 1.0e7 |
|||
summary_freq: 10000 |
|||
time_horizon: 64 |
|||
|
|||
VisualPushBlock: |
|||
use_recurrent: true |
|||
sequence_length: 32 |
|||
num_layers: 1 |
|||
hidden_units: 128 |
|||
memory_size: 128 |
|||
beta: 1.0e-2 |
|||
num_epoch: 3 |
|||
buffer_size: 1024 |
|||
batch_size: 64 |
|||
max_steps: 3.0e6 |
|||
summary_freq: 60000 |
|||
time_horizon: 64 |
|||
|
|||
GridWorld: |
|||
batch_size: 32 |
|||
normalize: false |
|||
num_layers: 1 |
|||
hidden_units: 256 |
|||
beta: 5.0e-3 |
|||
buffer_size: 256 |
|||
max_steps: 500000 |
|||
summary_freq: 20000 |
|||
time_horizon: 5 |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.9 |
|||
|
|||
Basic: |
|||
batch_size: 32 |
|||
normalize: false |
|||
num_layers: 1 |
|||
hidden_units: 20 |
|||
beta: 5.0e-3 |
|||
buffer_size: 256 |
|||
max_steps: 5.0e5 |
|||
summary_freq: 2000 |
|||
time_horizon: 3 |
|||
reward_signals: |
|||
extrinsic: |
|||
strength: 1.0 |
|||
gamma: 0.9 |
撰写
预览
正在加载...
取消
保存
Reference in new issue