Unity 机器学习代理工具包 (ML-Agents) 是一个开源项目，它使游戏和模拟能够作为训练智能代理的环境。
您最多可选择 25 个主题。主题必须以中文、字母或数字开头，可以包含连字符 (-)，并且长度不得超过 35 个字符。
 
 
 
 
 

287 行
5.4 KiB

# Baseline settings. The sections that follow are keyed by behavior name and
# presumably override these values per behavior - confirm against the loader.
# NOTE(review): exponent literals without a sign (e.g. 5.0e5) load as strings
# under YAML 1.1 parsers; verify that the consumer casts them to numbers.
default:
  trainer: ppo
  batch_size: 1024
  beta: 5.0e-3
  buffer_size: 10240
  epsilon: 0.2
  hidden_units: 128
  lambd: 0.95
  learning_rate: 3.0e-4
  learning_rate_schedule: linear
  max_steps: 5.0e5
  memory_size: 128
  normalize: false
  num_epoch: 3
  num_layers: 2
  time_horizon: 64
  sequence_length: 64
  summary_freq: 10000
  use_recurrent: false
  vis_encode_type: simple
  # Reward signals are a nested map: signal name -> its own settings.
  reward_signals:
    extrinsic:
      strength: 1.0
      gamma: 0.99
# FoodCollector settings. Only max_steps differs from `default`; the other
# four keys repeat the values listed there.
FoodCollector:
  normalize: false
  beta: 5.0e-3
  batch_size: 1024
  buffer_size: 10240
  max_steps: 2.0e6
# Bouncer settings: normalization enabled, 64 hidden units, 4.0e6 max steps.
Bouncer:
  normalize: true
  max_steps: 4.0e6
  num_layers: 2
  hidden_units: 64
# PushBlock settings: small batch/buffer (128 / 2048) and 256 hidden units.
PushBlock:
  max_steps: 2.0e6
  batch_size: 128
  buffer_size: 2048
  beta: 1.0e-2
  hidden_units: 256
  summary_freq: 60000
  time_horizon: 64
  num_layers: 2
# SmallWallJump settings. Identical to BigWallJump except for max_steps.
SmallWallJump:
  max_steps: 5e6
  batch_size: 128
  buffer_size: 2048
  beta: 5.0e-3
  hidden_units: 256
  summary_freq: 20000
  time_horizon: 128
  num_layers: 2
  normalize: false
# BigWallJump settings. Identical to SmallWallJump except for max_steps.
BigWallJump:
  max_steps: 2e7
  batch_size: 128
  buffer_size: 2048
  beta: 5.0e-3
  hidden_units: 256
  summary_freq: 20000
  time_horizon: 128
  num_layers: 2
  normalize: false
# Pyramids settings. Declares two reward signals: extrinsic and curiosity.
Pyramids:
  summary_freq: 30000
  time_horizon: 128
  batch_size: 128
  buffer_size: 2048
  hidden_units: 512
  num_layers: 2
  beta: 1.0e-2
  max_steps: 1.0e7
  num_epoch: 3
  reward_signals:
    extrinsic:
      strength: 1.0
      gamma: 0.99
    # encoding_size belongs to the curiosity signal, not to the behavior.
    curiosity:
      strength: 0.02
      gamma: 0.99
      encoding_size: 256
# VisualPyramids settings. Declares two reward signals: extrinsic and
# curiosity (curiosity strength is 0.01 here versus 0.02 in Pyramids).
VisualPyramids:
  time_horizon: 128
  batch_size: 64
  # NOTE(review): 2024 is not a multiple of batch_size (64); confirm whether
  # 2048 was intended. Value left unchanged.
  buffer_size: 2024
  hidden_units: 256
  num_layers: 1
  beta: 1.0e-2
  max_steps: 1.0e7
  num_epoch: 3
  reward_signals:
    extrinsic:
      strength: 1.0
      gamma: 0.99
    # encoding_size belongs to the curiosity signal, not to the behavior.
    curiosity:
      strength: 0.01
      gamma: 0.99
      encoding_size: 256
# 3DBall settings: normalization enabled, long time horizon (1000).
3DBall:
  normalize: true
  batch_size: 64
  # NOTE(review): 12000 is not a multiple of batch_size (64); confirm this is
  # intentional. Value left unchanged.
  buffer_size: 12000
  summary_freq: 12000
  time_horizon: 1000
  lambd: 0.99
  beta: 0.001
# 3DBallHard settings: larger batch than 3DBall (1200) and an extrinsic
# reward signal with gamma 0.995.
3DBallHard:
  normalize: true
  batch_size: 1200
  buffer_size: 12000
  summary_freq: 12000
  time_horizon: 1000
  max_steps: 5.0e5
  beta: 0.001
  reward_signals:
    extrinsic:
      strength: 1.0
      gamma: 0.995
# Tennis settings: constant learning-rate schedule plus a self_play sub-map.
Tennis:
  normalize: true
  max_steps: 5.0e7
  learning_rate_schedule: constant
  batch_size: 1024
  buffer_size: 10240
  hidden_units: 256
  time_horizon: 1000
  # The four keys below are self-play options, not behavior-level keys.
  self_play:
    window: 10
    play_against_current_self_ratio: 0.5
    save_steps: 50000
    swap_steps: 50000
# Soccer settings: constant learning-rate schedule plus a self_play sub-map
# with the same values as Tennis.
Soccer:
  normalize: false
  max_steps: 5.0e7
  learning_rate_schedule: constant
  batch_size: 2048
  buffer_size: 20480
  hidden_units: 512
  time_horizon: 1000
  num_layers: 2
  # The four keys below are self-play options, not behavior-level keys.
  self_play:
    window: 10
    play_against_current_self_ratio: 0.5
    save_steps: 50000
    swap_steps: 50000
# CrawlerStatic settings: 3 layers of 512 hidden units and an extrinsic
# reward signal with gamma 0.995. Same values as CrawlerDynamic.
CrawlerStatic:
  normalize: true
  num_epoch: 3
  time_horizon: 1000
  batch_size: 2024
  buffer_size: 20240
  max_steps: 1e7
  summary_freq: 30000
  num_layers: 3
  hidden_units: 512
  reward_signals:
    extrinsic:
      strength: 1.0
      gamma: 0.995
# CrawlerDynamic settings: 3 layers of 512 hidden units and an extrinsic
# reward signal with gamma 0.995. Same values as CrawlerStatic.
CrawlerDynamic:
  normalize: true
  num_epoch: 3
  time_horizon: 1000
  batch_size: 2024
  buffer_size: 20240
  max_steps: 1e7
  summary_freq: 30000
  num_layers: 3
  hidden_units: 512
  reward_signals:
    extrinsic:
      strength: 1.0
      gamma: 0.995
# Walker settings: 3 layers of 512 hidden units, 2e7 max steps, and an
# extrinsic reward signal with gamma 0.995.
Walker:
  normalize: true
  num_epoch: 3
  time_horizon: 1000
  batch_size: 2048
  buffer_size: 20480
  max_steps: 2e7
  summary_freq: 30000
  num_layers: 3
  hidden_units: 512
  reward_signals:
    extrinsic:
      strength: 1.0
      gamma: 0.995
# Reacher settings: normalization enabled, 2e7 max steps, and an extrinsic
# reward signal with gamma 0.995. Network size is not set in this section.
Reacher:
  normalize: true
  num_epoch: 3
  time_horizon: 1000
  batch_size: 2024
  buffer_size: 20240
  max_steps: 2e7
  summary_freq: 60000
  reward_signals:
    extrinsic:
      strength: 1.0
      gamma: 0.995
# Hallway settings: recurrent policy enabled (use_recurrent: true) with
# sequence_length 64 and memory_size 128.
Hallway:
  use_recurrent: true
  sequence_length: 64
  num_layers: 2
  hidden_units: 128
  memory_size: 128
  beta: 1.0e-2
  num_epoch: 3
  buffer_size: 1024
  batch_size: 128
  max_steps: 1.0e7
  summary_freq: 10000
  time_horizon: 64
# VisualHallway settings: recurrent policy enabled; differs from Hallway in
# num_layers (1) and batch_size (64).
VisualHallway:
  use_recurrent: true
  sequence_length: 64
  num_layers: 1
  hidden_units: 128
  memory_size: 128
  beta: 1.0e-2
  num_epoch: 3
  buffer_size: 1024
  batch_size: 64
  max_steps: 1.0e7
  summary_freq: 10000
  time_horizon: 64
# VisualPushBlock settings: recurrent policy enabled with a shorter
# sequence_length (32) and 3.0e6 max steps.
VisualPushBlock:
  use_recurrent: true
  sequence_length: 32
  num_layers: 1
  hidden_units: 128
  memory_size: 128
  beta: 1.0e-2
  num_epoch: 3
  buffer_size: 1024
  batch_size: 64
  max_steps: 3.0e6
  summary_freq: 60000
  time_horizon: 64
# GridWorld settings: small batch/buffer (32 / 256), short time horizon (5),
# and an extrinsic reward signal with gamma 0.9.
GridWorld:
  batch_size: 32
  normalize: false
  num_layers: 1
  hidden_units: 256
  beta: 5.0e-3
  buffer_size: 256
  max_steps: 500000
  summary_freq: 20000
  time_horizon: 5
  reward_signals:
    extrinsic:
      strength: 1.0
      gamma: 0.9
# Basic settings: one layer of 20 hidden units, short time horizon (3), and
# an extrinsic reward signal with gamma 0.9.
Basic:
  batch_size: 32
  normalize: false
  num_layers: 1
  hidden_units: 20
  beta: 5.0e-3
  buffer_size: 256
  max_steps: 5.0e5
  summary_freq: 2000
  time_horizon: 3
  reward_signals:
    extrinsic:
      strength: 1.0
      gamma: 0.9