Unity 机器学习代理工具包 (ML-Agents) 是一个开源项目,它使游戏和模拟能够作为训练智能代理的环境。
您最多可选择25个主题。主题必须以中文、字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符。
 
 
 
 
 

269 行
5.2 KiB

# default: baseline settings section. The named sections in this file repeat
# a subset of these keys (presumably as per-behavior overrides -- confirm
# against the config loader). Scalar keys are sorted alphabetically after
# `trainer`; the nested reward_signals mapping comes last.
default:
  trainer: sac
  batch_size: 128
  buffer_init_steps: 0
  buffer_size: 50000
  hidden_units: 128
  init_entcoef: 1.0
  learning_rate: 3.0e-4
  learning_rate_schedule: constant
  max_steps: 5.0e4
  memory_size: 256
  normalize: false
  num_layers: 2
  num_update: 1
  sequence_length: 64
  summary_freq: 1000
  tau: 0.005
  time_horizon: 64
  train_interval: 1
  use_recurrent: false
  vis_encode_type: simple
  # One sub-mapping per reward signal; only `extrinsic` is configured here.
  reward_signals:
    extrinsic:
      gamma: 0.99
      strength: 1.0
# BananaLearning: section-specific settings (presumably layered over
# `default:` by the trainer -- confirm). Keys sorted alphabetically.
BananaLearning:
  batch_size: 256
  buffer_size: 500000
  init_entcoef: 0.05
  max_steps: 1.0e5
  normalize: false
  train_interval: 1
# BouncerLearning: section-specific settings (presumably layered over
# `default:` by the trainer -- confirm). Keys sorted alphabetically.
BouncerLearning:
  # NOTE(review): `beta` has no entry in `default:` -- confirm the sac
  # trainer actually reads it.
  beta: 0.0
  hidden_units: 64
  max_steps: 5.0e5
  normalize: true
  num_layers: 2
  summary_freq: 1000
# PushBlockLearning: section-specific settings (presumably layered over
# `default:` by the trainer -- confirm). Keys sorted alphabetically.
PushBlockLearning:
  # NOTE(review): `beta` has no entry in `default:` -- confirm the sac
  # trainer actually reads it.
  beta: 1.0e-2
  hidden_units: 256
  init_entcoef: 0.05
  max_steps: 5.0e4
  num_layers: 2
  summary_freq: 2000
  time_horizon: 64
# SmallWallJumpLearning: section-specific settings (presumably layered over
# `default:` by the trainer -- confirm). Keys sorted alphabetically.
SmallWallJumpLearning:
  hidden_units: 256
  init_entcoef: 0.1
  max_steps: 1.0e6
  normalize: false
  num_layers: 2
  summary_freq: 2000
  time_horizon: 128
# BigWallJumpLearning: section-specific settings (presumably layered over
# `default:` by the trainer -- confirm). Keys sorted alphabetically.
BigWallJumpLearning:
  hidden_units: 256
  init_entcoef: 0.1
  max_steps: 1.0e6
  normalize: false
  num_layers: 2
  summary_freq: 2000
  time_horizon: 128
# StrikerLearning: section-specific settings (presumably layered over
# `default:` by the trainer -- confirm). Keys sorted alphabetically.
StrikerLearning:
  # NOTE(review): `beta` has no entry in `default:` -- confirm the sac
  # trainer actually reads it.
  beta: 1.0e-2
  hidden_units: 256
  init_entcoef: 0.1
  # Keep the decimal point in the mantissa: YAML 1.1 loaders resolve a
  # dotless `1e-3` as a string rather than a float.
  learning_rate: 1.0e-3
  max_steps: 5.0e5
  normalize: false
  num_layers: 2
  summary_freq: 2000
  time_horizon: 128
# GoalieLearning: section-specific settings (presumably layered over
# `default:` by the trainer -- confirm). Keys sorted alphabetically.
GoalieLearning:
  # NOTE(review): `beta` has no entry in `default:` -- confirm the sac
  # trainer actually reads it.
  beta: 1.0e-2
  hidden_units: 256
  init_entcoef: 0.1
  # Keep the decimal point in the mantissa: YAML 1.1 loaders resolve a
  # dotless `1e-3` as a string rather than a float.
  learning_rate: 1.0e-3
  max_steps: 5.0e5
  normalize: false
  num_layers: 2
  summary_freq: 2000
  time_horizon: 128
# PyramidsLearning: section-specific settings (presumably layered over
# `default:` by the trainer -- confirm). Scalar keys are sorted
# alphabetically; the nested reward_signals mapping comes last.
PyramidsLearning:
  batch_size: 128
  buffer_init_steps: 10000
  buffer_size: 500000
  hidden_units: 256
  init_entcoef: 0.01
  max_steps: 5.0e5
  num_layers: 2
  sequence_length: 16
  summary_freq: 2000
  tau: 0.01
  time_horizon: 128
  use_recurrent: false
  # Two reward signals are configured: `extrinsic` and `gail`.
  reward_signals:
    extrinsic:
      gamma: 0.99
      strength: 2.0
    gail:
      # Relative path; presumably resolved from the directory training is
      # launched in -- confirm.
      demo_path: demos/ExpertPyramid.demo
      encoding_size: 128
      gamma: 0.99
      strength: 0.02
      use_actions: true
# VisualPyramidsLearning: section-specific settings (presumably layered over
# `default:` by the trainer -- confirm). Scalar keys are sorted
# alphabetically; the nested reward_signals mapping comes last.
VisualPyramidsLearning:
  batch_size: 64
  # NOTE(review): `beta` has no entry in `default:` -- confirm the sac
  # trainer actually reads it.
  beta: 1.0e-2
  buffer_init_steps: 1000
  buffer_size: 500000
  hidden_units: 256
  init_entcoef: 0.01
  max_steps: 5.0e5
  num_layers: 1
  tau: 0.01
  time_horizon: 128
  # Two reward signals are configured: `extrinsic` and `gail`.
  reward_signals:
    extrinsic:
      gamma: 0.99
      strength: 2.0
    gail:
      # Relative path; presumably resolved from the directory training is
      # launched in -- confirm.
      demo_path: demos/ExpertPyramid.demo
      encoding_size: 128
      gamma: 0.99
      strength: 0.02
      use_actions: true
# 3DBallLearning: section-specific settings (presumably layered over
# `default:` by the trainer -- confirm). Keys sorted alphabetically.
3DBallLearning:
  batch_size: 64
  buffer_size: 12000
  hidden_units: 64
  init_entcoef: 0.5
  normalize: true
  summary_freq: 1000
  time_horizon: 1000
# 3DBallHardLearning: section-specific settings (presumably layered over
# `default:` by the trainer -- confirm). Keys sorted alphabetically.
3DBallHardLearning:
  batch_size: 256
  normalize: true
  summary_freq: 1000
  time_horizon: 1000
# TennisLearning: section-specific settings (presumably layered over
# `default:` by the trainer -- confirm). Keys sorted alphabetically.
TennisLearning:
  buffer_size: 500000
  # Written with an explicit mantissa decimal, the same notation the
  # `default:` section uses for max_steps.
  max_steps: 2.0e5
  normalize: true
# CrawlerStaticLearning: section-specific settings (presumably layered over
# `default:` by the trainer -- confirm). Keys sorted alphabetically.
CrawlerStaticLearning:
  batch_size: 256
  buffer_init_steps: 2000
  buffer_size: 500000
  hidden_units: 512
  init_entcoef: 1.0
  # Written with an explicit mantissa decimal, the same notation the
  # `default:` section uses for max_steps.
  max_steps: 5.0e5
  normalize: true
  num_layers: 3
  summary_freq: 3000
  time_horizon: 1000
  train_interval: 3
# CrawlerDynamicLearning: section-specific settings (presumably layered over
# `default:` by the trainer -- confirm). Keys sorted alphabetically.
CrawlerDynamicLearning:
  batch_size: 256
  buffer_size: 500000
  hidden_units: 512
  # Written with an explicit mantissa decimal, the same notation the
  # `default:` section uses for max_steps.
  max_steps: 1.0e6
  normalize: true
  num_layers: 3
  summary_freq: 3000
  time_horizon: 1000
  train_interval: 3
# WalkerLearning: section-specific settings (presumably layered over
# `default:` by the trainer -- confirm). Scalar keys are sorted
# alphabetically; the nested reward_signals mapping comes last.
WalkerLearning:
  batch_size: 256
  buffer_size: 500000
  hidden_units: 512
  # Written with an explicit mantissa decimal, the same notation the
  # `default:` section uses for max_steps.
  max_steps: 2.0e6
  normalize: true
  num_layers: 3
  summary_freq: 3000
  time_horizon: 1000
  train_interval: 3
  # Only the `extrinsic` signal is configured; its gamma (0.995) differs
  # from the 0.99 given in `default:`.
  reward_signals:
    extrinsic:
      gamma: 0.995
      strength: 1.0
# ReacherLearning: section-specific settings (presumably layered over
# `default:` by the trainer -- confirm). Keys sorted alphabetically.
ReacherLearning:
  batch_size: 128
  buffer_size: 500000
  # Written with an explicit mantissa decimal, the same notation the
  # `default:` section uses for max_steps.
  max_steps: 2.0e5
  normalize: true
  summary_freq: 3000
  time_horizon: 1000
# HallwayLearning: section-specific settings (presumably layered over
# `default:` by the trainer -- confirm). Keys sorted alphabetically.
# `use_recurrent` is listed exactly once; duplicate mapping keys are invalid
# YAML and most loaders silently keep only the last occurrence.
HallwayLearning:
  # NOTE(review): `beta` has no entry in `default:` -- confirm the sac
  # trainer actually reads it.
  beta: 0.0
  hidden_units: 128
  init_entcoef: 0.1
  max_steps: 5.0e5
  memory_size: 256
  num_layers: 2
  sequence_length: 32
  summary_freq: 1000
  time_horizon: 64
  use_recurrent: true
# VisualHallwayLearning: section-specific settings (presumably layered over
# `default:` by the trainer -- confirm). Keys sorted alphabetically.
# `use_recurrent` is listed exactly once; duplicate mapping keys are invalid
# YAML and most loaders silently keep only the last occurrence.
VisualHallwayLearning:
  batch_size: 64
  # NOTE(review): `beta` has no entry in `default:` -- confirm the sac
  # trainer actually reads it.
  beta: 1.0e-2
  # NOTE(review): in `default:` gamma appears only under
  # reward_signals.extrinsic; confirm this section-level gamma is read.
  gamma: 0.99
  hidden_units: 128
  max_steps: 5.0e5
  memory_size: 256
  num_layers: 1
  sequence_length: 32
  summary_freq: 1000
  time_horizon: 64
  use_recurrent: true
# VisualPushBlockLearning: section-specific settings (presumably layered
# over `default:` by the trainer -- confirm). Keys sorted alphabetically.
VisualPushBlockLearning:
  batch_size: 64
  # NOTE(review): `beta` has no entry in `default:` -- confirm the sac
  # trainer actually reads it.
  beta: 1.0e-2
  buffer_size: 1024
  # NOTE(review): in `default:` gamma appears only under
  # reward_signals.extrinsic; confirm this section-level gamma is read.
  gamma: 0.99
  hidden_units: 128
  max_steps: 5.0e5
  memory_size: 256
  num_layers: 1
  sequence_length: 32
  summary_freq: 1000
  time_horizon: 64
  use_recurrent: true
# GridWorldLearning: section-specific settings (presumably layered over
# `default:` by the trainer -- confirm). Scalar keys are sorted
# alphabetically; the nested reward_signals mapping comes last.
GridWorldLearning:
  batch_size: 128
  buffer_size: 50000
  hidden_units: 128
  init_entcoef: 0.01
  max_steps: 5.0e5
  normalize: false
  num_layers: 1
  summary_freq: 2000
  time_horizon: 5
  # Only the `extrinsic` signal is configured; its gamma (0.9) differs
  # from the 0.99 given in `default:`.
  reward_signals:
    extrinsic:
      gamma: 0.9
      strength: 1.0
# BasicLearning: section-specific settings (presumably layered over
# `default:` by the trainer -- confirm). Keys sorted alphabetically.
BasicLearning:
  batch_size: 64
  hidden_units: 20
  init_entcoef: 0.01
  max_steps: 5.0e5
  normalize: false
  num_layers: 2
  summary_freq: 2000
  time_horizon: 10