# Unity Machine Learning Agents Toolkit (ML-Agents) is an open-source project that
# enables games and simulations to serve as environments for training intelligent agents.
#
# NOTE(review): the lines below are copy/paste residue from a web page (GitHub
# topic-picker UI text and file metadata). Kept as comments so the file parses as YAML:
#   您最多选择25个主题 主题必须以中文或者字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符
#   83 行
#   2.0 KiB
# Legacy (pre-0.17 trainer_config.yaml style) SAC configuration, kept for reference.
# NOTE(review): indentation inside this comment block was lost; the nesting below is
# reconstructed — confirm against the ML-Agents version this was written for before
# uncommenting (the old format used `trainer:`, the new one uses `trainer_type:`).
# TouchCube:
#   trainer: sac
#   hyperparameters:
#     batch_size: 128
#     buffer_size: 50000
#     buffer_init_steps: 0
#     hidden_units: 128
#     init_entcoef: 1.0
#     learning_rate: 3.0e-4
#     learning_rate_schedule: constant
#     max_steps: 5.0e5
#     memory_size: 128
#     normalize: false
#     num_update: 1
#     train_interval: 1
#     steps_per_update: 1
#     num_layers: 2
#     time_horizon: 64
#     sequence_length: 64
#     summary_freq: 5000
#     tau: 0.005
#     use_recurrent: false
#     vis_encode_type: simple
#   reward_signals:
#     extrinsic:
#       strength: 1.0
#       gamma: 0.95
# Active trainer configuration: PPO for the TouchCube behavior.
# Structural indentation restored — the flattened original was not valid YAML
# (every key sat at column 0). Values are unchanged; the inline comments preserve
# the previously-tried alternatives.
behaviors:
  TouchCube:
    trainer_type: ppo
    hyperparameters:
      batch_size: 512  # 2048
      buffer_size: 5120  # 20480
      learning_rate: 0.0003
      beta: 0.005
      epsilon: 0.2
      lambd: 0.95
      num_epoch: 3
      learning_rate_schedule: linear
    network_settings:
      normalize: true
      hidden_units: 512
      num_layers: 3
      vis_encode_type: simple
    reward_signals:
      extrinsic:
        gamma: 0.995
        strength: 1.0
    output_path: default
    keep_checkpoints: 5
    max_steps: 20000000
    time_horizon: 500  # 1000
    summary_freq: 30000
    threaded: true
# Alternative SAC configuration (new `behaviors:` format), kept for reference.
# NOTE(review): indentation inside this comment block was lost; restored below to
# match the standard ML-Agents SAC schema so it can be uncommented directly.
# behaviors:
#   TouchCube:
#     trainer_type: sac
#     hyperparameters:
#       learning_rate: 0.0003
#       learning_rate_schedule: constant
#       batch_size: 256
#       buffer_size: 500000
#       buffer_init_steps: 0
#       tau: 0.005
#       steps_per_update: 30.0
#       save_replay_buffer: false
#       init_entcoef: 1.0
#       reward_signal_steps_per_update: 30.0
#     network_settings:
#       normalize: true
#       hidden_units: 512
#       num_layers: 4
#       vis_encode_type: simple
#     reward_signals:
#       extrinsic:
#         gamma: 0.995
#         strength: 1.0
#     output_path: default
#     keep_checkpoints: 5
#     max_steps: 20000000
#     time_horizon: 1000
#     summary_freq: 30000
#     threaded: true