Ruo-Ping Dong
4 年前
当前提交
224d2087
共有 4 个文件被更改,包括 105 次插入 和 10 次删除
-
37Project/Assets/ML-Agents/Examples/PushBlock/Scripts/PushBlockTeamManager.cs
-
45Project/Assets/ML-Agents/Examples/PushBlock/Scripts/ZombiePushBlockDeathEnvController.cs
-
5com.unity.ml-agents/Runtime/Agent.cs
-
28config/ppo/PushBlockZombieTeamReward.yaml
|
|||
using System.Collections.Generic; |
|||
using Unity.MLAgents; |
|||
using System.Linq; |
|||
// Agents tracked by this manager; entries are appended during registration.
List<Agent> m_AgentList = new List<Agent>();

// Per-agent "done" flag: set to true in OnAgentDone and reset to false in
// OnTeamDone after the done signal has been forwarded to the trainer.
Dictionary<Agent, bool> m_AgentDone = new Dictionary<Agent, bool>();
|||
m_AgentList.Add(agent); |
|||
m_AgentDone[agent] = false; |
|||
} |
|||
|
|||
/// <summary>
/// Records that <paramref name="agent"/> has finished by setting its entry
/// in <c>m_AgentDone</c> to <c>true</c>. Nothing is sent to the trainer here;
/// the flag is consumed later by <c>OnTeamDone</c> and <c>AddTeamReward</c>.
/// </summary>
/// <param name="agent">The agent that is done.</param>
/// <param name="doneReason">Why the agent finished (not used by this override).</param>
/// <param name="sensors">The agent's sensors (not used by this override).</param>
public override void OnAgentDone(Agent agent, Agent.DoneReason doneReason, List<ISensor> sensors)
{
    // Only flag the agent; the actual done notification is deferred.
    m_AgentDone[agent] = true;
}
|||
|
|||
/// <summary>
/// Flushes the deferred done signals: every agent currently flagged as done
/// in <c>m_AgentDone</c> has <c>SendDoneToTrainer</c> called on it and its
/// flag reset to <c>false</c>. Agents that are not flagged are left untouched.
/// </summary>
public void OnTeamDone()
{
    // Iterate over a copy of the keys because the loop body writes back into
    // m_AgentDone, which must not happen while enumerating the dictionary itself.
    var trackedAgents = new List<Agent>(m_AgentDone.Keys);

    foreach (Agent member in trackedAgents)
    {
        // The flag is read at iteration time, not when the copy was taken.
        if (!m_AgentDone[member])
        {
            continue;
        }

        member.SendDoneToTrainer();
        m_AgentDone[member] = false;
    }
}
|||
|
|||
/// <summary>
/// Gives the same reward to every agent tracked in <c>m_AgentDone</c>.
/// Agents whose done flag is set receive it through
/// <c>AddRewardAfterDeath</c>; all others receive it through <c>AddReward</c>.
/// </summary>
/// <param name="reward">The reward value passed to each agent.</param>
public void AddTeamReward(float reward)
{
    foreach (KeyValuePair<Agent, bool> entry in m_AgentDone)
    {
        Agent member = entry.Key;
        bool alreadyDone = entry.Value;

        if (!alreadyDone)
        {
            member.AddReward(reward);
            continue;
        }

        // The agent is flagged done, so use the after-death reward path.
        member.AddRewardAfterDeath(reward);
    }
}
|||
} |
|
|||
behaviors: |
|||
PushBlock: |
|||
trainer_type: ppo |
|||
hyperparameters: |
|||
batch_size: 128 |
|||
buffer_size: 2048 |
|||
learning_rate: 0.0003 |
|||
beta: 0.01 |
|||
epsilon: 0.2 |
|||
lambd: 0.95 |
|||
num_epoch: 3 |
|||
learning_rate_schedule: linear |
|||
network_settings: |
|||
normalize: false |
|||
hidden_units: 256 |
|||
num_layers: 2 |
|||
vis_encode_type: simple |
|||
reward_signals: |
|||
extrinsic: |
|||
gamma: 0.99 |
|||
strength: 1.0 |
|||
keep_checkpoints: 5 |
|||
max_steps: 15000000 #2000000 |
|||
time_horizon: 64 |
|||
summary_freq: 60000 |
|||
threaded: true |
|||
env_settings: |
|||
num_envs: 3 |
撰写
预览
正在加载...
取消
保存
Reference in new issue