浏览代码

add team reward

/develop/teammanager
Ruo-Ping Dong 4 年前
当前提交
224d2087
共有 4 个文件被更改,包括 105 次插入10 次删除
  1. 37
      Project/Assets/ML-Agents/Examples/PushBlock/Scripts/PushBlockTeamManager.cs
  2. 45
      Project/Assets/ML-Agents/Examples/PushBlock/Scripts/ZombiePushBlockDeathEnvController.cs
  3. 5
      com.unity.ml-agents/Runtime/Agent.cs
  4. 28
      config/ppo/PushBlockZombieTeamReward.yaml

37
Project/Assets/ML-Agents/Examples/PushBlock/Scripts/PushBlockTeamManager.cs


using System.Collections.Generic;
using Unity.MLAgents;
using System.Linq;
// Agents that have been added to this team manager.
List<Agent> m_AgentList = new List<Agent> { };
// Per-agent "done" flag: initialised to false when an agent is added, set to true in
// OnAgentDone, and cleared again in OnTeamDone once the done signal has been forwarded.
Dictionary<Agent, bool> m_AgentDone = new Dictionary<Agent, bool> { };
m_AgentList.Add(agent);
m_AgentDone[agent] = false;
}
/// <summary>
/// Marks <paramref name="agent"/> as done in the team's done-tracking table.
/// Nothing is sent from here; the flag is consumed later by <c>OnTeamDone</c>.
/// </summary>
/// <param name="agent">Agent whose done flag is raised.</param>
/// <param name="doneReason">Not read by this override.</param>
/// <param name="sensors">Not read by this override.</param>
public override void OnAgentDone(Agent agent, Agent.DoneReason doneReason, List<ISensor> sensors)
    => m_AgentDone[agent] = true;
/// <summary>
/// Forwards the done signal for every agent currently flagged as done, then clears its flag.
/// </summary>
public void OnTeamDone()
{
    // Work on a copy of the keys: the flags are rewritten inside the loop, and a
    // dictionary must not be modified while it is being enumerated directly.
    var trackedAgents = new List<Agent>(m_AgentDone.Keys);
    foreach (var member in trackedAgents)
    {
        if (!m_AgentDone[member])
        {
            continue;
        }

        member.SendDoneToTrainer();
        m_AgentDone[member] = false;
    }
}
/// <summary>
/// Gives the same reward to every tracked agent.
/// Agents flagged as done receive it through <c>AddRewardAfterDeath</c>;
/// all others receive it through <c>AddReward</c>.
/// </summary>
/// <param name="reward">Reward amount applied to each agent.</param>
public void AddTeamReward(float reward)
{
    foreach (var entry in m_AgentDone)
    {
        var member = entry.Key;
        var isDone = entry.Value;

        if (!isDone)
        {
            member.AddReward(reward);
            continue;
        }

        member.AddRewardAfterDeath(reward);
    }
}
}

45
Project/Assets/ML-Agents/Examples/PushBlock/Scripts/ZombiePushBlockDeathEnvController.cs


// NOTE(review): presumably toggles randomising the block's rotation; its use is not shown here — confirm.
public bool UseRandomBlockRotation = true;
// NOTE(review): presumably toggles randomising the block's position; its use is not shown here — confirm.
public bool UseRandomBlockPosition = true;
// When true, a team manager instance is created while the controller initialises.
public bool UseTeamManager = true;
// When true (together with UseTeamManager), a PushBlockTeamManager is created and rewards are
// routed through its AddTeamReward; otherwise a BaseTeamManager is created and rewards are
// added to each active agent individually.
public bool UseTeamReward = true;
// public bool DiscountTeamReward = true;
PushBlockSettings m_PushBlockSettings;

// Initialize TeamManager
if (UseTeamManager)
{
m_TeamManager = new BaseTeamManager();
if (UseTeamReward)
{
m_TeamManager = new PushBlockTeamManager();
}
else
{
m_TeamManager = new BaseTeamManager();
}
}
foreach (var item in AgentsList)

/// <summary>
/// Rewards the agents after a goal has been scored.
/// With team rewards enabled the score is handed to the team manager, which distributes it;
/// otherwise it is added directly to every agent whose GameObject is active in the hierarchy.
/// </summary>
/// <param name="col">Collider supplied by the caller; not read by the reward logic below.</param>
/// <param name="score">Reward amount to give.</param>
public void ScoredAGoal(Collider col, float score)
{
    // Give Agent Rewards
    if (UseTeamManager && UseTeamReward)
    {
        // m_TeamManager is created as a PushBlockTeamManager when both flags are set.
        var pushManager = (PushBlockTeamManager)m_TeamManager;
        pushManager.AddTeamReward(score);
    }
    else
    {
        foreach (var item in AgentsList)
        {
            if (item.Agent.gameObject.activeInHierarchy)
            {
                item.Agent.AddReward(score);
            }
        }
    }
}

/// <summary>
/// Penalises the agents with a reward of -1 when the zombie touches the block.
/// With team rewards enabled the penalty is handed to the team manager, which distributes it;
/// otherwise it is added directly to every agent whose GameObject is active in the hierarchy.
/// </summary>
public void ZombieTouchedBlock()
{
    //Give Agent Rewards
    if (UseTeamManager && UseTeamReward)
    {
        // m_TeamManager is created as a PushBlockTeamManager when both flags are set.
        var pushManager = (PushBlockTeamManager)m_TeamManager;
        pushManager.AddTeamReward(-1);
    }
    else
    {
        foreach (var item in AgentsList)
        {
            if (item.Agent.gameObject.activeInHierarchy)
            {
                item.Agent.AddReward(-1);
            }
        }
    }
}

return;
}
item.Agent.gameObject.SetActive(false);
}
if (UseTeamManager && UseTeamReward)
{
var pushManager = (PushBlockTeamManager)m_TeamManager;
pushManager.OnTeamDone();
}
// Reset Agents

5
com.unity.ml-agents/Runtime/Agent.cs


m_CumulativeReward += increment;
}
/// <summary>
/// Adds <paramref name="increment"/> directly to the reward stored in the agent's
/// info record (<c>m_Info.reward</c>). No other reward counter is modified here.
/// </summary>
/// <param name="increment">Amount added to the stored reward.</param>
public void AddRewardAfterDeath(float increment) => m_Info.reward += increment;
/// <summary>
/// Retrieves the episode reward for the Agent.
/// </summary>

28
config/ppo/PushBlockZombieTeamReward.yaml


# PPO trainer configuration for the PushBlock behavior (zombie / team-reward variant).
behaviors:
  PushBlock:
    trainer_type: ppo
    hyperparameters:
      batch_size: 128
      buffer_size: 2048
      learning_rate: 0.0003
      beta: 0.01
      epsilon: 0.2
      lambd: 0.95
      num_epoch: 3
      learning_rate_schedule: linear
    network_settings:
      normalize: false
      hidden_units: 256
      num_layers: 2
      vis_encode_type: simple
    reward_signals:
      extrinsic:
        gamma: 0.99
        strength: 1.0
    keep_checkpoints: 5
    max_steps: 15000000 #2000000
    time_horizon: 64
    summary_freq: 60000
    threaded: true
env_settings:
  num_envs: 3
正在加载...
取消
保存