
comms agent and fixed hallway

/comms-grad
Andrew Cohen, 4 years ago
Current commit e81e68de
11 changed files with 861 additions and 155 deletions
  1. Project/Assets/ML-Agents/Examples/Hallway/Prefabs/SymbolFinderArea.prefab (764)
  2. Project/Assets/ML-Agents/Examples/Hallway/Scenes/Hallway.unity.meta (2)
  3. Project/Assets/ML-Agents/Examples/Hallway/Scripts/HallwayAgent.cs (12)
  4. Project/Assets/ML-Agents/Examples/Hallway/Scripts/HallwayCollabAgent.cs (37)
  5. Project/Assets/ML-Agents/Examples/WallJump/Scripts/WallJumpAgent.cs (22)
  6. ml-agents/mlagents/trainers/policy/torch_policy.py (24)
  7. ml-agents/mlagents/trainers/ppo/optimizer_torch.py (13)
  8. ml-agents/mlagents/trainers/torch/action_model.py (12)
  9. ml-agents/mlagents/trainers/torch/distributions.py (17)
  10. ml-agents/mlagents/trainers/torch/networks.py (105)
  11. ml-agents/mlagents/trainers/trajectory.py (8)

764
Project/Assets/ML-Agents/Examples/Hallway/Prefabs/SymbolFinderArea.prefab
The file diff is too large to display.
View file

2
Project/Assets/ML-Agents/Examples/Hallway/Scenes/Hallway.unity.meta


fileFormatVersion: 2
guid: d6d6a33ed0e18459a8d61817d600978a
guid: 71d1487c63f604ff6985cc2f23c7afbf
DefaultImporter:
externalObjects: {}
userData:

12
Project/Assets/ML-Agents/Examples/Hallway/Scripts/HallwayAgent.cs


public GameObject symbolO;
public GameObject symbolX;
public bool useVectorObs;
Rigidbody m_AgentRb;
Material m_GroundMaterial;
Renderer m_GroundRenderer;
HallwaySettings m_HallwaySettings;
int m_Selection;
protected Rigidbody m_AgentRb;
protected Material m_GroundMaterial;
protected Renderer m_GroundRenderer;
protected HallwaySettings m_HallwaySettings;
protected int m_Selection;
public override void Initialize()
{

}
}
IEnumerator GoalScoredSwapGroundMaterial(Material mat, float time)
protected IEnumerator GoalScoredSwapGroundMaterial(Material mat, float time)
{
m_GroundRenderer.material = mat;
yield return new WaitForSeconds(time);

37
Project/Assets/ML-Agents/Examples/Hallway/Scripts/HallwayCollabAgent.cs


public GameObject symbolS;
public HallwayCollabAgent teammate;
public bool isSpotter = true;
TextMesh m_MessageText;
public override void Initialize()
{
base.Initialize();
if (isSpotter)
{
m_MessageText = gameObject.GetComponentInChildren<TextMesh>();
}
}
var agentOffset = 10f;
if (isSpotter)
{

}
public override void CollectObservations(VectorSensor sensor)
{
if (useVectorObs)
{
sensor.AddObservation(StepCount / (float)MaxStep);
}
//if (useVectorObs)
//{
// sensor.AddObservation(StepCount / (float)MaxStep);
//}
sensor.AddObservation(toOnehot(m_Message));
}

}
int comm_act = actionBuffers.DiscreteActions[1];
if (isSpotter)
{
m_MessageText.text = "Message:" + comm_act.ToString();
}
teammate.tellAgent(comm_act);
// if (isSpotter) // Test
// {

}
}
}
public override void WriteDiscreteActionMask(IDiscreteActionMask actionMask)
{
// Mask communication actions for the listener agent.
if (!isSpotter)
{
// The listener never sends a message, so mask the first option of the comm branch (branch 1).
actionMask.WriteMask(1, new[] {0});
}
}
}
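The listener receives the spotter's message as a one-hot observation (the toOnehot(m_Message) call above), and its own communication branch is masked so only the spotter talks. As a rough Python sketch of the encoding step, assuming the message is a single integer drawn from a fixed-size vocabulary (to_onehot and message_size are illustrative names, not the project's C# implementation):

    import numpy as np

    def to_onehot(message: int, message_size: int) -> np.ndarray:
        # One-hot encode a discrete message so it can be appended to the
        # listener's vector observation.
        onehot = np.zeros(message_size, dtype=np.float32)
        onehot[message] = 1.0
        return onehot

    # e.g. message 2 out of 4 possible messages -> [0., 0., 1., 0.]
    print(to_onehot(2, 4))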

22
Project/Assets/ML-Agents/Examples/WallJump/Scripts/WallJumpAgent.cs


public class WallJumpAgent : Agent
{
// Depending on this value, the wall will have a different height
int m_Configuration;
protected int m_Configuration;
// Brain to use when no wall is present
public NNModel noWallBrain;
// Brain to use when a jumpable wall is present

public GameObject ground;
public GameObject spawnArea;
Bounds m_SpawnAreaBounds;
protected Bounds m_SpawnAreaBounds;
Rigidbody m_ShortBlockRb;
Rigidbody m_AgentRb;
Material m_GroundMaterial;
Renderer m_GroundRenderer;
WallJumpSettings m_WallJumpSettings;
protected Rigidbody m_ShortBlockRb;
protected Rigidbody m_AgentRb;
protected Material m_GroundMaterial;
protected Renderer m_GroundRenderer;
protected WallJumpSettings m_WallJumpSettings;
public float jumpingTime;
public float jumpTime;

string m_SmallWallBehaviorName = "SmallWallJump";
string m_BigWallBehaviorName = "BigWallJump";
EnvironmentParameters m_ResetParams;
protected EnvironmentParameters m_ResetParams;
public override void Initialize()
{

/// <returns>The Enumerator to be used in a Coroutine.</returns>
/// <param name="mat">The material to be swapped.</param>
/// <param name="time">The time the material will remain.</param>
IEnumerator GoalScoredSwapGroundMaterial(Material mat, float time)
protected IEnumerator GoalScoredSwapGroundMaterial(Material mat, float time)
{
m_GroundRenderer.material = mat;
yield return new WaitForSeconds(time); // wait for the given duration

}
// Detect when the agent hits the goal
void OnTriggerStay(Collider col)
protected virtual void OnTriggerStay(Collider col)
{
if (col.gameObject.CompareTag("goal") && DoGroundCheck(true))
{

/// If 1: Small wall and smallWallBrain.
/// Other : Tall wall and BigWallBrain.
/// </param>
void ConfigureAgent(int config)
protected virtual void ConfigureAgent(int config)
{
var localScale = wall.transform.localScale;
if (config == 0)

24
ml-agents/mlagents/trainers/policy/torch_policy.py


from mlagents.trainers.settings import TrainerSettings
from mlagents.trainers.trajectory import SplitObservations
from mlagents.trainers.torch.networks import (
SharedActorCritic,
SeparateActorCritic,
GlobalSteps,
)
from mlagents.trainers.torch.networks import SeparateActorCritic, GlobalSteps
from mlagents.trainers.torch.utils import ModelUtils
from mlagents.trainers.torch.agent_action import AgentAction

obs, masks, memories, seq_len
)
return (actions, log_probs, entropies, memories)
def get_comms(
self,
obs: List[torch.Tensor],
masks: Optional[torch.Tensor] = None,
memories: Optional[torch.Tensor] = None,
seq_len: int = 1,
) -> Tuple[torch.Tensor]:
"""
:param obs: List of observations.
:param masks: Action masks for discrete actions, else None.
:param memories: Input memories when using RNN, else None.
:param seq_len: Sequence length when using RNN.
:return: Differentiable one-hot communication tensors produced by the actor-critic.
"""
comms = self.actor_critic.get_comms(obs, masks, memories, seq_len)
return comms
def evaluate_actions(
self,
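get_comms mirrors sample_actions but returns the actor-critic's differentiable communication output rather than sampled actions. A minimal sketch of the call shape, using a mock stand-in instead of a real TorchPolicy (MockPolicy, the observation size of 36, and the message size of 4 are all invented here):

    import torch
    import torch.nn.functional as F

    class MockPolicy:
        # Stand-in for TorchPolicy.get_comms: same inputs as sample_actions,
        # but the output is a list of differentiable one-hot tensors
        # (one per discrete branch) instead of sampled actions.
        def get_comms(self, obs, masks=None, memories=None, seq_len=1):
            logits = obs[0] @ torch.randn(obs[0].shape[1], 4)
            return [F.gumbel_softmax(logits, tau=0.1, hard=True, dim=1)]

    policy = MockPolicy()
    comms = policy.get_comms([torch.randn(8, 36)])
    print(comms[0].shape)  # torch.Size([8, 4]); each row is a one-hot message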

13
ml-agents/mlagents/trainers/ppo/optimizer_torch.py


critic_obs = [
ModelUtils.list_to_tensor_list(_agent_obs) for _agent_obs in critic_obs_np
]
comm_obs_np = AgentBuffer.obs_list_list_to_obs_batch(batch["comm_obs"])
comm_obs = [
ModelUtils.list_to_tensor_list(_agent_obs) for _agent_obs in comm_obs_np
]
act_masks = ModelUtils.list_to_tensor(batch["action_mask"])
actions = AgentAction.from_dict(batch)

]
if len(memories) > 0:
memories = torch.stack(memories).unsqueeze(0)
comms = self.policy.get_comms(
comm_obs[0],
masks=act_masks,
memories=memories,
seq_len=self.policy.sequence_length,
)
obs[-1] = comms[1]
log_probs, entropy, values = self.policy.evaluate_actions(
obs,
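The key line is obs[-1] = comms[1]: the stored message observation is swapped for the freshly computed, differentiable one before evaluate_actions runs, so the PPO loss can backpropagate through the communication. A self-contained toy illustration of that gradient path, with invented layer sizes (spotter_head, listener_net, and the assumption that the last observation slot holds the teammate's message are all illustrative):

    import torch
    import torch.nn as nn
    import torch.nn.functional as F

    spotter_head = nn.Linear(6, 4)    # teammate obs -> message logits
    listener_net = nn.Linear(10, 1)   # own obs (6) + one-hot message (4)

    teammate_obs = torch.randn(8, 6)
    own_obs = torch.randn(8, 6)

    # Differentiable one-hot message, as get_comms produces.
    message = F.gumbel_softmax(spotter_head(teammate_obs), tau=0.1, hard=True, dim=1)

    # Substitute the differentiable message into the listener's inputs
    # (the role of obs[-1] = comms[1] above), then take a toy loss.
    loss = listener_net(torch.cat([own_obs, message], dim=1)).mean()
    loss.backward()

    # Gradients reach the spotter's communication head through the hard one-hot.
    print(spotter_head.weight.grad is not None)  # True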

12
ml-agents/mlagents/trainers/torch/action_model.py


action_out_deprecated = None
return continuous_out, discrete_out, action_out_deprecated
def get_comms(self, inputs: torch.Tensor, masks: torch.Tensor) -> torch.Tensor:
"""
Produces the differentiable communication output for the discrete branches,
given the encoding from the network body, via the discrete distribution's
straight-through (Gumbel-Softmax) forward pass.
:params inputs: The encoding from the network body
:params masks: Action masks for discrete actions
:return: A list of differentiable one-hot tensors, one per discrete branch.
"""
comms = self._discrete_distribution.differentiable_forward(inputs, masks)
return comms
def forward(
self, inputs: torch.Tensor, masks: torch.Tensor
) -> Tuple[AgentAction, ActionLogProbs, torch.Tensor]:

17
ml-agents/mlagents/trainers/torch/distributions.py


) -> torch.Tensor:
# Zero out masked logits, then subtract a large value. Technique mentioned here:
# https://arxiv.org/abs/2006.14171. Our implementation is ONNX and Barracuda-friendly.
if allow_mask.shape[1] == 3:
allow_mask = allow_mask[:, :1].expand(-1, 3)
block_mask = -1.0 * allow_mask + 1.0
# We do -1 * tensor + constant instead of constant - tensor because it seems
# Barracuda might swap the inputs of a "Sub" operation

end = int(np.sum(self.act_sizes[: idx + 1]))
split_masks.append(masks[:, start:end])
return split_masks
def differentiable_forward(
self, inputs: torch.Tensor, masks: torch.Tensor
) -> List[DistInstance]:
# Todo - Support multiple branches in mask code
branch_distributions = []
masks = self._split_masks(masks)
for idx, branch in enumerate(self.branches):
logits = branch(inputs)
norm_logits = self._mask_branch(logits, masks[idx])
distribution = torch.nn.functional.gumbel_softmax(
norm_logits, tau=0.1, hard=True, dim=1
)
branch_distributions.append(distribution)
return branch_distributions
def forward(self, inputs: torch.Tensor, masks: torch.Tensor) -> List[DistInstance]:
# Todo - Support multiple branches in mask code
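differentiable_forward replaces sampling from a categorical distribution with a straight-through Gumbel-Softmax: the forward pass yields a hard one-hot message, while the backward pass uses the soft probabilities, so the result stays differentiable. A small standalone sketch of one branch, combining the masking idea with F.gumbel_softmax (the masking constant of 1e8 is an assumption, not necessarily the exact value used by _mask_branch):

    import torch
    import torch.nn.functional as F

    logits = torch.randn(8, 4)        # batch of message logits for one branch
    allow_mask = torch.ones(8, 4)
    allow_mask[:, 3] = 0.0            # disallow the last message

    # Zero out masked logits, then subtract a large value (as in _mask_branch).
    block_mask = -1.0 * allow_mask + 1.0
    masked_logits = logits * allow_mask - 1e8 * block_mask

    # Straight-through Gumbel-Softmax: hard one-hot forward, soft gradients backward.
    onehot = F.gumbel_softmax(masked_logits, tau=0.1, hard=True, dim=1)
    print(onehot[:, 3].sum().item())  # 0.0: the masked message is never selected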

105
ml-agents/mlagents/trainers/torch/networks.py


memories: Optional[torch.Tensor] = None,
sequence_length: int = 1,
critic_obs: Optional[List[List[torch.Tensor]]] = None,
diff=False,
) -> Tuple[
AgentAction, ActionLogProbs, torch.Tensor, Dict[str, torch.Tensor], torch.Tensor
]:

return action, log_probs, entropies, memories
class SharedActorCritic(SimpleActor, ActorCritic):
def __init__(
self,
observation_shapes: List[Tuple[int, ...]],
network_settings: NetworkSettings,
action_spec: ActionSpec,
stream_names: List[str],
conditional_sigma: bool = False,
tanh_squash: bool = False,
):
self.use_lstm = network_settings.memory is not None
super().__init__(
observation_shapes,
network_settings,
action_spec,
conditional_sigma,
tanh_squash,
)
self.stream_names = stream_names
self.value_heads = ValueHeads(stream_names, self.encoding_size)
def critic_pass(
self,
net_inputs: List[torch.Tensor],
memories: Optional[torch.Tensor] = None,
sequence_length: int = 1,
) -> Tuple[Dict[str, torch.Tensor], torch.Tensor]:
encoding, memories_out = self.network_body(
net_inputs, memories=memories, sequence_length=sequence_length
)
return self.value_heads(encoding), memories_out
def get_stats_and_value(
self,
net_inputs: List[torch.Tensor],
actions: AgentAction,
masks: Optional[torch.Tensor] = None,
memories: Optional[torch.Tensor] = None,
sequence_length: int = 1,
critic_obs: Optional[List[List[torch.Tensor]]] = None,
) -> Tuple[ActionLogProbs, torch.Tensor, Dict[str, torch.Tensor]]:
encoding, memories = self.network_body(
net_inputs, memories=memories, sequence_length=sequence_length
)
log_probs, entropies = self.action_model.evaluate(encoding, masks, actions)
value_outputs = self.value_heads(encoding)
return log_probs, entropies, value_outputs
def get_action_stats_and_value(
self,
net_inputs: List[torch.Tensor],
masks: Optional[torch.Tensor] = None,
memories: Optional[torch.Tensor] = None,
sequence_length: int = 1,
) -> Tuple[
AgentAction, ActionLogProbs, torch.Tensor, Dict[str, torch.Tensor], torch.Tensor
]:
encoding, memories = self.network_body(
net_inputs, memories=memories, sequence_length=sequence_length
)
action, log_probs, entropies = self.action_model(encoding, masks)
value_outputs = self.value_heads(encoding)
return action, log_probs, entropies, value_outputs, memories
class SeparateActorCritic(SimpleActor, ActorCritic):
def __init__(
self,

tanh_squash,
)
self.stream_names = stream_names
self.critic = CentralizedValueNetwork(
stream_names, observation_shapes, network_settings, num_agents=2
)
self.critic = ValueNetwork(stream_names, observation_shapes, network_settings)
# self.critic = CentralizedValueNetwork(
# stream_names, observation_shapes, network_settings, num_agents=2
# )
@property
def memory_size(self) -> int:

if self.use_lstm:
# Use only the back half of memories for critic
actor_mem, critic_mem = torch.split(memories, self.memory_size // 2, -1)
all_net_inputs = [net_inputs]
if critic_obs is not None:
all_net_inputs.extend(critic_obs)
all_net_inputs = net_inputs
# if critic_obs is not None:
# all_net_inputs.extend(critic_obs)
value_outputs, critic_mem_out = self.critic(
all_net_inputs, memories=critic_mem, sequence_length=sequence_length
)

net_inputs, memories=actor_mem, sequence_length=sequence_length
)
log_probs, entropies = self.action_model.evaluate(encoding, masks, actions)
all_net_inputs = [net_inputs]
if critic_obs is not None:
all_net_inputs.extend(critic_obs)
all_net_inputs = net_inputs
# if critic_obs is not None:
# all_net_inputs.extend(critic_obs)
value_outputs, critic_mem_outs = self.critic(
all_net_inputs, memories=critic_mem, sequence_length=sequence_length
)

critic_mem = None
actor_mem = None
all_net_inputs = [net_inputs]
if critic_obs is not None:
all_net_inputs.extend(critic_obs)
all_net_inputs = net_inputs
# if critic_obs is not None:
# all_net_inputs.extend(critic_obs)
encoding, actor_mem_outs = self.network_body(
net_inputs, memories=actor_mem, sequence_length=sequence_length

else:
mem_out = None
return action, log_probs, entropies, value_outputs, mem_out
def get_comms(
self,
net_inputs: List[torch.Tensor],
masks: Optional[torch.Tensor] = None,
memories: Optional[torch.Tensor] = None,
sequence_length: int = 1,
) -> Tuple[torch.Tensor]:
encoding, memories = self.network_body(
net_inputs, memories=memories, sequence_length=sequence_length
)
comms = self.action_model.get_comms(encoding, masks)
return comms
def update_normalization(self, net_inputs: List[torch.Tensor]) -> None:
super().update_normalization(net_inputs)
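This section also toggles the critic between a plain ValueNetwork and a CentralizedValueNetwork(num_agents=2), roughly the difference between a value estimate computed from the agent's own observations and one that also conditions on teammates' observations (the critic_obs handled above). A toy contrast of the two, with invented sizes and plain linear layers standing in for the real networks:

    import torch
    import torch.nn as nn

    obs_dim, num_agents = 6, 2
    own_obs = torch.randn(8, obs_dim)
    teammate_obs = torch.randn(8, obs_dim)

    decentralized_critic = nn.Linear(obs_dim, 1)             # own obs only
    centralized_critic = nn.Linear(obs_dim * num_agents, 1)  # all agents' obs

    v_dec = decentralized_critic(own_obs)
    v_cen = centralized_critic(torch.cat([own_obs, teammate_obs], dim=1))
    print(v_dec.shape, v_cen.shape)  # both torch.Size([8, 1])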

8
ml-agents/mlagents/trainers/trajectory.py


agent_buffer_trajectory = AgentBuffer()
curr_obs = self.steps[0].obs
for step, exp in enumerate(self.steps):
if step == 0:
# this initial all-zeros entry creates the one-step offset for comms
agent_buffer_trajectory["comm_obs"].append(
np.zeros_like(exp.collab_obs)
)
if step < len(self.steps) - 1:
next_obs = self.steps[step + 1].obs
else:

agent_buffer_trajectory["critic_obs"].append(exp.collab_obs)
# avoid errors from differently sized buffer fields
if step < len(self.steps) - 1:
agent_buffer_trajectory["comm_obs"].append(exp.collab_obs)
if exp.memory is not None:
agent_buffer_trajectory["memory"].append(exp.memory)
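The comm_obs buffer is offset by one step relative to critic_obs: step 0 gets an all-zeros placeholder and the final step's message is dropped, so the message an agent conditions on at step t is the one produced at step t-1. A toy reconstruction of that offset with scalar placeholder messages (real entries are observation lists, not scalars):

    import numpy as np

    collab_obs = [np.array([float(t)]) for t in range(4)]  # message produced at each step

    comm_obs = [np.zeros_like(collab_obs[0])]  # step 0 has not received anything yet
    comm_obs += collab_obs[:-1]                # step t sees the step t-1 message

    print([c.item() for c in comm_obs])  # [0.0, 0.0, 1.0, 2.0]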
