
Some minor tweaks but still broken

/comms-grad
Ervin Teng, 4 years ago
Commit cb4b7ed3
3 changed files with 11 additions and 8 deletions
1. ml-agents/mlagents/trainers/optimizer/torch_optimizer.py (5 changes)
2. ml-agents/mlagents/trainers/policy/torch_policy.py (6 changes)
3. ml-agents/mlagents/trainers/torch/networks.py (8 changes)

ml-agents/mlagents/trainers/optimizer/torch_optimizer.py (5 changes)


 )
 next_obs = ModelUtils.list_to_tensor_list(next_obs)
+# This line doesn't work
+critic_obs = [ModelUtils.list_to_tensor_list(AgentBuffer.obs_list_to_obs_batch(agent_obs)) for agent_obs in batch["critic_obs"]]
-    obs, memory, sequence_length=batch.num_experiences
+    obs, memory, sequence_length=batch.num_experiences, critic_obs=critic_obs
 )
 next_value_estimate, _ = self.policy.actor_critic.critic_pass(
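The new comprehension is the piece the commit message calls broken: it tries to turn batch["critic_obs"], a per-agent collection of per-step observation lists, into one list of batched tensors per agent before it reaches critic_pass. Below is a minimal, self-contained sketch of that intended transformation; the standalone obs_list_to_obs_batch and list_to_tensor_list are stand-ins for the branch's AgentBuffer.obs_list_to_obs_batch and ModelUtils.list_to_tensor_list, whose exact contracts on this branch are an assumption.

from typing import List

import numpy as np
import torch


def obs_list_to_obs_batch(agent_obs: List[List[np.ndarray]]) -> List[np.ndarray]:
    # [step][obs_type] -> [obs_type][step]: stack each observation type
    # across steps so every type becomes one batched array.
    return [np.stack(per_type, axis=0) for per_type in zip(*agent_obs)]


def list_to_tensor_list(arrays: List[np.ndarray]) -> List[torch.Tensor]:
    # Stand-in for ModelUtils.list_to_tensor_list: one tensor per array.
    return [torch.as_tensor(a, dtype=torch.float32) for a in arrays]


# Two observation types (a length-4 vector and a 2x2 "visual" input), 3 steps:
agent_obs = [[np.ones(4), np.ones((2, 2))] for _ in range(3)]
critic_obs = list_to_tensor_list(obs_list_to_obs_batch(agent_obs))
assert critic_obs[0].shape == (3, 4) and critic_obs[1].shape == (3, 2, 2)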

ml-agents/mlagents/trainers/policy/torch_policy.py (6 changes)


 actions: torch.Tensor,
 masks: Optional[torch.Tensor] = None,
 memories: Optional[torch.Tensor] = None,
-critic_obs: Optional[List[List[torch.Tensor]]] = None,
+critic_obs: Optional[List[List[torch.Tensor]]] = None,
-    obs, masks, memories, critic_obs, seq_len
+    obs, masks, memories, seq_len, critic_obs
 )
 action_list = [actions[..., i] for i in range(actions.shape[-1])]
 log_probs, entropies, _ = ModelUtils.get_probs_and_entropy(action_list, dists)
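Both changes in this hunk are one reorder: critic_obs is dropped from its old slot ahead of the sequence-length parameter and re-added after it (the unchanged seq_len line between the two did not survive this capture), and the positional call into the actor-critic is swapped to match. A toy sketch of why signature and call site must move together; the signature below is illustrative, not the branch's exact one.

from typing import List, Optional

import torch


def get_dist_and_value(
    obs: List[torch.Tensor],
    masks: Optional[torch.Tensor] = None,
    memories: Optional[torch.Tensor] = None,
    sequence_length: int = 1,
    critic_obs: Optional[List[List[torch.Tensor]]] = None,
):
    # The real method would run the network body here; echoing the
    # arguments keeps the positional binding visible.
    return sequence_length, critic_obs


obs = [torch.zeros(1, 4)]
critic_obs = [[torch.zeros(1, 4)]]
seq_len = 16
# With the old argument order (critic_obs, seq_len), critic_obs would have
# been bound to sequence_length; the new order matches the new signature.
seq, cobs = get_dist_and_value(obs, None, None, seq_len, critic_obs)
assert seq == 16 and cobs is critic_obs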

 obs, masks = self._split_decision_step(decision_requests)
 memories = torch.as_tensor(self.retrieve_memories(global_agent_ids)).unsqueeze(
     0
-)
+) if self.use_recurrent else None
 run_out = {}
 with torch.no_grad():
     action, clipped_action, log_probs, entropy, memories = self.sample_actions(
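The only edit in this hunk is the trailing if self.use_recurrent else None: the whole torch.as_tensor(...).unsqueeze(0) expression becomes the true branch of a conditional expression, so non-recurrent policies now hand memories=None to sample_actions instead of building a tensor from empty memories. A minimal sketch with retrieve_memories stubbed out; build_memories is a hypothetical wrapper for illustration, and the zero-filled memory layout in the stub is an assumption.

import numpy as np
import torch


class PolicySketch:
    def __init__(self, use_recurrent: bool):
        self.use_recurrent = use_recurrent

    def retrieve_memories(self, global_agent_ids):
        # Stand-in for the real lookup: one memory row per agent.
        return np.zeros((len(global_agent_ids), 8), dtype=np.float32)

    def build_memories(self, global_agent_ids):
        # Mirrors the changed lines: the conditional guards the whole
        # as_tensor(...).unsqueeze(0) expression, not just the lookup.
        memories = torch.as_tensor(
            self.retrieve_memories(global_agent_ids)
        ).unsqueeze(0) if self.use_recurrent else None
        return memories


assert PolicySketch(False).build_memories(["agent-0"]) is None
assert PolicySketch(True).build_memories(["agent-0", "agent-1"]).shape == (1, 2, 8)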

ml-agents/mlagents/trainers/torch/networks.py (8 changes)


 net_inputs: List[torch.Tensor],
 masks: Optional[torch.Tensor] = None,
 memories: Optional[torch.Tensor] = None,
-critic_obs: Optional[List[List[torch.Tensor]]] = None,
+critic_obs: Optional[List[List[torch.Tensor]]] = None,
 ) -> Tuple[List[DistInstance], Dict[str, torch.Tensor], torch.Tensor]:
     """
     Returns distributions, from which actions can be sampled, and value estimates.

 net_inputs: List[torch.Tensor],
 masks: Optional[torch.Tensor] = None,
 memories: Optional[torch.Tensor] = None,
-critic_obs: Optional[List[List[torch.Tensor]]] = None,
+critic_obs: Optional[List[List[torch.Tensor]]] = None,
 ) -> Tuple[List[DistInstance], Dict[str, torch.Tensor], torch.Tensor]:
     encoding, memories = self.network_body(
         net_inputs, memories=memories, sequence_length=sequence_length

 self,
 net_inputs: List[torch.Tensor],
 memories: Optional[torch.Tensor] = None,
-critic_obs: List[List[torch.Tensor]] = None,
+critic_obs: List[List[torch.Tensor]] = None,
 ) -> Tuple[Dict[str, torch.Tensor], torch.Tensor]:
     actor_mem, critic_mem = None, None
     if self.use_lstm:
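For context on the actor_mem, critic_mem = None, None default and the use_lstm branch that follows: the separate actor-critic keeps one memory tensor that is the actor half concatenated with the critic half, and a critic-only pass first splits it on the last axis. A sketch of that split, with the memory_size value and the torch.split call assumed from the usual ML-Agents pattern rather than visible in this hunk:

import torch

memory_size = 16  # assumed; the first half drives the actor, the second the critic
memories = torch.arange(memory_size, dtype=torch.float32).view(1, 1, memory_size)

# Split the joint memory tensor into per-network halves along the last axis.
actor_mem, critic_mem = torch.split(memories, memory_size // 2, dim=-1)
assert actor_mem.shape == critic_mem.shape == (1, 1, 8)
# A critic-only pass then consumes critic_mem and leaves actor_mem untouched.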

 net_inputs: List[torch.Tensor],
 masks: Optional[torch.Tensor] = None,
 memories: Optional[torch.Tensor] = None,
-critic_obs: Optional[List[List[torch.Tensor]]] = None,
+critic_obs: Optional[List[List[torch.Tensor]]] = None,
 ) -> Tuple[List[DistInstance], Dict[str, torch.Tensor], torch.Tensor]:
     if self.use_lstm:
         # Use only the back half of memories for critic and actor
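The "back half of memories" comment marks the same split on the recurrent path of get_dist_and_value: each network consumes its own half, and for callers to keep seeing a single memory tensor the two updated halves must be joined again afterwards. That recombination is not visible in this hunk; the torch.cat below is an assumption about the step that follows, with identity stand-ins for the two network passes.

import torch


def recurrent_memory_roundtrip(memories: torch.Tensor) -> torch.Tensor:
    half = memories.shape[-1] // 2
    actor_mem, critic_mem = torch.split(memories, half, dim=-1)
    actor_mem_out = actor_mem    # stand-in for the actor/LSTM pass
    critic_mem_out = critic_mem  # stand-in for the critic/LSTM pass
    # Re-join the halves so callers still handle one memory tensor.
    return torch.cat([actor_mem_out, critic_mem_out], dim=-1)


memories = torch.zeros(1, 2, 16)
assert recurrent_memory_roundtrip(memories).shape == (1, 2, 16)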
