addressing comments

/develop/rm-rf-new-models · vincentpierre · 4 years ago
Current commit: 0c81006d
4 changed files with 12 insertions and 15 deletions
  1. ml-agents/mlagents/trainers/policy/torch_policy.py (16)
  2. ml-agents/mlagents/trainers/torch/components/bc/module.py (1)
  3. ml-agents/mlagents/trainers/torch/components/reward_providers/curiosity_reward_provider.py (6)
  4. ml-agents/mlagents/trainers/trajectory.py (4)

ml-agents/mlagents/trainers/policy/torch_policy.py (16)


"""
return self._export_m_size
def _split_decision_step(
self, decision_requests: DecisionSteps
) -> Tuple[List[np.array], np.ndarray]:
obs = decision_requests.obs
def _extract_masks(self, decision_requests: DecisionSteps) -> np.ndarray:
mask = None
if self.behavior_spec.action_spec.discrete_size > 0:
mask = torch.ones([len(decision_requests), np.sum(self.act_size)])

)
return obs, mask
return mask
def update_normalization(self, buffer: AgentBuffer) -> None:
"""

     def evaluate_actions(
         self,
-        obs: torch.Tensor,
+        obs: List[torch.Tensor],
         actions: AgentAction,
         masks: Optional[torch.Tensor] = None,
         memories: Optional[torch.Tensor] = None,

         :param decision_requests: DecisionStep object containing inputs.
         :return: Outputs from network as defined by self.inference_dict.
         """
-        obs, masks = self._split_decision_step(decision_requests)
-        obs = [torch.as_tensor(np_ob) for np_ob in obs]
+        obs = decision_requests.obs
+        masks = self._extract_masks(decision_requests)
+        tensor_obs = [torch.as_tensor(np_ob) for np_ob in obs]
         memories = torch.as_tensor(self.retrieve_memories(global_agent_ids)).unsqueeze(
             0
         )
         with torch.no_grad():
             action, log_probs, entropy, memories = self.sample_actions(
-                obs, masks=masks, memories=memories
+                tensor_obs, masks=masks, memories=memories
             )
         action_tuple = action.to_action_tuple()
         run_out["action"] = action_tuple

ml-agents/mlagents/trainers/torch/components/bc/module.py (1)


         )
         # Convert to tensors
         obs = [ModelUtils.list_to_tensor(obs) for obs in obs]
-        print("\n\n\n\n", obs, obs[0].shape)
         act_masks = None
         expert_actions = AgentAction.from_dict(mini_batch_demo)
         if self.policy.behavior_spec.action_spec.discrete_size > 0:

ml-agents/mlagents/trainers/torch/components/reward_providers/curiosity_reward_provider.py (6)


         Extracts the current state embedding from a mini_batch.
         """
         n_obs = len(self._state_encoder.encoders)
-        obs = ObsUtil.from_buffer(mini_batch, n_obs)
+        np_obs = ObsUtil.from_buffer(mini_batch, n_obs)
-        obs = [ModelUtils.list_to_tensor(obs) for obs in obs]
+        tensor_obs = [ModelUtils.list_to_tensor(obs) for obs in np_obs]
-        hidden, _ = self._state_encoder.forward(obs)
+        hidden, _ = self._state_encoder.forward(tensor_obs)
         return hidden

     def get_next_state(self, mini_batch: AgentBuffer) -> torch.Tensor:
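The renames fix a readability problem: the old code rebound `obs` three times (the numpy list, the tensor list, and the comprehension variable). A small sketch of why the old form was legal but confusing, with torch.as_tensor standing in for ModelUtils.list_to_tensor:

import numpy as np
import torch

np_obs = [np.ones((2, 4), dtype=np.float32)]

# Old style: `obs = [f(obs) for obs in obs]` works because the loop
# variable is scoped to the comprehension, but it shadows the outer name.
# New style: one distinct name per representation.
tensor_obs = [torch.as_tensor(o) for o in np_obs]
print(tensor_obs[0].dtype)  # torch.float32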

ml-agents/mlagents/trainers/trajectory.py (4)


"""
returns the name of the observation given the index of the observation
"""
return "obs_%d" % index
return f"obs_{index}"
@staticmethod
def get_name_at_next(index: int) -> str:

return "next_obs_%d" % index
return f"next_obs_{index}"
@staticmethod
def from_buffer(batch: AgentBuffer, num_obs: int) -> List[np.array]:
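The %-formatting and f-string forms produce identical names for integer indices; the f-string is simply the more idiomatic modern spelling. A quick check:

index = 2
assert "obs_%d" % index == f"obs_{index}" == "obs_2"
assert "next_obs_%d" % index == f"next_obs_{index}" == "next_obs_2"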
