
rename extract to from_dict

/develop/action-spec-gym
Andrew Cohen, 4 years ago
Current commit 73b778cc
5 changed files, with 8 additions and 8 deletions
  1. ml-agents/mlagents/trainers/ppo/optimizer_torch.py (4 changes)
  2. ml-agents/mlagents/trainers/sac/optimizer_torch.py (2 changes)
  3. ml-agents/mlagents/trainers/torch/components/reward_providers/curiosity_reward_provider.py (4 changes)
  4. ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py (2 changes)
  5. ml-agents/mlagents/trainers/torch/utils.py (4 changes)
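
All five files make the same mechanical change: the static constructors AgentAction.extract and ActionLogProbs.extract become AgentAction.from_dict and ActionLogProbs.from_dict, and every call site swaps one name for the other. In the trainer optimizers the pattern is:

    # before this commit
    actions = AgentAction.extract(batch)
    old_log_probs = ActionLogProbs.extract(batch).flatten()

    # after this commit
    actions = AgentAction.from_dict(batch)
    old_log_probs = ActionLogProbs.from_dict(batch).flatten()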

ml-agents/mlagents/trainers/ppo/optimizer_torch.py (4 changes)


vec_obs = [ModelUtils.list_to_tensor(batch["vector_obs"])]
act_masks = ModelUtils.list_to_tensor(batch["action_mask"])
- actions = AgentAction.extract(batch)
+ actions = AgentAction.from_dict(batch)
memories = [
ModelUtils.list_to_tensor(batch["memory"][i])

memories=memories,
seq_len=self.policy.sequence_length,
)
- old_log_probs = ActionLogProbs.extract(batch).flatten()
+ old_log_probs = ActionLogProbs.from_dict(batch).flatten()
log_probs = log_probs.flatten()
loss_masks = ModelUtils.list_to_tensor(batch["masks"], dtype=torch.bool)
value_loss = self.ppo_value_loss(

ml-agents/mlagents/trainers/sac/optimizer_torch.py (2 changes)


vec_obs = [ModelUtils.list_to_tensor(batch["vector_obs"])]
next_vec_obs = [ModelUtils.list_to_tensor(batch["next_vector_in"])]
act_masks = ModelUtils.list_to_tensor(batch["action_mask"])
- actions = AgentAction.extract(batch)
+ actions = AgentAction.from_dict(batch)
memories_list = [
ModelUtils.list_to_tensor(batch["memory"][i])

ml-agents/mlagents/trainers/torch/components/reward_providers/curiosity_reward_provider.py (4 changes)


Uses the current state embedding and the action of the mini_batch to predict
the next state embedding.
"""
- actions = AgentAction.extract(mini_batch)
+ actions = AgentAction.from_dict(mini_batch)
if self._action_spec.is_continuous():
action = actions.continuous_tensor
else:

action prediction (given the current and next state).
"""
predicted_action = self.predict_action(mini_batch)
- actions = AgentAction.extract(mini_batch)
+ actions = AgentAction.from_dict(mini_batch)
if self._action_spec.is_continuous():
sq_difference = (actions.continuous_tensor - predicted_action) ** 2
sq_difference = torch.sum(sq_difference, dim=1)
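
For orientation on how the extracted actions are consumed here (a sketch with an illustrative helper name, not the provider's actual code): in the continuous case the inverse-model loss is the per-sample sum of squared differences between the action stored in the mini-batch and the action predicted from the current and next state, as the hunk above computes.

    import torch

    def continuous_inverse_loss(
        true_action: torch.Tensor, predicted_action: torch.Tensor
    ) -> torch.Tensor:
        # Per-sample squared error between the mini-batch action and the
        # inverse model's prediction (continuous branch only).
        sq_difference = (true_action - predicted_action) ** 2
        return torch.sum(sq_difference, dim=1)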

ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py (2 changes)


Creates the action Tensor. In continuous case, corresponds to the action. In
the discrete case, corresponds to the concatenation of one hot action Tensors.
"""
- return self._action_flattener.forward(AgentAction.extract(mini_batch))
+ return self._action_flattener.forward(AgentAction.from_dict(mini_batch))
def get_state_inputs(
self, mini_batch: AgentBuffer
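
A minimal sketch of what the docstring above describes (illustrative names, not the flattener's actual implementation): a continuous action passes through unchanged, while discrete branches are one-hot encoded and concatenated along the feature dimension.

    from typing import List, Optional

    import torch

    def flatten_action(
        continuous: Optional[torch.Tensor],
        discrete_branches: List[torch.Tensor],
        branch_sizes: List[int],
    ) -> torch.Tensor:
        # Continuous case: the action tensor is used as-is.
        if continuous is not None:
            return continuous
        # Discrete case: one-hot encode each branch, then concatenate.
        one_hots = [
            torch.nn.functional.one_hot(branch.long(), size).float()
            for branch, size in zip(discrete_branches, branch_sizes)
        ]
        return torch.cat(one_hots, dim=1)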

ml-agents/mlagents/trainers/torch/utils.py (4 changes)


return AgentAction(continuous, discrete)
@staticmethod
- def extract(buff: Dict[str, np.ndarray]) -> "AgentAction":
+ def from_dict(buff: Dict[str, np.ndarray]) -> "AgentAction":
continuous: torch.Tensor = None
discrete: List[torch.Tensor] = None
if "continuous_action" in buff:

return ActionLogProbs(continuous, discrete, all_log_prob_list)
@staticmethod
- def extract(buff: Dict[str, np.ndarray]) -> "ActionLogProbs":
+ def from_dict(buff: Dict[str, np.ndarray]) -> "ActionLogProbs":
continuous: torch.Tensor = None
discrete: List[torch.Tensor] = None
if "continuous_log_probs" in buff:
