
2d discrete passes

/develop/action-spec-gym
Andrew Cohen, 4 years ago
Current commit
fcf6471e
5 files changed, 149 insertions(+), 61 deletions(-)
  1. ml-agents/mlagents/trainers/policy/torch_policy.py (13 changes)
  2. ml-agents/mlagents/trainers/ppo/optimizer_torch.py (5 changes)
  3. ml-agents/mlagents/trainers/tests/torch/test_simple_rl.py (6 changes)
  4. ml-agents/mlagents/trainers/torch/networks.py (6 changes)
  5. ml-agents/mlagents/trainers/torch/utils.py (180 changes)

ml-agents/mlagents/trainers/policy/torch_policy.py (13 changes)


vec_obs, vis_obs, masks, memories, seq_len
)
action_list = self.actor_critic.sample_action(dists)
actions = AgentAction.create_agent_action(action_list, self.behavior_spec.action_spec)
log_probs = ActionLogProbs.create_action_log_probs(log_probs_list, self.behavior_spec.action_spec)
actions = AgentAction.create(action_list, self.behavior_spec.action_spec)
log_probs = ActionLogProbs.create(log_probs_list, self.behavior_spec.action_spec)
# Use the sum of entropy across actions, not the mean
entropy_sum = torch.sum(entropies, dim=1)
return (

self,
vec_obs: torch.Tensor,
vis_obs: torch.Tensor,
actions: List[torch.Tensor],
actions: AgentAction,
masks: Optional[torch.Tensor] = None,
memories: Optional[torch.Tensor] = None,
seq_len: int = 1,

)
print(actions)
log_probs_list, entropies, _ = ModelUtils.get_probs_and_entropy(actions, dists)
log_probs = ActionLogProbs.create_action_log_probs(log_probs_list, self.behavior_spec.action_spec)
print(log_probs)
action_list = actions.to_tensor_list()
log_probs_list, entropies, _ = ModelUtils.get_probs_and_entropy(action_list, dists)
log_probs = ActionLogProbs.create(log_probs_list, self.behavior_spec.action_spec)
# Use the sum of entropy across actions, not the mean
entropy_sum = torch.sum(entropies, dim=1)
return log_probs, entropy_sum, value_heads
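
Both hunks above keep the per-agent entropy as a sum over action branches rather than a mean. A small self-contained illustration of that summation, using torch.distributions.Categorical as a stand-in for ml-agents' DistInstance (the stacked shape of entropies is an assumption):

import torch
from torch.distributions import Categorical

# Two discrete branches for a batch of 4 agents: 3 choices and 2 choices.
dists = [
    Categorical(logits=torch.zeros(4, 3)),
    Categorical(logits=torch.zeros(4, 2)),
]
entropies = torch.stack([d.entropy() for d in dists], dim=1)  # assumed shape: (batch, num_branches)
entropy_sum = torch.sum(entropies, dim=1)                     # one entropy per agent, summed over branches
print(entropy_sum)  # ~1.79 each: log(3) + log(2) for uniform logits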

ml-agents/mlagents/trainers/ppo/optimizer_torch.py (5 changes)


vec_obs = [ModelUtils.list_to_tensor(batch["vector_obs"])]
act_masks = ModelUtils.list_to_tensor(batch["action_mask"])
actions = AgentAction.extract_agent_action(batch)
actions = AgentAction.extract(batch)
memories = [
ModelUtils.list_to_tensor(batch["memory"][i])

memories=memories,
seq_len=self.policy.sequence_length,
)
old_log_probs = ActionLogProbs.extract_action_log_probs(batch).flatten()
old_log_probs = ActionLogProbs.extract(batch).flatten()
print(log_probs)
loss_masks = ModelUtils.list_to_tensor(batch["masks"], dtype=torch.bool)
value_loss = self.ppo_value_loss(
values, old_values, returns, decay_eps, loss_masks
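
The extract(...).flatten() pattern above supplies the old (behavior-policy) log probabilities that the PPO surrogate compares against the freshly evaluated ones. A toy, self-contained sketch of that comparison; the numbers and the clipping epsilon are made up, and this is not the ml-agents loss implementation itself:

import torch

# old_log_probs would come from ActionLogProbs.extract(batch).flatten(),
# new_log_probs from re-evaluating the same actions with the current policy.
old_log_probs = torch.tensor([-0.4, -1.2, -0.8])
new_log_probs = torch.tensor([-0.3, -1.5, -0.7])
advantages = torch.tensor([1.0, -0.5, 0.2])
epsilon = 0.2

ratio = torch.exp(new_log_probs - old_log_probs)
clipped = torch.clamp(ratio, 1.0 - epsilon, 1.0 + epsilon)
policy_loss = -torch.mean(torch.min(ratio * advantages, clipped * advantages))
print(policy_loss)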

ml-agents/mlagents/trainers/tests/torch/test_simple_rl.py (6 changes)


SAC_TORCH_CONFIG = attr.evolve(sac_dummy_config(), framework=FrameworkType.PYTORCH)
@pytest.mark.parametrize("use_discrete", [True])
@pytest.mark.parametrize("use_discrete", [True, False])
assert False
@pytest.mark.parametrize("use_discrete", [True])
@pytest.mark.parametrize("use_discrete", [True, False])
def test_2d_ppo(use_discrete):
env = SimpleEnvironment(
[BRAIN_NAME], use_discrete=use_discrete, action_size=2, step_size=0.8

PPO_TORCH_CONFIG, hyperparameters=new_hyperparams, max_steps=10000
)
check_environment_trains(env, {BRAIN_NAME: config})
assert False
#@pytest.mark.parametrize("use_discrete", [True, False])
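
The hunk above omits the lines that build new_hyperparams and config. A hypothetical reconstruction of the full 2-D test, following the pattern of the other simple-RL tests in this file; the batch_size and buffer_size values are illustrative guesses, not the commit's actual numbers:

@pytest.mark.parametrize("use_discrete", [True, False])
def test_2d_ppo(use_discrete):
    env = SimpleEnvironment(
        [BRAIN_NAME], use_discrete=use_discrete, action_size=2, step_size=0.8
    )
    new_hyperparams = attr.evolve(
        PPO_TORCH_CONFIG.hyperparameters, batch_size=64, buffer_size=640  # illustrative values
    )
    config = attr.evolve(
        PPO_TORCH_CONFIG, hyperparameters=new_hyperparams, max_steps=10000
    )
    check_environment_trains(env, {BRAIN_NAME: config})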

ml-agents/mlagents/trainers/torch/networks.py (6 changes)


def update_normalization(self, vector_obs: List[torch.Tensor]) -> None:
self.network_body.update_normalization(vector_obs)
def sample_action(self, dists: List[DistInstance]) -> AgentAction:
def sample_action(self, dists: List[DistInstance]) -> List[torch.Tensor]:
actions = []
for action_dist in dists:
action = action_dist.sample()

"""
dists, _ = self.get_dists(vec_inputs, vis_inputs, masks, memories, 1)
if self.action_spec.is_continuous():
agent_action = self.sample_action(dists)
action_out = agent_action.flatten()#torch.stack(action_list, dim=-1)
action_list = self.sample_action(dists)
action_out = torch.stack(action_list, dim=-1)
else:
action_out = torch.cat([dist.all_log_prob() for dist in dists], dim=1)
return (
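
sample_action above returns one tensor per action branch, and get_action_stats then stacks (or, in the new code, flatten()s) them for export. A self-contained sketch of that sampling loop, with torch.distributions.Categorical standing in for ml-agents' DistInstance:

from typing import List
import torch
from torch.distributions import Categorical

def sample_action(dists: List[Categorical]) -> List[torch.Tensor]:
    actions = []
    for action_dist in dists:
        actions.append(action_dist.sample())  # one (batch,) tensor per branch
    return actions

# Two discrete branches (3 and 2 choices) for a batch of 4 agents.
dists = [Categorical(logits=torch.zeros(4, 3)), Categorical(logits=torch.zeros(4, 2))]
action_list = sample_action(dists)
action_out = torch.stack(action_list, dim=-1)  # (batch, num_branches), as in get_action_stats
print(action_out.shape)  # torch.Size([4, 2])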

ml-agents/mlagents/trainers/torch/utils.py (180 changes)


from mlagents.trainers.torch.distributions import DistInstance, DiscreteDistInstance
class ActionSpaceTuple(NamedTuple):
continuous: torch.Tensor
discrete: List[torch.Tensor]
action_arrays_dict: Dict[str, np.ndarray] = {}
array_dict: Dict[str, np.ndarray] = {}
action_arrays_dict["continuous_action"] = ModelUtils.to_numpy(self.continuous.unsqueeze(-1)[:, :, 0])
array_dict["continuous_action"] = ModelUtils.to_numpy(self.continuous)
action_arrays_dict["discrete_action"] = ModelUtils.to_numpy(discrete_tensor[:, 0, :])
return action_arrays_dict
array_dict["discrete_action"] = ModelUtils.to_numpy(discrete_tensor[:, 0, :])
return array_dict
tensor_list: List[torch.Tensor] = []
tensor_list : List[torch.Tensor] = []
return tensor_list
return tensor_list
def flatten(self) -> torch.Tensor:
return torch.stack(self.to_tensor_list(), dim=-1)
@staticmethod
def create(tensor_list: List[torch.Tensor], action_spec: ActionSpec) -> "AgentActions":
continuous: torch.Tensor = None
discrete: List[torch.Tensor] = None
_offset = 0
if action_spec.continuous_size > 0:
continuous = tensor_list[0]
_offset = 1
if action_spec.discrete_size > 0:
discrete = tensor_list[_offset:]
return AgentAction(continuous, discrete)
@staticmethod
def extract_agent_action(buff: Dict[str, np.ndarray]) -> "AgentAction":
@staticmethod
def extract(buff: Dict[str, np.ndarray]) -> "AgentActions":
continuous: torch.Tensor = None
discrete: List[torch.Tensor] = None
if "continuous_action" in buff:

discrete = [discrete_tensor[..., i] for i in range(discrete_tensor.shape[-1])]
return AgentAction(continuous, discrete)
@staticmethod
def create_agent_action(action_tensors: List[torch.Tensor], action_spec: ActionSpec) -> "AgentAction":
continuous: torch.Tensor = None
discrete: List[torch.Tensor] = None
_offset = 0
if action_spec.continuous_size > 0:
continuous = action_tensors[0]
_offset = 1
if action_spec.discrete_size > 0:
discrete = action_tensors[_offset:]
return AgentAction(continuous, discrete)
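
The create()/create_agent_action() helpers above split the flat per-distribution tensor list into a continuous slot plus per-branch discrete slots, driven by the ActionSpec sizes. A self-contained sketch of that split; Spec and Action here are stand-ins for ActionSpec and AgentAction with only the fields used above assumed:

from typing import List, NamedTuple, Optional
import torch

class Spec(NamedTuple):          # stand-in for mlagents' ActionSpec
    continuous_size: int
    discrete_size: int

class Action(NamedTuple):        # stand-in for AgentAction
    continuous: Optional[torch.Tensor]
    discrete: Optional[List[torch.Tensor]]

    @staticmethod
    def create(tensor_list: List[torch.Tensor], spec: Spec) -> "Action":
        continuous, discrete, offset = None, None, 0
        if spec.continuous_size > 0:
            continuous = tensor_list[0]      # single tensor holding all continuous dims
            offset = 1
        if spec.discrete_size > 0:
            discrete = tensor_list[offset:]  # one tensor per discrete branch
        return Action(continuous, discrete)

# The "2d discrete" case this commit targets: two discrete branches, no continuous actions.
act = Action.create([torch.tensor([1, 0, 2]), torch.tensor([0, 1, 1])],
                    Spec(continuous_size=0, discrete_size=2))
print(act.discrete)  # [tensor([1, 0, 2]), tensor([0, 1, 1])]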
class ActionLogProbs(NamedTuple):
class ActionLogProbs(ActionSpaceTuple):
log_prob_arrays_dict: Dict[str, np.ndarray] = {}
array_dict: Dict[str, np.ndarray] = {}
log_prob_arrays_dict["continuous_log_probs"] = ModelUtils.to_numpy(self.continuous)
array_dict["continuous_log_probs"] = ModelUtils.to_numpy(self.continuous)
log_prob_arrays_dict["discrete_log_probs"] = ModelUtils.to_numpy(discrete_tensor.squeeze(1))
return log_prob_arrays_dict
array_dict["discrete_log_probs"] = ModelUtils.to_numpy(discrete_tensor)
return array_dict
tensor_list: List[torch.Tensor] = []
tensor_list : List[torch.Tensor] = []
return tensor_list
return tensor_list
def extract_action_log_probs(buff: Dict[str, np.ndarray]) -> "AgentAction":
continuous: torch.Tensor = None
discrete: List[torch.Tensor] = None
if "continuous_action" in buff:
continuous = ModelUtils.list_to_tensor(buff["continuous_log_probs"])
if "discrete_action" in buff:
discrete = ModelUtils.list_to_tensor(buff["discrete_log_probs"])
return ActionLogProbs(continuous, discrete)
@staticmethod
def create_action_log_probs(log_prob_tensors: List[torch.Tensor], action_spec: ActionSpec) -> "AgentAction":
def create(tensor_list: List[torch.Tensor], action_spec: ActionSpec) -> "ActionLogProbs":
continuous = log_prob_tensors[0]
continuous = tensor_list[0]
discrete = log_prob_tensors[_offset:]
discrete = tensor_list[_offset:]
@staticmethod
def extract(buff: Dict[str, np.ndarray]) -> "ActionLogProbs":
continuous: torch.Tensor = None
discrete: List[torch.Tensor] = None
if "continuous_log_probs" in buff:
continuous = ModelUtils.list_to_tensor(buff["continuous_log_probs"])
if "discrete_log_probs" in buff:
discrete_tensor = ModelUtils.list_to_tensor(buff["discrete_log_probs"])
discrete = [discrete_tensor[..., i] for i in range(discrete_tensor.shape[-1])]
return ActionLogProbs(continuous, discrete)
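
extract() above rebuilds the named tuple from the numpy arrays stored in the update buffer, splitting the stacked discrete log-prob array back into per-branch tensors. A self-contained sketch of that split; torch.as_tensor stands in for ModelUtils.list_to_tensor, and the buffer shapes are assumptions:

import numpy as np
import torch

# A batch of 3 steps with 2 discrete branches: one log prob per branch per step.
buff = {
    "discrete_log_probs": np.array(
        [[-0.1, -0.7], [-0.3, -0.2], [-0.5, -0.9]], dtype=np.float32
    )
}

continuous = None
discrete = None
if "continuous_log_probs" in buff:
    continuous = torch.as_tensor(buff["continuous_log_probs"])
if "discrete_log_probs" in buff:
    discrete_tensor = torch.as_tensor(buff["discrete_log_probs"])
    # back to one tensor per branch, mirroring the list the distributions produced
    discrete = [discrete_tensor[..., i] for i in range(discrete_tensor.shape[-1])]

flattened = torch.stack(discrete, dim=-1)  # what .flatten() hands to the PPO update
print(flattened.shape)  # torch.Size([3, 2])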
#def to_numpy_dict(self) -> Dict[str, np.ndarray]:
# action_arrays_dict: Dict[str, np.ndarray] = {}
# if self.continuous is not None:
# action_arrays_dict["continuous_action"] = ModelUtils.to_numpy(self.continuous.unsqueeze(-1)[:, :, 0])
# if self.discrete is not None:
# discrete_tensor = torch.stack(self.discrete, dim=-1)
# action_arrays_dict["discrete_action"] = ModelUtils.to_numpy(discrete_tensor[:, 0, :])
# return action_arrays_dict
#def to_tensor_list(self) -> List[torch.Tensor]:
# tensor_list: List[torch.Tensor] = []
# if self.continuous is not None:
# tensor_list.append(self.continuous)
# if self.discrete is not None:
# tensor_list += self.discrete
# return tensor_list
#def flatten(self) -> torch.Tensor:
# return torch.stack(self.to_tensor_list(), dim=-1)
#@staticmethod
#def extract_agent_action(buff: Dict[str, np.ndarray]) -> "AgentAction":
# continuous: torch.Tensor = None
# discrete: List[torch.Tensor] = None
# if "continuous_action" in buff:
# continuous = ModelUtils.list_to_tensor(buff["continuous_action"])
# if "discrete_action" in buff:
# discrete_tensor = ModelUtils.list_to_tensor(buff["discrete_action"])
# discrete = [discrete_tensor[..., i] for i in range(discrete_tensor.shape[-1])]
# return AgentAction(continuous, discrete)
#
#@staticmethod
#def create_agent_action(action_tensors: List[torch.Tensor], action_spec: ActionSpec) -> "AgentAction":
# continuous: torch.Tensor = None
# discrete: List[torch.Tensor] = None
# _offset = 0
# if action_spec.continuous_size > 0:
# continuous = action_tensors[0]
# _offset = 1
# if action_spec.discrete_size > 0:
# discrete = action_tensors[_offset:]
# return AgentAction(continuous, discrete)
#def to_numpy_dict(self) -> Dict[str, np.ndarray]:
# log_prob_arrays_dict: Dict[str, np.ndarray] = {}
# if self.continuous is not None:
# log_prob_arrays_dict["continuous_log_probs"] = ModelUtils.to_numpy(self.continuous)
# if self.discrete is not None:
# discrete_tensor = torch.stack(self.discrete, dim=-1)
# log_prob_arrays_dict["discrete_log_probs"] = ModelUtils.to_numpy(discrete_tensor.squeeze(1))
# return log_prob_arrays_dict
#def to_tensor_list(self) -> List[torch.Tensor]:
# tensor_list: List[torch.Tensor] = []
# if self.continuous is not None:
# tensor_list.append(self.continuous)
# if self.discrete is not None:
# tensor_list += self.discrete
# return tensor_list
#def flatten(self) -> torch.Tensor:
# return torch.stack(self.to_tensor_list(), dim=-1)
#@staticmethod
#def extract_action_log_probs(buff: Dict[str, np.ndarray]) -> "AgentAction":
# continuous: torch.Tensor = None
# discrete: List[torch.Tensor] = None
# if "continuous_action" in buff:
# continuous = ModelUtils.list_to_tensor(buff["continuous_log_probs"]).unsqueeze(-1)
# if "discrete_action" in buff:
# discrete_tensor = ModelUtils.list_to_tensor(buff["discrete_log_probs"])
# discrete = [discrete_tensor[..., i] for i in range(discrete_tensor.shape[-1])]
# return ActionLogProbs(continuous, discrete)
#@staticmethod
#def create_action_log_probs(log_prob_tensors: List[torch.Tensor], action_spec: ActionSpec) -> "AgentAction":
# continuous: torch.Tensor = None
# discrete: List[torch.Tensor] = None
# _offset = 0
# if action_spec.continuous_size > 0:
# continuous = log_prob_tensors[0]
# _offset = 1
# if action_spec.discrete_size > 0:
# discrete = log_prob_tensors[_offset:]
# return ActionLogProbs(continuous, discrete)
class ModelUtils:
# Minimum supported side for each encoder type. If refactoring an encoder, please
