
debugging discrete

/develop/action-spec-gym
Andrew Cohen, 4 years ago
Current commit: ad951493
7 changed files with 36 additions and 33 deletions
  1. ml-agents-envs/mlagents_envs/base_env.py (8 changes)
  2. ml-agents/mlagents/trainers/policy/torch_policy.py (12 changes)
  3. ml-agents/mlagents/trainers/ppo/optimizer_torch.py (1 change)
  4. ml-agents/mlagents/trainers/tests/simple_test_envs.py (4 changes)
  5. ml-agents/mlagents/trainers/tests/torch/test_simple_rl.py (28 changes)
  6. ml-agents/mlagents/trainers/torch/networks.py (2 changes)
  7. ml-agents/mlagents/trainers/torch/utils.py (14 changes)

ml-agents-envs/mlagents_envs/base_env.py (8 changes)


Continuous and discrete actions are numpy arrays.
"""
continuous: np.ndarray
discrete: np.ndarray
continuous: List[np.ndarray]
discrete: List[np.ndarray]
continuous: np.ndarray = []
discrete: np.ndarray = []
continuous: List[np.ndarray] = [[]]
discrete: List[np.ndarray] = [[]]
if "continuous_action" in action_dict:
continuous = action_dict["continuous_action"]
if "discrete_action" in action_dict:

ml-agents/mlagents/trainers/policy/torch_policy.py (12 changes)


vec_obs, vis_obs, masks, memories, seq_len
)
action_list = self.actor_critic.sample_action(dists)
actions = AgentAction.create_agent_action(action_list, self.behavior_spec.action_spec)
# actions = torch.stack(action_list, dim=-1)
# if self.use_continuous_act:
# actions = actions[:, :, 0]
# else:
# actions = actions[:, 0, :]
action_list,
actions,
all_logs if all_log_probs else log_probs,
entropy_sum,
memories,

self,
vec_obs: torch.Tensor,
vis_obs: torch.Tensor,
actions: AgentAction,
actions: List[torch.Tensor],
masks: Optional[torch.Tensor] = None,
memories: Optional[torch.Tensor] = None,
seq_len: int = 1,

)
print(actions)
print(log_probs)
# Use the sum of entropy across actions, not the mean
entropy_sum = torch.sum(entropies, dim=1)
return log_probs, entropy_sum, value_heads
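Both hunks in this file deal with sampling one tensor per action branch and summing entropy across branches rather than averaging it. A rough illustration with plain torch.distributions (not the trainer's actor-critic or AgentAction classes):

import torch
from torch.distributions import Categorical

# Two discrete branches for a batch of 4 agents (sizes are made up).
dists = [Categorical(logits=torch.randn(4, 3)), Categorical(logits=torch.randn(4, 5))]

action_list = [d.sample() for d in dists]                      # one (4,) tensor per branch
entropies = torch.stack([d.entropy() for d in dists], dim=1)   # (4, num_branches)
entropy_sum = torch.sum(entropies, dim=1)                      # per-agent entropy summed over branches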

ml-agents/mlagents/trainers/ppo/optimizer_torch.py (1 change)


)
old_log_probs = ActionLogProbs.extract_action_log_probs(batch).flatten()
log_probs = log_probs.flatten()
print(log_probs)
loss_masks = ModelUtils.list_to_tensor(batch["masks"], dtype=torch.bool)
value_loss = self.ppo_value_loss(
values, old_values, returns, decay_eps, loss_masks
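The change here flattens both the stored and the freshly evaluated log probabilities before the loss, so the PPO ratio can be taken elementwise over matching shapes. A hedged sketch of that step with made-up shapes, including the clipped surrogate it feeds (not the optimizer's actual code):

import torch

old_log_probs = torch.randn(64, 1).flatten()   # log pi_old(a|s), from the buffer
log_probs = torch.randn(64, 1).flatten()       # log pi_new(a|s), from the current policy
advantages = torch.randn(64)
epsilon = 0.2

ratio = torch.exp(log_probs - old_log_probs)   # r_t = pi_new / pi_old, shape (64,)
clipped = torch.clamp(ratio, 1.0 - epsilon, 1.0 + epsilon)
policy_loss = -torch.min(ratio * advantages, clipped * advantages).mean()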

ml-agents/mlagents/trainers/tests/simple_test_envs.py (4 changes)


def _take_action(self, name: str) -> bool:
deltas = []
_act = self.action[name]
for _disc in _act.discrete:
for _disc in _act.discrete[0]:
for _cont in _act.continuous:
for _cont in _act.continuous[0]:
deltas.append(_cont)
for i, _delta in enumerate(deltas):
_delta = clamp(_delta, -self.step_size, self.step_size)
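The indexing difference above (_act.discrete[0] / _act.continuous[0] versus the containers themselves) matters because the test env drives a single agent, so the per-action values sit in row 0 of a (num_agents, num_actions) array. Illustrative shapes only; the 1/-1 mapping for discrete choices is an assumption, not the env's actual rule:

import numpy as np

discrete = np.array([[1, 0]])          # one agent, two discrete branches
continuous = np.array([[0.3, -0.7]])   # one agent, two continuous actions

deltas = []
for _disc in discrete[0]:              # per-branch values for agent 0
    deltas.append(1.0 if _disc else -1.0)
for _cont in continuous[0]:
    deltas.append(float(_cont))
# deltas -> [1.0, -1.0, 0.3, -0.7]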

ml-agents/mlagents/trainers/tests/torch/test_simple_rl.py (28 changes)


SAC_TORCH_CONFIG = attr.evolve(sac_dummy_config(), framework=FrameworkType.PYTORCH)
@pytest.mark.parametrize("use_discrete", [True, False])
@pytest.mark.parametrize("use_discrete", [True])
assert False
#@pytest.mark.parametrize("use_discrete", [True, False])
#def test_2d_ppo(use_discrete):
# env = SimpleEnvironment(
# [BRAIN_NAME], use_discrete=use_discrete, action_size=2, step_size=0.8
# )
# new_hyperparams = attr.evolve(
# PPO_TORCH_CONFIG.hyperparameters, batch_size=64, buffer_size=640
# )
# config = attr.evolve(
# PPO_TORCH_CONFIG, hyperparameters=new_hyperparams, max_steps=10000
# )
# check_environment_trains(env, {BRAIN_NAME: config})
@pytest.mark.parametrize("use_discrete", [True])
def test_2d_ppo(use_discrete):
env = SimpleEnvironment(
[BRAIN_NAME], use_discrete=use_discrete, action_size=2, step_size=0.8
)
new_hyperparams = attr.evolve(
PPO_TORCH_CONFIG.hyperparameters, batch_size=64, buffer_size=640
)
config = attr.evolve(
PPO_TORCH_CONFIG, hyperparameters=new_hyperparams, max_steps=10000
)
check_environment_trains(env, {BRAIN_NAME: config})
assert False
#@pytest.mark.parametrize("use_discrete", [True, False])
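The test edits are a debugging pattern rather than a functional change: restrict the parametrization to the case under investigation and end with a deliberate assert False so pytest reports a failure and dumps the captured output from the run. Stripped-down version of the pattern (the environment setup and training call are elided):

import pytest

@pytest.mark.parametrize("use_discrete", [True])   # temporarily drop the continuous case
def test_2d_ppo(use_discrete):
    # ... build SimpleEnvironment and run check_environment_trains here ...
    assert False  # intentional failure so pytest prints the captured stdout/logs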

ml-agents/mlagents/trainers/torch/networks.py (2 changes)


for action_dist in dists:
action = action_dist.sample()
actions.append(action)
return AgentAction.create_agent_action(actions, self.action_spec)
return actions
def get_dists(
self,
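sample_action draws one action per distribution and, on one side of the change, wraps the list via AgentAction.create_agent_action before returning. A hedged sketch of what such a wrapper could do, splitting the list by an action spec; the names and the continuous-first ordering below are assumptions, not the trainer's API:

from typing import List, NamedTuple, Optional, Tuple
import torch

class SpecSketch(NamedTuple):
    continuous_size: int
    discrete_branches: Tuple[int, ...]

def create_agent_action_sketch(action_list: List[torch.Tensor], spec: SpecSketch):
    # Assumes the continuous sample (if any) comes first, followed by one
    # tensor per discrete branch, mirroring the order of the distributions.
    continuous: Optional[torch.Tensor] = None
    discrete: List[torch.Tensor] = []
    idx = 0
    if spec.continuous_size > 0:
        continuous = action_list[idx]
        idx += 1
    for _ in spec.discrete_branches:
        discrete.append(action_list[idx])
        idx += 1
    return continuous, discrete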

ml-agents/mlagents/trainers/torch/utils.py (14 changes)


def to_numpy_dict(self) -> Dict[str, np.ndarray]:
action_arrays_dict: Dict[str, np.ndarray] = {}
if self.continuous is not None:
action_arrays_dict["continuous_action"] = ModelUtils.to_numpy(self.continuous)
action_arrays_dict["continuous_action"] = ModelUtils.to_numpy(self.continuous.unsqueeze(-1)[:, :, 0])
action_arrays_dict["discrete_action"] = np.array([ModelUtils.to_numpy(_disc) for _disc in self.discrete])
discrete_tensor = torch.stack(self.discrete, dim=-1)
action_arrays_dict["discrete_action"] = ModelUtils.to_numpy(discrete_tensor[:, 0, :])
return action_arrays_dict
def to_tensor_list(self) -> List[torch.Tensor]:
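For the discrete part of to_numpy_dict, one side of the change builds the array with a per-branch list comprehension while the other stacks the branch tensors and slices out the singleton middle dimension. A shape walk-through with illustrative tensors; the (batch, 1) per-branch shape is inferred from the [:, 0, :] slice, so treat it as an assumption:

import torch

batch = 4
discrete = [torch.zeros(batch, 1, dtype=torch.long),   # branch 0 samples
            torch.ones(batch, 1, dtype=torch.long)]    # branch 1 samples

discrete_tensor = torch.stack(discrete, dim=-1)        # (batch, 1, num_branches)
discrete_action = discrete_tensor[:, 0, :].numpy()     # (batch, num_branches) for the buffer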

if "continuous_action" in buff:
continuous = ModelUtils.list_to_tensor(buff["continuous_action"])
if "discrete_action" in buff:
discrete = ModelUtils.list_to_tensor(buff["discrete_action"])
discrete_tensor = ModelUtils.list_to_tensor(buff["discrete_action"])
discrete = [discrete_tensor[..., i] for i in range(discrete_tensor.shape[-1])]
return AgentAction(continuous, discrete)
@staticmethod
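Going the other way, the buffered (batch, num_branches) discrete array is split back into one tensor per branch. With illustrative data (shapes assumed):

import torch

discrete_tensor = torch.tensor([[1, 0], [2, 1], [0, 0]])   # (batch=3, num_branches=2)
discrete = [discrete_tensor[..., i] for i in range(discrete_tensor.shape[-1])]
# discrete[0] -> tensor([1, 2, 0]); discrete[1] -> tensor([0, 1, 0])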

if self.continuous is not None:
log_prob_arrays_dict["continuous_log_probs"] = ModelUtils.to_numpy(self.continuous)
if self.discrete is not None:
log_prob_arrays_dict["discrete_log_probs"] = np.array([ModelUtils.to_numpy(_disc) for _disc in self.discrete])
discrete_tensor = torch.stack(self.discrete, dim=-1)
log_prob_arrays_dict["discrete_log_probs"] = ModelUtils.to_numpy(discrete_tensor.squeeze(1))
return log_prob_arrays_dict
def to_tensor_list(self) -> List[torch.Tensor]:
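The discrete log probs get the same stacking treatment, with squeeze(1) collapsing the singleton middle dimension instead of indexing it. Illustrative shapes only; the (batch, 1) per-branch shape is again an assumption:

import torch

log_probs = [torch.full((4, 1), -0.1), torch.full((4, 1), -0.5)]   # two branches, batch of 4
discrete_tensor = torch.stack(log_probs, dim=-1)                   # (4, 1, 2)
discrete_log_probs = discrete_tensor.squeeze(1).numpy()            # (4, 2)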

@staticmethod
def get_probs_and_entropy(
agent_action: AgentAction, dists: List[DistInstance]
action_list: List[torch.Tensor], dists: List[DistInstance]
action_list = agent_action.to_tensor_list()
for action, action_dist in zip(action_list, dists):
log_prob = action_dist.log_prob(action)
log_probs_list.append(log_prob)
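The zip loop at the end is the per-branch log-prob and entropy computation that both signatures feed into. A sketch using plain torch.distributions, as in the torch_policy illustration above (the trainer's DistInstance classes are not used here):

import torch
from torch.distributions import Categorical

dists = [Categorical(logits=torch.randn(4, 3)), Categorical(logits=torch.randn(4, 5))]
action_list = [d.sample() for d in dists]   # e.g. what a to_tensor_list() call would yield

log_probs_list, entropies_list = [], []
for action, action_dist in zip(action_list, dists):
    log_probs_list.append(action_dist.log_prob(action))   # (4,) log prob per branch
    entropies_list.append(action_dist.entropy())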
