浏览代码

discrete runs/cont passes

/develop/action-spec-gym
Andrew Cohen 4 年前
当前提交
b36fcf16
共有 4 个文件被更改,包括 7 次插入 和 8 次删除
  1. 6
      ml-agents/mlagents/trainers/policy/torch_policy.py
  2. 2
      ml-agents/mlagents/trainers/tests/torch/test_simple_rl.py
  3. 4
      ml-agents/mlagents/trainers/torch/utils.py
  4. 3
      ml-agents/mlagents/trainers/trajectory.py

6
ml-agents/mlagents/trainers/policy/torch_policy.py


from typing import Any, Dict, List, Tuple, Optional
from typing import Any, Dict, List, Tuple, Optional, Union
import numpy as np
from mlagents.torch_utils import torch, default_device
import copy

memories: Optional[torch.Tensor] = None,
seq_len: int = 1,
all_log_probs: bool = False,
) -> Tuple[List[torch.Tensor], torch.Tensor, torch.Tensor, torch.Tensor]:
) -> Tuple[List[torch.Tensor], Union[torch.Tensor, List[torch.Tensor]], torch.Tensor, torch.Tensor]:
"""
:param vec_obs: List of vector observations.
:param vis_obs: List of visual observations.

vec_obs, vis_obs, masks=masks, memories=memories
)
run_out["action"] = action.to_numpy_dict()
run_out["pre_action"] = action.to_numpy_dict()["continuous_action"] # Todo - make pre_action difference
run_out["pre_action"] = action.to_numpy_dict()["continuous_action"] if self.use_continuous_act else None# Todo - make pre_action difference
run_out["log_probs"] = log_probs.to_numpy_dict()
run_out["entropy"] = ModelUtils.to_numpy(entropy)
run_out["learning_rate"] = 0.0

2
ml-agents/mlagents/trainers/tests/torch/test_simple_rl.py


SAC_TORCH_CONFIG = attr.evolve(sac_dummy_config(), framework=FrameworkType.PYTORCH)
@pytest.mark.parametrize("use_discrete", [False])
@pytest.mark.parametrize("use_discrete", [True, False])
def test_simple_ppo(use_discrete):
env = SimpleEnvironment([BRAIN_NAME], use_discrete=use_discrete)
config = attr.evolve(PPO_TORCH_CONFIG)

4
ml-agents/mlagents/trainers/torch/utils.py


if self.continuous is not None:
action_arrays_dict["continuous_action"] = ModelUtils.to_numpy(self.continuous)
if self.discrete is not None:
action_arrays_dict["discrete_action"] = ModelUtils.to_numpy(self.discrete)
action_arrays_dict["discrete_action"] = np.array([ModelUtils.to_numpy(_disc) for _disc in self.discrete])
return action_arrays_dict
def to_tensor_list(self) -> List[torch.Tensor]:

if self.continuous is not None:
log_prob_arrays_dict["continuous_log_probs"] = ModelUtils.to_numpy(self.continuous)
if self.discrete is not None:
log_prob_arrays_dict["discrete_log_probs"] = ModelUtils.to_numpy(self.discrete)
log_prob_arrays_dict["discrete_log_probs"] = np.array([ModelUtils.to_numpy(_disc) for _disc in self.discrete])
return log_prob_arrays_dict
def to_tensor_list(self) -> List[torch.Tensor]:

3
ml-agents/mlagents/trainers/trajectory.py


agent_buffer_trajectory["done"].append(exp.done)
# Add the outputs of the last eval
if exp.action_pre is not None:
actions_pre = exp.action_pre
agent_buffer_trajectory["actions_pre"].append(actions_pre)
agent_buffer_trajectory["actions_pre"].append(exp.action_pre)
# Adds the log prob and action of continuous/discrete separately
for act_type, act_array in exp.action.items():

正在加载...
取消
保存