Browse Source

removed action_spec.size

/develop/action-spec-gym
Andrew Cohen 4 years ago
Current commit
b6d10456
9 changed files with 27 additions and 29 deletions
  1. gym-unity/gym_unity/envs/__init__.py (5 changes)
  2. ml-agents-envs/mlagents_envs/base_env.py (10 changes)
  3. ml-agents-envs/mlagents_envs/tests/test_envs.py (16 changes)
  4. ml-agents-envs/mlagents_envs/tests/test_rpc_utils.py (4 changes)
  5. ml-agents-envs/mlagents_envs/tests/test_steps.py (6 changes)
  6. ml-agents/mlagents/trainers/policy/policy.py (4 changes)
  7. ml-agents/mlagents/trainers/tests/mock_brain.py (3 changes)
  8. ml-agents/mlagents/trainers/tests/torch/test_policy.py (6 changes)
  9. ml-agents/tests/yamato/scripts/run_llapi.py (2 changes)
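
All nine files drop the removed `ActionSpec.size` property (the sum of `discrete_size` and `continuous_size`) in favor of the explicit per-type sizes. A minimal before/after sketch of caller code, assuming the `mlagents_envs` API as shown in the hunks below (`make_continuous` and `create_empty` appear in the tests and in `base_env.py`):

import numpy as np
from mlagents_envs.base_env import ActionSpec

spec = ActionSpec.make_continuous(3)
n_agents = 4

# Before this commit, callers sized action buffers with the combined property:
#   np.zeros((n_agents, spec.size), dtype=np.float32)
# After it, they use the explicit per-type size, or let the spec build the buffer:
action = np.zeros((n_agents, spec.continuous_size), dtype=np.float32)
assert action.shape == spec.create_empty(n_agents).shape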

gym-unity/gym_unity/envs/__init__.py (5 changes)


  # Set action spaces
  if self.group_spec.action_spec.is_discrete():
      self.action_size = self.group_spec.action_spec.discrete_size
      branches = self.group_spec.action_spec.discrete_branches
      if self.group_spec.action_spec.discrete_size == 1:
          self._action_space = spaces.Discrete(branches[0])

              "The environment has a non-discrete action space. It will "
              "not be flattened."
          )
      self.action_size = self.group_spec.action_spec.continuous_size
      high = np.array([1] * self.group_spec.action_spec.continuous_size)
      self._action_space = spaces.Box(-high, high, dtype=np.float32)

  # Translate action into list
  action = self._flattener.lookup_action(action)
  spec = self.group_spec
- action = np.array(action).reshape((1, spec.action_spec.size))
+ action = np.array(action).reshape((1, self.action_size))
  self._env.set_actions(self.name, action)
  self._env.step()
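
For context, a sketch of how the continuous branch above shapes the gym action space and the reshaped step action, assuming a spec with `continuous_size == 2` (the variable names here are stand-ins, not the wrapper's own):

import numpy as np
from gym import spaces

continuous_size = 2  # stand-in for self.group_spec.action_spec.continuous_size
high = np.array([1] * continuous_size)
action_space = spaces.Box(-high, high, dtype=np.float32)  # Box in [-1, 1]^2

# step() now reshapes the incoming action using self.action_size:
action = np.array([0.5, -0.5], dtype=np.float32).reshape((1, continuous_size))
assert action_space.contains(action[0])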

ml-agents-envs/mlagents_envs/base_env.py (10 changes)


  def continuous_size(self) -> int:
      return self.num_continuous_actions

- @property
- def size(self) -> int:
-     return self.discrete_size + self.continuous_size

  def create_empty(self, n_agents: int) -> np.ndarray:
      if self.is_continuous():
          return np.zeros((n_agents, self.continuous_size), dtype=np.float32)

  Validates that action has the correct action dim
  for the correct number of agents.
  """
- _expected_shape = (n_agents, self.size)
+ if self.continuous_size > 0:
+     _size = self.continuous_size
+ else:
+     _size = self.discrete_size
+ _expected_shape = (n_agents, _size)
  if actions.shape != _expected_shape:
      raise UnityActionException(
          f"The behavior {name} needs an input of dimension "

ml-agents-envs/mlagents_envs/tests/test_envs.py (16 changes)


from unittest import mock
import pytest
import numpy as np
from mlagents_envs.environment import UnityEnvironment
from mlagents_envs.base_env import DecisionSteps, TerminalSteps
from mlagents_envs.exception import UnityEnvironmentException, UnityActionException

  env.step()
  decision_steps, terminal_steps = env.get_steps("RealFakeBrain")
  n_agents = len(decision_steps)
- env.set_actions(
-     "RealFakeBrain", np.zeros((n_agents, spec.action_spec.size), dtype=np.float32)
- )
+ env.set_actions("RealFakeBrain", spec.action_spec.create_empty(n_agents))
- env.set_actions(
-     "RealFakeBrain",
-     np.zeros((n_agents - 1, spec.action_spec.size), dtype=np.float32),
- )
+ env.set_actions("RealFakeBrain", spec.action_spec.create_empty(n_agents - 1))
- env.set_actions(
-     "RealFakeBrain",
-     -1 * np.ones((n_agents, spec.action_spec.size), dtype=np.float32),
- )
+ env.set_actions("RealFakeBrain", spec.action_spec.create_empty(n_agents) - 1)
  env.step()
  env.close()
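
The hand-rolled `np.zeros((n_agents, spec.action_spec.size), ...)` buffers are replaced with `create_empty`, which (per the `base_env.py` hunk above) returns float32 zeros of shape `(n_agents, continuous_size)` for continuous specs, so the last call still produces an all-minus-one action batch:

import numpy as np
from mlagents_envs.base_env import ActionSpec

spec = ActionSpec.make_continuous(3)
empty = spec.create_empty(5)
assert empty.shape == (5, 3) and empty.dtype == np.float32
# create_empty(n) - 1 reproduces the old -1 * np.ones(...) batch:
assert (spec.create_empty(5) - 1 == -1 * np.ones((5, 3), dtype=np.float32)).all()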

ml-agents-envs/mlagents_envs/tests/test_rpc_utils.py (4 changes)


  assert not behavior_spec.action_spec.is_continuous()
  assert behavior_spec.observation_shapes == [(3,), (4,)]
  assert behavior_spec.action_spec.discrete_branches == (5, 4)
- assert behavior_spec.action_spec.size == 2
+ assert behavior_spec.action_spec.discrete_size == 2
  bp = BrainParametersProto()
  bp.vector_action_size.extend([6])
  bp.vector_action_space_type = 1

- assert behavior_spec.action_spec.size == 6
+ assert behavior_spec.action_spec.continuous_size == 6

  def test_batched_step_result_from_proto_raises_on_infinite():

ml-agents-envs/mlagents_envs/tests/test_steps.py (6 changes)


  def test_specs():
      specs = ActionSpec.make_continuous(3)
      assert specs.discrete_branches == ()
-     assert specs.size == 3
      assert specs.discrete_size == 0
      assert specs.continuous_size == 3

-     assert specs.size == 1
      assert specs.discrete_size == 1
      assert specs.continuous_size == 0
      assert specs.create_empty(5).shape == (5, 1)
      assert specs.create_empty(5).dtype == np.int32
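
The discrete assertions above belong to a spec whose construction is elided from this hunk. A sketch that would satisfy them, assuming `make_discrete` mirrors `make_continuous` and takes the branch-size tuple (the `(4,)` argument is illustrative):

import numpy as np
from mlagents_envs.base_env import ActionSpec

specs = ActionSpec.make_discrete((4,))  # assumed signature: one branch of size 4
assert specs.discrete_size == 1
assert specs.continuous_size == 0
assert specs.create_empty(5).shape == (5, 1)
assert specs.create_empty(5).dtype == np.int32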

ml-agents/mlagents/trainers/policy/policy.py (4 changes)


  self.act_size = (
      list(self.behavior_spec.action_spec.discrete_branches)
      if self.behavior_spec.action_spec.is_discrete()
-     else [self.behavior_spec.action_spec.size]
+     else [self.behavior_spec.action_spec.continuous_size]
  )
  self.vec_obs_size = sum(
      shape[0] for shape in behavior_spec.observation_shapes if len(shape) == 1
  )
  self.use_continuous_act = self.behavior_spec.action_spec.is_continuous()
- self.num_branches = self.behavior_spec.action_spec.size
+ self.num_branches = self.behavior_spec.action_spec.discrete_size
  self.previous_action_dict: Dict[str, np.array] = {}
  self.memory_dict: Dict[str, np.ndarray] = {}
  self.normalize = trainer_settings.network_settings.normalize
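
Note the behavioral nuance here: for a continuous spec, the removed `size` equaled `continuous_size`, so `act_size` is unchanged, but `num_branches` now reads 0 instead of the continuous size. A sketch of the resulting values (computed from the lines above, not from running the trainer):

from mlagents_envs.base_env import ActionSpec

continuous = ActionSpec.make_continuous(4)
act_size = [continuous.continuous_size]   # else-branch above -> [4], as before
num_branches = continuous.discrete_size   # now 0; the old .size reported 4
assert act_size == [4] and num_branches == 0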

ml-agents/mlagents/trainers/tests/mock_brain.py (3 changes)


  the trajectory is terminated by a max step rather than a done.
  """
  steps_list = []
- action_size = action_spec.size
+ action_size = action_spec.discrete_size + action_spec.continuous_size
  action_probs = np.ones(
      int(np.sum(action_spec.discrete_branches) + action_spec.continuous_size),
      dtype=np.float32,

ml-agents/mlagents/trainers/tests/torch/test_policy.py (6 changes)


      memories=memories,
      seq_len=policy.sequence_length,
  )
+ if discrete:
+     _size = policy.behavior_spec.action_spec.discrete_size
+ else:
+     _size = policy.behavior_spec.action_spec.continuous_size
- assert log_probs.shape == (64, policy.behavior_spec.action_spec.size)
+ assert log_probs.shape == (64, _size)
  assert entropy.shape == (64,)
  for val in values.values():
      assert val.shape == (64,)

ml-agents/tests/yamato/scripts/run_llapi.py (2 changes)


  while not done:
      if group_spec.action_spec.is_continuous():
          action = np.random.randn(
-             len(decision_steps), group_spec.action_spec.size
+             len(decision_steps), group_spec.action_spec.continuous_size
          )
      elif group_spec.action_spec.is_discrete():
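
The discrete branch is truncated in this hunk. A plausible counterpart, sampling one action per agent per branch with NumPy (the body below is an assumption, not part of this diff):

import numpy as np

n_agents = 3
discrete_branches = (5, 4)  # e.g. the branches asserted in test_rpc_utils.py

# One random action per agent per branch, each bounded by its branch size.
action = np.column_stack(
    [np.random.randint(0, b, size=n_agents) for b in discrete_branches]
)
assert action.shape == (n_agents, len(discrete_branches))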
