浏览代码

[bug-fix] Fix Gym and some Policy tests for ActionSpec (#4590)

* Fix Gym for ActionSpec

* Fix TF policy test
/develop/action-spec-gym
GitHub 4 年前
当前提交
60b173df
共有 3 个文件被更改,包括 21 次插入、23 次删除
  1. 10
      gym-unity/gym_unity/envs/__init__.py
  2. 8
      gym-unity/gym_unity/tests/test_gym.py
  3. 26
      ml-agents/mlagents/trainers/tests/tensorflow/test_tf_policy.py

10
gym-unity/gym_unity/envs/__init__.py


self._previous_decision_step = decision_steps
# Set action spaces
if self.group_spec.is_action_discrete():
branches = self.group_spec.discrete_action_branches
if self.group_spec.action_size == 1:
if self.group_spec.action_spec.is_action_discrete():
branches = self.group_spec.action_spec.discrete_action_branches
if self.group_spec.action_spec.discrete_action_size == 1:
self._action_space = spaces.Discrete(branches[0])
else:
if flatten_branched:

"The environment has a non-discrete action space. It will "
"not be flattened."
)
high = np.array([1] * self.group_spec.action_shape)
high = np.array([1] * self.group_spec.action_spec.continuous_action_size)
self._action_space = spaces.Box(-high, high, dtype=np.float32)
# Set observations space

action = self._flattener.lookup_action(action)
spec = self.group_spec
action = np.array(action).reshape((1, spec.action_size))
action = np.array(action).reshape((1, spec.action_spec.action_size))
self._env.set_actions(self.name, action)
self._env.step()

8
gym-unity/gym_unity/tests/test_gym.py


from gym_unity.envs import UnityToGymWrapper
from mlagents_envs.base_env import (
BehaviorSpec,
ActionType,
ActionSpec,
DecisionSteps,
TerminalSteps,
BehaviorMapping,

Creates a mock BrainParameters object with parameters.
"""
# Avoid using mutable object as default param
act_type = ActionType.DISCRETE
act_type = ActionType.CONTINUOUS
action_spec = ActionSpec(vector_action_space_size, ())
action_spec = ActionSpec(0, vector_action_space_size)
return BehaviorSpec(obs_shapes, act_type, vector_action_space_size)
return BehaviorSpec(obs_shapes, action_spec)
def create_mock_vector_steps(specs, num_agents=1, number_visual_observations=0):

26
ml-agents/mlagents/trainers/tests/tensorflow/test_tf_policy.py


from mlagents.trainers.policy.tf_policy import TFPolicy
from mlagents_envs.base_env import DecisionSteps, BehaviorSpec
from mlagents_envs.base_env import ActionSpec, DecisionSteps, BehaviorSpec
from mlagents.trainers.action_info import ActionInfo
from unittest.mock import MagicMock
from mlagents.trainers.settings import TrainerSettings

def basic_mock_brain():
mock_brain = MagicMock()
mock_brain.vector_action_space_type = "continuous"
mock_brain.vector_observation_space_size = 1
mock_brain.vector_action_space_size = [1]
mock_brain.brain_name = "MockBrain"
return mock_brain
def basic_behavior_spec():
dummy_actionspec = ActionSpec(1, ())
dummy_groupspec = BehaviorSpec([(1,)], dummy_actionspec)
return dummy_groupspec
class FakePolicy(TFPolicy):

def test_take_action_returns_empty_with_no_agents():
test_seed = 3
policy = FakePolicy(test_seed, basic_mock_brain(), TrainerSettings(), "output")
# Doesn't really matter what this is
dummy_groupspec = BehaviorSpec([(1,)], "continuous", 1)
no_agent_step = DecisionSteps.empty(dummy_groupspec)
behavior_spec = basic_behavior_spec()
policy = FakePolicy(test_seed, behavior_spec, TrainerSettings(), "output")
no_agent_step = DecisionSteps.empty(behavior_spec)
result = policy.get_action(no_agent_step)
assert result == ActionInfo.empty()

policy = FakePolicy(test_seed, basic_mock_brain(), TrainerSettings(), "output")
behavior_spec = basic_behavior_spec()
policy = FakePolicy(test_seed, behavior_spec, TrainerSettings(), "output")
policy.evaluate = MagicMock(return_value={})
policy.save_memories = MagicMock()
step_with_agents = DecisionSteps(

def test_take_action_returns_action_info_when_available():
test_seed = 3
policy = FakePolicy(test_seed, basic_mock_brain(), TrainerSettings(), "output")
behavior_spec = basic_behavior_spec()
policy = FakePolicy(test_seed, behavior_spec, TrainerSettings(), "output")
policy_eval_out = {
"action": np.array([1.0], dtype=np.float32),
"memory_out": np.array([[2.5]], dtype=np.float32),

正在加载...
取消
保存