
[tests] Add tests for multiple actions/action branches (#3672)

Branch: /develop/add-fire
Committed by GitHub, 5 years ago
Current commit: 104f2c46
4 files changed, 84 insertions(+), 55 deletions(-)
1. ml-agents/mlagents/trainers/tests/simple_test_envs.py (78 changes)
2. ml-agents/mlagents/trainers/tests/test_meta_curriculum.py (4 changes)
3. ml-agents/mlagents/trainers/tests/test_simple_rl.py (53 changes)
4. ml-agents/mlagents/trainers/tests/test_subprocess_env_manager.py (4 changes)

ml-agents/mlagents/trainers/tests/simple_test_envs.py (78 changes)


return max(min_val, min(x, max_val))
-class Simple1DEnvironment(BaseEnv):
+class SimpleEnvironment(BaseEnv):
"""
Very simple "game" - the agent has a position on [-1, 1], gets a reward of 1 if it reaches 1, and a reward of -1 if
it reaches -1. The position is incremented by the action amount (clamped to [-step_size, step_size]).

num_vector=1,
vis_obs_size=VIS_OBS_SIZE,
vec_obs_size=OBS_SIZE,
+action_size=1,
):
super().__init__()
self.discrete = use_discrete

self.vec_obs_size = vec_obs_size
action_type = ActionType.DISCRETE if use_discrete else ActionType.CONTINUOUS
self.group_spec = AgentGroupSpec(
-    self._make_obs_spec(), action_type, (2,) if use_discrete else 1
+    self._make_obs_spec(),
+    action_type,
+    tuple(2 for _ in range(action_size)) if use_discrete else action_size,
)
+self.action_size = action_size
-self.position: Dict[str, float] = {}
+self.positions: Dict[str, List[float]] = {}
self.step_count: Dict[str, float] = {}
self.random = random.Random(str(self.group_spec))
self.goal: Dict[str, int] = {}

return self.step_result[name]
def _take_action(self, name: str) -> bool:
-    act = self.action[name][0][0]
-    delta = 1 if act else -1
-    delta = self.action[name][0][0]
-    delta = clamp(delta, -self.step_size, self.step_size)
-    self.position[name] += delta
-    self.position[name] = clamp(self.position[name], -1, 1)
+    deltas = []
+    for _act in self.action[name][0]:
+        if self.discrete:
+            deltas.append(1 if _act else -1)
+        else:
+            deltas.append(_act)
+    for i, _delta in enumerate(deltas):
+        _delta = clamp(_delta, -self.step_size, self.step_size)
+        self.positions[name][i] += _delta
+        self.positions[name][i] = clamp(self.positions[name][i], -1, 1)
     self.step_count[name] += 1
-    done = self.position[name] >= 1.0 or self.position[name] <= -1.0
+    # All positions must reach +/-1.0 for the episode to be done
+    done = all(pos >= 1.0 or pos <= -1.0 for pos in self.positions[name])
     return done

def _generate_mask(self):
    if self.discrete:
        # LL-Python API will return an empty dim if there is only 1 agent.
-        ndmask = np.array(2 * [False], dtype=np.bool)
+        ndmask = np.array(2 * self.action_size * [False], dtype=np.bool)
        ndmask = np.expand_dims(ndmask, axis=0)
        action_mask = [ndmask]
    else:
        action_mask = None
    return action_mask

-    reward = SUCCESS_REWARD * self.position[name] * self.goal[name]
+    reward = 0.0
+    for _pos in self.positions[name]:
+        reward += (SUCCESS_REWARD * _pos * self.goal[name]) / len(
+            self.positions[name]
+        )

def _make_batched_step(
    self, name: str, done: bool, reward: float

    self.rewards[name] += reward
    self.step_result[name] = self._make_batched_step(name, done, reward)

def _reset_agent(self, name):
    self.goal[name] = self.random.choice([-1, 1])
-    self.position[name] = 0.0
+    self.positions[name] = [0.0 for _ in range(self.action_size)]
    self.step_count[name] = 0
    self.final_rewards[name].append(self.rewards[name])
    self.rewards[name] = 0
    self.agent_id[name] = self.agent_id[name] + 1
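Taken together, the _take_action change generalizes the 1-D walk: each action dimension moves its own position, and the episode only ends once every dimension has hit an endpoint of [-1, 1]. A standalone, runnable sketch of that update follows; clamp and step_positions are local stand-ins for illustration, not the committed code.

from typing import List

def clamp(x, min_val, max_val):
    return max(min_val, min(x, max_val))

def step_positions(positions: List[float], deltas: List[float], step_size: float) -> bool:
    # Move each dimension independently, then check whether every
    # dimension has reached an endpoint of [-1, 1].
    for i, delta in enumerate(deltas):
        delta = clamp(delta, -step_size, step_size)
        positions[i] = clamp(positions[i] + delta, -1.0, 1.0)
    return all(p >= 1.0 or p <= -1.0 for p in positions)

pos = [0.0, 0.0]
assert not step_positions(pos, [1.0, -0.1], step_size=0.2)  # pos -> [0.2, -0.1]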
def reset(self) -> None:  # type: ignore
    for name in self.names:
        self._reset_agent(name)

pass
-class Memory1DEnvironment(Simple1DEnvironment):
+class MemoryEnvironment(SimpleEnvironment):
    def __init__(self, brain_names, use_discrete, step_size=0.2):
        super().__init__(brain_names, use_discrete, step_size=step_size)
# Number of steps to reveal the goal for. Lower is harder. Should be

)
-class Record1DEnvironment(Simple1DEnvironment):
+class RecordEnvironment(SimpleEnvironment):
    def __init__(
        self,
        brain_names,
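The net effect of the new action_size parameter on the action spec can be seen in isolation. Below is a minimal sketch mirroring the AgentGroupSpec expression in the constructor diff above; the helper name make_action_shape is invented for illustration.

def make_action_shape(use_discrete, action_size):
    # Mirrors the AgentGroupSpec argument: discrete actions get one branch
    # of size 2 (the +/-1 choice) per dimension; continuous actions just
    # report their dimensionality.
    if use_discrete:
        return tuple(2 for _ in range(action_size))
    return action_size

assert make_action_shape(True, 2) == (2, 2)   # two branches, two options each
assert make_action_shape(False, 3) == 3       # three continuous dimensions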

ml-agents/mlagents/trainers/tests/test_meta_curriculum.py (4 changes)


import json
import yaml
-from mlagents.trainers.tests.simple_test_envs import Simple1DEnvironment
+from mlagents.trainers.tests.simple_test_envs import SimpleEnvironment
from mlagents.trainers.tests.test_simple_rl import _check_environment_trains, BRAIN_NAME
from mlagents.trainers.tests.test_curriculum import dummy_curriculum_json_str

@pytest.mark.parametrize("curriculum_brain_name", [BRAIN_NAME, "WrongBrainName"])
def test_simple_metacurriculum(curriculum_brain_name):
-env = Simple1DEnvironment([BRAIN_NAME], use_discrete=False)
+env = SimpleEnvironment([BRAIN_NAME], use_discrete=False)
curriculum_config = json.loads(dummy_curriculum_json_str)
mc = MetaCurriculum({curriculum_brain_name: curriculum_config})
trainer_config = yaml.safe_load(TRAINER_CONFIG)
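For orientation, the construction this test exercises is sketched below with a hypothetical curriculum body; the real dummy_curriculum_json_str is imported from test_curriculum and is not shown in this diff, so the JSON keys here are assumptions.

import json
from mlagents.trainers.meta_curriculum import MetaCurriculum

# Hypothetical stand-in for dummy_curriculum_json_str.
dummy_json = """
{
  "measure": "reward",
  "thresholds": [0.5],
  "min_lesson_length": 100,
  "signal_smoothing": true,
  "parameters": {"param1": [0.0, 1.0]}
}
"""
curriculum_config = json.loads(dummy_json)
# Keyed by brain name; "WrongBrainName" exercises a curriculum that
# matches no brain in the environment.
mc = MetaCurriculum({"WrongBrainName": curriculum_config})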

ml-agents/mlagents/trainers/tests/test_simple_rl.py (53 changes)


from typing import Dict, Any
from mlagents.trainers.tests.simple_test_envs import (
-    Simple1DEnvironment,
-    Memory1DEnvironment,
-    Record1DEnvironment,
+    SimpleEnvironment,
+    MemoryEnvironment,
+    RecordEnvironment,
)
from mlagents.trainers.trainer_controller import TrainerController
from mlagents.trainers.trainer_util import TrainerFactory

@pytest.mark.parametrize("use_discrete", [True, False])
def test_simple_ppo(use_discrete):
-    env = Simple1DEnvironment([BRAIN_NAME], use_discrete=use_discrete)
+    env = SimpleEnvironment([BRAIN_NAME], use_discrete=use_discrete)
config = generate_config(PPO_CONFIG)
_check_environment_trains(env, config)
+@pytest.mark.parametrize("use_discrete", [True, False])
+def test_2d_ppo(use_discrete):
+    env = SimpleEnvironment(
+        [BRAIN_NAME], use_discrete=use_discrete, action_size=2, step_size=0.5
+    )
+    config = generate_config(PPO_CONFIG)
+    _check_environment_trains(env, config)
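What action_size=2 means for the actions the trainer must emit is sketched below; the array layouts follow the one-agent-per-brain convention of these test envs and should be treated as illustrative.

import numpy as np

# Discrete, action_size=2 -> branches (2, 2): one choice per branch,
# which _take_action maps to a +/-1 delta per dimension.
discrete_action = np.array([[1, 0]])         # shape (n_agents, n_branches)

# Continuous, action_size=2: one float per dimension, clamped to
# [-step_size, step_size] before it moves the position.
continuous_action = np.array([[0.3, -0.5]])  # shape (n_agents, action_size)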

def test_visual_ppo(num_visual, use_discrete):
-    env = Simple1DEnvironment(
+    env = SimpleEnvironment(
[BRAIN_NAME],
use_discrete=use_discrete,
num_visual=num_visual,

@pytest.mark.parametrize("num_visual", [1, 2])
@pytest.mark.parametrize("vis_encode_type", ["resnet", "nature_cnn"])
def test_visual_advanced_ppo(vis_encode_type, num_visual):
-    env = Simple1DEnvironment(
+    env = SimpleEnvironment(
[BRAIN_NAME],
use_discrete=True,
num_visual=num_visual,

@pytest.mark.parametrize("use_discrete", [True, False])
def test_recurrent_ppo(use_discrete):
-    env = Memory1DEnvironment([BRAIN_NAME], use_discrete=use_discrete)
+    env = MemoryEnvironment([BRAIN_NAME], use_discrete=use_discrete)
override_vals = {
"max_steps": 4000,
"batch_size": 64,

@pytest.mark.parametrize("use_discrete", [True, False])
def test_simple_sac(use_discrete):
-    env = Simple1DEnvironment([BRAIN_NAME], use_discrete=use_discrete)
+    env = SimpleEnvironment([BRAIN_NAME], use_discrete=use_discrete)
+@pytest.mark.parametrize("use_discrete", [True, False])
+def test_2d_sac(use_discrete):
+    env = SimpleEnvironment(
+        [BRAIN_NAME], use_discrete=use_discrete, action_size=2, step_size=0.5
+    )
+    override_vals = {"buffer_init_steps": 2000, "max_steps": 3000}
+    config = generate_config(SAC_CONFIG, override_vals)
+    _check_environment_trains(env, config)
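The generate_config(CONFIG, override_vals) pattern recurs throughout this file. Its internals are not part of this diff; the sketch below is a plausible re-implementation for orientation only, and the YAML body and BRAIN_NAME value are stand-ins.

import yaml

BRAIN_NAME = "1D"  # stand-in; the tests import the real constant

SAC_CONFIG = """
batch_size: 128
buffer_init_steps: 0
max_steps: 2000
"""  # hypothetical config body

def generate_config(base_yaml, override_vals=None):
    # Parse the base trainer YAML, overlay the test-specific values,
    # and key the result by brain name as the trainers expect.
    config = yaml.safe_load(base_yaml)
    if override_vals is not None:
        config.update(override_vals)
    return {BRAIN_NAME: config}

config = generate_config(SAC_CONFIG, {"buffer_init_steps": 2000, "max_steps": 3000})
assert config["1D"]["max_steps"] == 3000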
@pytest.mark.parametrize("use_discrete", [True, False])
-    env = Simple1DEnvironment(
+    env = SimpleEnvironment(
[BRAIN_NAME],
use_discrete=use_discrete,
num_visual=num_visual,

@pytest.mark.parametrize("num_visual", [1, 2])
@pytest.mark.parametrize("vis_encode_type", ["resnet", "nature_cnn"])
def test_visual_advanced_sac(vis_encode_type, num_visual):
-    env = Simple1DEnvironment(
+    env = SimpleEnvironment(
[BRAIN_NAME],
use_discrete=True,
num_visual=num_visual,

@pytest.mark.parametrize("use_discrete", [True, False])
def test_recurrent_sac(use_discrete):
-    env = Memory1DEnvironment([BRAIN_NAME], use_discrete=use_discrete)
+    env = MemoryEnvironment([BRAIN_NAME], use_discrete=use_discrete)
override_vals = {"batch_size": 32, "use_recurrent": True, "max_steps": 2000}
config = generate_config(SAC_CONFIG, override_vals)
_check_environment_trains(env, config)
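Both recurrent tests depend on the memory mechanic hinted at by the truncated comment in the first file: MemoryEnvironment reveals the goal only for the first few steps, so a policy needs recurrence to act on it later. A sketch of that observation rule is below; the num_show_steps name and cutoff behavior are assumptions, since that hunk is cut off in this diff.

def observe_goal(goal, step_count, num_show_steps=2):
    # The goal signal is visible early in the episode and zeroed afterwards,
    # so only a policy with memory can exploit it.
    return [float(goal) if step_count < num_show_steps else 0.0]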

def test_simple_ghost(use_discrete):
-    env = Simple1DEnvironment(
+    env = SimpleEnvironment(
[BRAIN_NAME + "?team=0", BRAIN_NAME + "?team=1"], use_discrete=use_discrete
)
override_vals = {

@pytest.mark.parametrize("use_discrete", [True, False])
def test_simple_ghost_fails(use_discrete):
-    env = Simple1DEnvironment(
+    env = SimpleEnvironment(
[BRAIN_NAME + "?team=0", BRAIN_NAME + "?team=1"], use_discrete=use_discrete
)
# This config should fail because the ghosted policy is never swapped with a competent policy.

@pytest.fixture(scope="session")
def simple_record(tmpdir_factory):
def record_demo(use_discrete, num_visual=0, num_vector=1):
-        env = Record1DEnvironment(
+        env = RecordEnvironment(
[BRAIN_NAME],
use_discrete=use_discrete,
num_visual=num_visual,

@pytest.mark.parametrize("trainer_config", [PPO_CONFIG, SAC_CONFIG])
def test_gail(simple_record, use_discrete, trainer_config):
demo_path = simple_record(use_discrete)
-    env = Simple1DEnvironment([BRAIN_NAME], use_discrete=use_discrete, step_size=0.2)
+    env = SimpleEnvironment([BRAIN_NAME], use_discrete=use_discrete, step_size=0.2)
override_vals = {
"max_steps": 500,
"behavioral_cloning": {"demo_path": demo_path, "strength": 1.0, "steps": 1000},

@pytest.mark.parametrize("use_discrete", [True, False])
def test_gail_visual_ppo(simple_record, use_discrete):
demo_path = simple_record(use_discrete, num_visual=1, num_vector=0)
-    env = Simple1DEnvironment(
+    env = SimpleEnvironment(
[BRAIN_NAME],
num_visual=1,
num_vector=0,

@pytest.mark.parametrize("use_discrete", [True, False])
def test_gail_visual_sac(simple_record, use_discrete):
demo_path = simple_record(use_discrete, num_visual=1, num_vector=0)
-    env = Simple1DEnvironment(
+    env = SimpleEnvironment(
[BRAIN_NAME],
num_visual=1,
num_vector=0,

ml-agents/mlagents/trainers/tests/test_subprocess_env_manager.py (4 changes)


from mlagents.trainers.env_manager import EnvironmentStep
from mlagents_envs.base_env import BaseEnv
from mlagents_envs.side_channel.engine_configuration_channel import EngineConfig
-from mlagents.trainers.tests.simple_test_envs import Simple1DEnvironment
+from mlagents.trainers.tests.simple_test_envs import SimpleEnvironment
from mlagents.trainers.stats import StatsReporter
from mlagents.trainers.tests.test_simple_rl import (
_check_environment_trains,

def simple_env_factory(worker_id, config):
-    env = Simple1DEnvironment(["1D"], use_discrete=True)
+    env = SimpleEnvironment(["1D"], use_discrete=True)
return env
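Elsewhere in this test file the factory is handed to the manager under test, along the lines of the sketch below; the constructor arguments match how ml-agents tests of this era call SubprocessEnvManager, but treat the exact signature as an assumption.

from mlagents.trainers.subprocess_env_manager import SubprocessEnvManager

# Spin up two copies of the env in worker processes, as the end-to-end
# test in this file does, then shut the workers down.
env_manager = SubprocessEnvManager(
    simple_env_factory, EngineConfig.default_config(), 2
)
env_manager.close()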
