Compare commits

...
This merge request contains changes that conflict with the target branch.
/ml-agents-envs/setup.py
/ml-agents/mlagents/trainers/tests/test_simple_rl.py

2 commits

Author         SHA1      Message         Committed
vincentpierre  d031c7a9  Merging master  4 years ago
vincentpierre  599d7e9f  Merging master  4 years ago
4 files changed, with 260 insertions and 4 deletions
1. ml-agents-envs/setup.py (1 change)
2. ml-agents/mlagents/trainers/tests/test_simple_rl.py (45 changes)
3. ml-agents-envs/mlagents_envs/gym_to_unity_wrapper.py (178 changes)
4. ml-agents-envs/mlagents_envs/tests/test_gym_to_unity_wrapper.py (40 changes)

ml-agents-envs/setup.py (1 change)


    packages=find_packages(exclude=["*.tests", "*.tests.*", "tests.*", "tests"]),
    zip_safe=False,
    install_requires=[
        "gym",
        "cloudpickle",
        "grpcio>=1.11.0",
        "numpy>=1.14.1,<2.0",

ml-agents/mlagents/trainers/tests/test_simple_rl.py (45 changes)


from mlagents.trainers.trainer_controller import TrainerController
from mlagents.trainers.trainer_util import TrainerFactory
from mlagents.trainers.simple_env_manager import SimpleEnvManager
from mlagents.trainers.subprocess_env_manager import SubprocessEnvManager
from mlagents.trainers.demo_loader import write_demo
from mlagents.trainers.stats import StatsReporter, StatsWriter, StatsSummary
from mlagents.trainers.settings import (

)
from mlagents_envs.communicator_objects.brain_parameters_pb2 import BrainParametersProto
from mlagents_envs.communicator_objects.space_type_pb2 import discrete, continuous
import gym
from mlagents_envs.gym_to_unity_wrapper import GymToUnityWrapper
from mlagents_envs.side_channel.engine_configuration_channel import EngineConfig
BRAIN_NAME = "1D"

        train=True,
        training_seed=seed,
    )
    env_manager.close()
    if hasattr(env, "final_rewards"):
        processed_rewards = [
            reward_processor(rewards) for rewards in env.final_rewards.values()
        ]
    else:
        processed_rewards = list(debug_writer.get_last_rewards().values())
    assert all(not math.isnan(reward) for reward in processed_rewards)
    assert all(reward > success_threshold for reward in processed_rewards)

        max_steps=500,
    )
    _check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)


@pytest.mark.gym
@pytest.mark.parametrize(
    "gym_name,target_return",
    [
        pytest.param("CartPole-v0", 150),  # optimal 200
        # pytest.param("MountainCar-v0", -199),  # solved if more than -200
        # pytest.param("MountainCarContinuous-v0", 0),  # optimal 90
    ],
)
def test_sac_gym_training(gym_name, target_return, pytestconfig):
    if "gym" not in pytestconfig.getoption(name="-m", skip=False):
        pytest.skip(
            "Did not run the gym tests; add the gym marker to run these tests"
        )
    env = GymToUnityWrapper(gym.make(gym_name), BRAIN_NAME)
    hyperparams = attr.evolve(
        SAC_CONFIG.hyperparameters, learning_rate=3e-4, buffer_size=1000
    )
    config = attr.evolve(SAC_CONFIG, hyperparameters=hyperparams, max_steps=50000)

    def factory(worker_id, side_channels):
        return GymToUnityWrapper(gym.make(gym_name), BRAIN_NAME)

    manager = SubprocessEnvManager(factory, EngineConfig.default_config(), 30)
    _check_environment_trains(
        env, {BRAIN_NAME: config}, success_threshold=target_return, env_manager=manager
    )

ml-agents-envs/mlagents_envs/gym_to_unity_wrapper.py (178 changes)


from mlagents_envs.base_env import (
    BaseEnv,
    DecisionSteps,
    TerminalSteps,
    BehaviorSpec,
    BehaviorName,
    AgentId,
    ActionType,
    BehaviorMapping,
)
from mlagents_envs.exception import UnityActionException, UnityObservationException
from typing import Tuple, Union, Optional
import numpy as np
import gym


class GymToUnityWrapper(BaseEnv):
    _DEFAULT_BEHAVIOR_NAME = "gym_behavior_name"
    _AGENT_ID = 1

    def __init__(self, gym_env: gym.Env, name: Optional[str] = None):
        """
        Wrapper constructor. Creates an implementation of a Unity BaseEnv from a gym
        environment.
        :gym.Env gym_env: The gym environment that will be wrapped.
        :str name: [Optional] The name of the gym environment. This will become the
        name of the behavior for the BaseEnv.
        """
        self._gym_env = gym_env
        self._first_message = True
        if name is None:
            self._behavior_name = self._DEFAULT_BEHAVIOR_NAME
        else:
            self._behavior_name = name
        action_type = ActionType.CONTINUOUS
        action_shape: Union[Tuple[int, ...], int] = 0
        if isinstance(self._gym_env.action_space, gym.spaces.Box):
            action_type = ActionType.CONTINUOUS
            action_shape = np.prod(self._gym_env.action_space.shape)
            # Scale factor between the normalized action range and the Box range
            self._act_ratio = np.maximum(
                self._gym_env.action_space.high, -self._gym_env.action_space.low
            )
            # If the range is infinity, just don't rescale
            self._act_ratio[self._act_ratio > 1e38] = 1
        elif isinstance(self._gym_env.action_space, gym.spaces.Discrete):
            action_shape = (self._gym_env.action_space.n,)
            action_type = ActionType.DISCRETE
        else:
            raise UnityActionException(
                f"Unknown action type {self._gym_env.action_space}"
            )
        if not isinstance(self._gym_env.observation_space, gym.spaces.Box):
            raise UnityObservationException(
                f"Unknown observation type {self._gym_env.observation_space}"
            )
        self._obs_ratio = np.maximum(
            self._gym_env.observation_space.high, -self._gym_env.observation_space.low
        )
        # If the range is infinity, just don't normalize
        self._obs_ratio[self._obs_ratio > 1e38] = 1
        self._behavior_specs = BehaviorSpec(
            observation_shapes=[self._gym_env.observation_space.shape],
            action_type=action_type,
            action_shape=action_shape,
        )
        self._g_action: Optional[Union[int, np.ndarray]] = None
        self._current_steps: Tuple[DecisionSteps, TerminalSteps] = (
            DecisionSteps.empty(self._behavior_specs),
            TerminalSteps.empty(self._behavior_specs),
        )
    @property
    def behavior_specs(self) -> BehaviorMapping:
        return BehaviorMapping({self._behavior_name: self._behavior_specs})

    def step(self) -> None:
        if self._first_message:
            self.reset()
            return
        obs, rew, done, info = self._gym_env.step(self._g_action)
        if not done:
            self._current_steps = (
                DecisionSteps(
                    obs=[np.expand_dims(obs / self._obs_ratio, axis=0)],
                    reward=np.array([rew], dtype=np.float32),
                    agent_id=np.array([self._AGENT_ID], dtype=np.int32),
                    action_mask=None,
                ),
                TerminalSteps.empty(self._behavior_specs),
            )
        else:
            self._first_message = True
            self._current_steps = (
                DecisionSteps.empty(self._behavior_specs),
                TerminalSteps(
                    obs=[np.expand_dims(obs / self._obs_ratio, axis=0)],
                    reward=np.array([rew], dtype=np.float32),
                    interrupted=np.array(
                        [info.get("TimeLimit.truncated", False)], dtype=np.bool
                    ),
                    agent_id=np.array([self._AGENT_ID], dtype=np.int32),
                ),
            )

    def reset(self) -> None:
        self._first_message = False
        obs = self._gym_env.reset()
        self._current_steps = (
            DecisionSteps(
                obs=[np.expand_dims(obs / self._obs_ratio, axis=0)],
                reward=np.array([0], dtype=np.float32),
                agent_id=np.array([self._AGENT_ID], dtype=np.int32),
                action_mask=None,
            ),
            TerminalSteps.empty(self._behavior_specs),
        )
    def close(self) -> None:
        self._gym_env.close()

    def set_actions(self, behavior_name: BehaviorName, action: np.ndarray) -> None:
        assert behavior_name == self._behavior_name
        spec = self._behavior_specs
        expected_type = np.float32 if spec.is_action_continuous() else np.int32
        n_agents = len(self._current_steps[0])
        if n_agents == 0:
            return
        expected_shape = (n_agents, spec.action_size)
        if action.shape != expected_shape:
            raise UnityActionException(
                "The behavior {0} needs an input of dimension {1} but received input of dimension {2}".format(
                    behavior_name, expected_shape, action.shape
                )
            )
        if action.dtype != expected_type:
            action = action.astype(expected_type)
        if isinstance(self._gym_env.action_space, gym.spaces.Discrete):
            self._g_action = int(action[0, 0])
        elif isinstance(self._gym_env.action_space, gym.spaces.Box):
            # Scale the normalized action back up to the Box range, mirroring
            # the division applied to observations above
            self._g_action = action[0] * self._act_ratio
        else:
            raise UnityActionException(
                f"Unknown action type {self._gym_env.action_space}"
            )
    def set_action_for_agent(
        self, behavior_name: BehaviorName, agent_id: AgentId, action: np.ndarray
    ) -> None:
        assert behavior_name == self._behavior_name
        assert agent_id == self._AGENT_ID
        spec = self._behavior_specs
        expected_shape = (spec.action_size,)
        if action.shape != expected_shape:
            raise UnityActionException(
                f"The Agent {agent_id} with BehaviorName {behavior_name} needs "
                f"an input of dimension {expected_shape} but received input of "
                f"dimension {action.shape}"
            )
        expected_type = np.float32 if spec.is_action_continuous() else np.int32
        if action.dtype != expected_type:
            action = action.astype(expected_type)
        if isinstance(self._gym_env.action_space, gym.spaces.Discrete):
            self._g_action = int(action[0])
        elif isinstance(self._gym_env.action_space, gym.spaces.Box):
            # Scale the normalized action back up to the Box range
            self._g_action = action * self._act_ratio
        else:
            raise UnityActionException(
                f"Unknown action type {self._gym_env.action_space}"
            )

    def get_steps(
        self, behavior_name: BehaviorName
    ) -> Tuple[DecisionSteps, TerminalSteps]:
        assert behavior_name == self._behavior_name
        return self._current_steps
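The wrapper is driven like any other BaseEnv: reset, read the current DecisionSteps/TerminalSteps, set actions, step. A minimal sketch of that loop (not part of the diff; the behavior name "cartpole" is arbitrary, and create_empty_action is the BehaviorSpec helper this PR's own tests use):

import gym
from mlagents_envs.gym_to_unity_wrapper import GymToUnityWrapper

env = GymToUnityWrapper(gym.make("CartPole-v0"), "cartpole")
spec = env.behavior_specs["cartpole"]
env.reset()
for _ in range(100):
    decision_steps, terminal_steps = env.get_steps("cartpole")
    # Zero-filled actions of the right shape and dtype; a real policy would
    # compute these from decision_steps.obs instead.
    env.set_actions("cartpole", spec.create_empty_action(len(decision_steps)))
    # After a terminal step the wrapper sets _first_message, so the next
    # step() call resets the gym environment instead of stepping it.
    env.step()
env.close()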

ml-agents-envs/mlagents_envs/tests/test_gym_to_unity_wrapper.py (40 changes)


from mlagents_envs.gym_to_unity_wrapper import GymToUnityWrapper
from mlagents_envs.base_env import ActionType
import gym
import pytest

GYM_ENVS = ["CartPole-v1", "MountainCar-v0"]


@pytest.mark.parametrize("name", GYM_ENVS, ids=GYM_ENVS)
def test_creation(name):
    env = GymToUnityWrapper(gym.make(name), name)
    env.close()


@pytest.mark.parametrize("name", GYM_ENVS, ids=GYM_ENVS)
def test_specs(name):
    gym_env = gym.make(name)
    env = GymToUnityWrapper(gym_env, name)
    assert list(env.behavior_specs.keys()) == [name]
    if isinstance(gym_env.action_space, gym.spaces.Box):
        assert env.behavior_specs[name].action_type == ActionType.CONTINUOUS
    elif isinstance(gym_env.action_space, gym.spaces.Discrete):
        assert env.behavior_specs[name].action_type == ActionType.DISCRETE
    else:
        raise NotImplementedError("Test for this action space type not implemented")
    env.close()


@pytest.mark.parametrize("name", GYM_ENVS, ids=GYM_ENVS)
def test_steps(name):
    env = GymToUnityWrapper(gym.make(name), name)
    spec = env.behavior_specs[name]
    env.reset()
    for _ in range(200):
        d_steps, t_steps = env.get_steps(name)
        env.set_actions(name, spec.create_empty_action(len(d_steps)))
        env.step()
    env.close()
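These are smoke tests: they only verify that the wrapper can be constructed, reports the expected ActionType for each space, and survives 200 steps of zero-valued actions (create_empty_action, in this version of ml-agents, returns a zero-filled array of the right shape and dtype for the given number of agents). Reward bookkeeping and the auto-reset after a terminal step are exercised indirectly by the training tests above.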