浏览代码

[skip ci] Added some tests but they do not pass (too hard)

/develop/gym-wrapper
vincentpierre 5 年前
当前提交
cad57a00
共有 2 个文件被更改,包括 81 次插入10 次删除
  1. 20
      ml-agents-envs/mlagents_envs/gym_to_unity_wrapper.py
  2. 71
      ml-agents/mlagents/trainers/tests/test_simple_rl.py

20
ml-agents-envs/mlagents_envs/gym_to_unity_wrapper.py


if isinstance(self._gym_env.action_space, gym.spaces.Box):
action_type = ActionType.CONTINUOUS
action_shape = np.prod(self._gym_env.action_space.shape)
self.act_ratio = np.maximum(
self._gym_env.action_space.high, -self._gym_env.action_space.low
)
self.act_ratio[self.act_ratio > 1e38] = 1
elif isinstance(self._gym_env.action_space, gym.spaces.Discrete):
action_shape = (self._gym_env.action_space.n,)
action_type = ActionType.DISCRETE

)
self.obs_ratio = np.maximum(
self._gym_env.observation_space.high, -self._gym_env.observation_space.low
)
self.obs_ratio = np.maximum(
self._gym_env.observation_space.high, -self._gym_env.observation_space.low
)
# If the range is infinity, just don't normalize
self.obs_ratio[self.obs_ratio > 1e38] = 1
self._behavior_specs = BehaviorSpec(
observation_shapes=[self._gym_env.observation_space.shape],
action_type=action_type,

spec = self._behavior_specs
expected_type = np.float32 if spec.is_action_continuous() else np.int32
n_agents = len(self._current_steps[0])
if n_agents == 0:
return
expected_shape = (n_agents, spec.action_size)
if action.shape != expected_shape:
raise UnityActionException(

)
if action.dtype != expected_type:
action = action.astype(expected_type)
if n_agents == 0:
return
self._g_action = action[0]
self._g_action = action[0] / self.act_ratio
else:
raise UnityActionException(
f"Unknown action type {self._gym_env.action_space}"

if isinstance(self._gym_env.action_space, gym.spaces.Discrete):
self._g_action = int(action[0])
elif isinstance(self._gym_env.action_space, gym.spaces.Box):
self._g_action = action
self._g_action = action / self.act_ratio
else:
raise UnityActionException(
f"Unknown action type {self._gym_env.action_space}"

71
ml-agents/mlagents/trainers/tests/test_simple_rl.py


from mlagents_envs.communicator_objects.brain_parameters_pb2 import BrainParametersProto
from mlagents_envs.communicator_objects.space_type_pb2 import discrete, continuous
import gym
from mlagents_envs.gym_to_unity_wrapper import GymToUnityWrapper
BRAIN_NAME = "1D"
PPO_CONFIG = f"""

if (
success_threshold is not None
): # For tests where we are just checking setup and not reward
processed_rewards = [
reward_processor(rewards) for rewards in env.final_rewards.values()
]
if hasattr(env, "final_rewards"):
processed_rewards = [
reward_processor(rewards) for rewards in env.final_rewards.values()
]
else:
processed_rewards = list(debug_writer.get_last_rewards().values())
assert all(not math.isnan(reward) for reward in processed_rewards)
assert all(reward > success_threshold for reward in processed_rewards)

}
config = generate_config(SAC_CONFIG, override_vals)
_check_environment_trains(env, config, success_threshold=0.9)
@pytest.mark.gym
@pytest.mark.parametrize(
    "gym_name,target_return",
    [
        pytest.param("CartPole-v0", 150),  # optimal 200
        pytest.param("MountainCar-v0", -199),  # solved if more than -200
        pytest.param("MountainCarContinuous-v0", 0),  # optimal 90
    ],
)
def test_ppo_gym_training(gym_name, target_return, pytestconfig):
    """Train PPO on a classic-control gym env wrapped via GymToUnityWrapper.

    The test only runs when the ``gym`` marker is explicitly selected
    (``pytest -m gym``); these trainings are slow, so they are opt-in.

    :param gym_name: registered gym environment id to train on.
    :param target_return: mean episode return the trained agent must exceed.
    :param pytestconfig: pytest fixture used to inspect the ``-m`` option.
    """
    if "gym" not in pytestconfig.getoption(name="-m", skip=False):
        # pytest.skip raises Skipped itself; an explicit `raise` is redundant.
        pytest.skip("Did not run the gym tests, add the marker gym to run these tests")
    env = GymToUnityWrapper(gym.make(gym_name), BRAIN_NAME)
    override_vals = {
        "max_steps": 1000000,
        "batch_size": 1024,
        "buffer_size": 10240,
        "num_layers": 2,
        "hidden_units": 128,
        "time_horizon": 256,
        "learning_rate_schedule": "linear",
        "curiosity": {"strength": 0.01, "gamma": 0.95, "encoding_size": 256},
        "learning_rate": 3.0e-4,
    }
    config = generate_config(PPO_CONFIG, override_vals)
    _check_environment_trains(env, config, success_threshold=target_return)
@pytest.mark.gym
@pytest.mark.parametrize(
    "gym_name,target_return",
    [
        pytest.param("CartPole-v0", 150),  # optimal 200
        pytest.param("MountainCar-v0", -199),  # solved if more than -200
        pytest.param("MountainCarContinuous-v0", 0),  # optimal 90
    ],
)
def test_sac_gym_training(gym_name, target_return, pytestconfig):
    """Train SAC on a classic-control gym env wrapped via GymToUnityWrapper.

    The test only runs when the ``gym`` marker is explicitly selected
    (``pytest -m gym``); these trainings are slow, so they are opt-in.

    :param gym_name: registered gym environment id to train on.
    :param target_return: mean episode return the trained agent must exceed.
    :param pytestconfig: pytest fixture used to inspect the ``-m`` option.
    """
    if "gym" not in pytestconfig.getoption(name="-m", skip=False):
        # pytest.skip raises Skipped itself; an explicit `raise` is redundant.
        pytest.skip("Did not run the gym tests, add the marker gym to run these tests")
    env = GymToUnityWrapper(gym.make(gym_name), BRAIN_NAME)
    override_vals = {
        "max_steps": 1000000,
        "buffer_size": 10240,
        "num_layers": 2,
        "hidden_units": 128,
        "time_horizon": 256,
        "learning_rate_schedule": "linear",
        "curiosity": {"strength": 0.01, "gamma": 0.95, "encoding_size": 256},
        "learning_rate": 3.0e-4,
    }
    # BUG FIX: this SAC test previously built its config from PPO_CONFIG,
    # so it trained with the wrong algorithm's base configuration. Use
    # SAC_CONFIG, matching the SAC tests elsewhere in this file.
    config = generate_config(SAC_CONFIG, override_vals)
    _check_environment_trains(env, config, success_threshold=target_return)
正在加载...
取消
保存