import math

import gym
import pytest

from mlagents_envs.communicator_objects.brain_parameters_pb2 import BrainParametersProto
from mlagents_envs.communicator_objects.space_type_pb2 import discrete, continuous
from mlagents_envs.gym_to_unity_wrapper import GymToUnityWrapper

BRAIN_NAME = "1D"

PPO_CONFIG = f"""
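
# Post-training reward check (fragment): when the environment exposes
# final_rewards, process those; otherwise fall back to the rewards recorded
# by the debug stats writer.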
if (
    success_threshold is not None
):  # For tests where we are just checking setup and not reward
    if hasattr(env, "final_rewards"):
        processed_rewards = [
            reward_processor(rewards) for rewards in env.final_rewards.values()
        ]
    else:
        processed_rewards = list(debug_writer.get_last_rewards().values())
    assert all(not math.isnan(reward) for reward in processed_rewards)
    assert all(reward > success_threshold for reward in processed_rewards)


}
config = generate_config(SAC_CONFIG, override_vals)
_check_environment_trains(env, config, success_threshold=0.9)


@pytest.mark.gym
@pytest.mark.parametrize(
    "gym_name,target_return",
    [
        pytest.param("CartPole-v0", 150),  # optimal 200
        pytest.param("MountainCar-v0", -199),  # solved if more than -200
        pytest.param("MountainCarContinuous-v0", 0),  # optimal 90
    ],
)
def test_ppo_gym_training(gym_name, target_return, pytestconfig):
    if "gym" not in pytestconfig.getoption(name="-m", skip=False):
        raise pytest.skip(
            "Did not run the gym tests; add the gym marker to run these tests"
        )
    env = GymToUnityWrapper(gym.make(gym_name), BRAIN_NAME)
    # Hyperparameter overrides applied on top of the base PPO config.
    override_vals = {
        "max_steps": 1000000,
        "batch_size": 1024,
        "buffer_size": 10240,
        "num_layers": 2,
        "hidden_units": 128,
        "time_horizon": 256,
        "learning_rate_schedule": "linear",
        "curiosity": {"strength": 0.01, "gamma": 0.95, "encoding_size": 256},
        "learning_rate": 3.0e-4,
    }
    config = generate_config(PPO_CONFIG, override_vals)
    _check_environment_trains(env, config, success_threshold=target_return)


@pytest.mark.gym
@pytest.mark.parametrize(
    "gym_name,target_return",
    [
        pytest.param("CartPole-v0", 150),  # optimal 200
        pytest.param("MountainCar-v0", -199),  # solved if more than -200
        pytest.param("MountainCarContinuous-v0", 0),  # optimal 90
    ],
)
def test_sac_gym_training(gym_name, target_return, pytestconfig):
    if "gym" not in pytestconfig.getoption(name="-m", skip=False):
        raise pytest.skip(
            "Did not run the gym tests; add the gym marker to run these tests"
        )
    env = GymToUnityWrapper(gym.make(gym_name), BRAIN_NAME)
    # Hyperparameter overrides applied on top of the base SAC config.
    override_vals = {
        "max_steps": 1000000,
        "buffer_size": 10240,
        "num_layers": 2,
        "hidden_units": 128,
        "time_horizon": 256,
        "learning_rate_schedule": "linear",
        "curiosity": {"strength": 0.01, "gamma": 0.95, "encoding_size": 256},
        "learning_rate": 3.0e-4,
    }
    config = generate_config(SAC_CONFIG, override_vals)
    _check_environment_trains(env, config, success_threshold=target_return)
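

# Note: these training smoke tests skip themselves unless the "gym" marker is
# selected explicitly, e.g. `pytest -m gym` on this test module.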