
tf tests except gail pass

/develop/actionmodel-csharp
Andrew Cohen, 4 years ago
Current commit fc3027ac
4 files changed: 38 insertions, 28 deletions
1. ml-agents/mlagents/trainers/agent_processor.py (9 changes)
2. ml-agents/mlagents/trainers/policy/policy.py (14 changes)
3. ml-agents/mlagents/trainers/policy/tf_policy.py (4 changes)
4. ml-agents/mlagents/trainers/tests/simple_test_envs.py (39 changes)

ml-agents/mlagents/trainers/agent_processor.py (9 changes)


  interrupted = step.interrupted if terminated else False
  # Add the outputs of the last eval
  action = stored_take_action_outputs["action"][idx]
- #if self.policy.use_continuous_act:
- #    action_pre = stored_take_action_outputs["pre_action"][idx]
- #else:
- #    action_pre = None
- action_pre = None
+ if self.policy.use_continuous_act:
+     action_pre = stored_take_action_outputs["pre_action"][idx]
+ else:
+     action_pre = None
  action_probs = stored_take_action_outputs["log_probs"][idx]
  action_mask = stored_decision_step.action_mask
  prev_action = self.policy.retrieve_previous_action([global_id])[0, :]
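The re-enabled branch stores a pre-squash action only for continuous policies. A minimal sketch of the idea, assuming a tanh-squashed Gaussian policy; the function and variable names below are illustrative, not the trainer's actual sampling code:

    import numpy as np

    # Hypothetical illustration: a continuous policy samples an unbounded
    # "pre" action, then squashes it into [-1, 1] before sending it to the
    # environment. Both values are kept so log-probabilities can be
    # recovered exactly later; discrete policies have no raw value,
    # hence action_pre stays None for them.
    def sample_squashed_action(mean: np.ndarray, std: np.ndarray):
        pre_action = mean + std * np.random.standard_normal(mean.shape)
        action = np.tanh(pre_action)
        return action, pre_action

    action, pre_action = sample_squashed_action(np.zeros(2), 0.5 * np.ones(2))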

ml-agents/mlagents/trainers/policy/policy.py (14 changes)


  self.trainer_settings = trainer_settings
  self.network_settings: NetworkSettings = trainer_settings.network_settings
  self.seed = seed
- # For hybrid
- #self.act_size = (
- #    list(behavior_spec.discrete_action_branches)
- #    if behavior_spec.is_action_discrete()
- #    else [behavior_spec.action_size]
- #)
+ # For mixed action spaces
+ self.act_size = (
+     list(behavior_spec.discrete_action_branches)
+     if behavior_spec.is_action_discrete()
+     else [behavior_spec.action_size]
+ )
  self.vec_obs_size = sum(
      shape[0] for shape in behavior_spec.observation_shapes if len(shape) == 1
  )
- #self.use_continuous_act = behavior_spec.is_action_continuous()
+ self.use_continuous_act = behavior_spec.is_action_continuous()
  self.num_branches = self.behavior_spec.action_size
  self.previous_action_dict: Dict[str, np.array] = {}
  self.memory_dict: Dict[str, np.ndarray] = {}
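For reference, the restored act_size expression behaves roughly as follows. This is a standalone sketch with stand-in values; the branch tuple and boolean here are assumptions for the example, not the real BehaviorSpec API:

    # Discrete case: one entry per branch, e.g. two branches with 3 and 2 options.
    discrete_action_branches = (3, 2)
    is_discrete = True
    act_size = list(discrete_action_branches) if is_discrete else [4]
    assert act_size == [3, 2]

    # Continuous case: a single-element list holding the action dimension.
    is_discrete = False
    act_size = list(discrete_action_branches) if is_discrete else [4]
    assert act_size == [4]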

ml-agents/mlagents/trainers/policy/tf_policy.py (4 changes)


      reparameterize,
      condition_sigma_on_obs,
  )
+ if self.continuous_act_size > 0 and len(self.discrete_act_size) > 0:
+     raise UnityPolicyException(
+         "Tensorflow does not support mixed action spaces. Please run with --torch."
+     )
  # for ghost trainer save/load snapshots
  self.assign_phs: List[tf.Tensor] = []
  self.assign_ops: List[tf.Operation] = []
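The new guard fails fast when a behavior declares both action kinds. A self-contained sketch of the same pattern, using stand-in sizes rather than the real TFPolicy attributes:

    class UnityPolicyException(Exception):
        """Stand-in for the trainer's exception type."""

    def check_action_space(continuous_act_size: int, discrete_act_size: tuple) -> None:
        # Mirrors the guard above: reject specs that declare both continuous
        # and discrete actions, since the TF code path cannot train them.
        if continuous_act_size > 0 and len(discrete_act_size) > 0:
            raise UnityPolicyException(
                "Tensorflow does not support mixed action spaces. Please run with --torch."
            )

    check_action_space(2, ())      # fine: purely continuous
    check_action_space(0, (3, 2))  # fine: purely discrete
    # check_action_space(2, (3,))  # would raise UnityPolicyException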

ml-agents/mlagents/trainers/tests/simple_test_envs.py (39 changes)


  self.vis_obs_size = vis_obs_size
  self.vec_obs_size = vec_obs_size
- action_type = ActionType.DISCRETE if use_discrete else ActionType.CONTINUOUS
- self.behavior_spec = BehaviorSpec(
-     self._make_obs_spec(),
-     action_type,
-     tuple(2 for _ in range(action_size)) if use_discrete else action_size,
- )
+ if use_discrete:
+     self.behavior_spec = BehaviorSpec(
+         self._make_obs_spec(), 0, tuple(2 for _ in range(action_size))
+     )
+ else:
+     self.behavior_spec = BehaviorSpec(
+         self._make_obs_spec(), action_size, tuple()
+     )
  self.action_size = action_size
  self.names = brain_names
  self.positions: Dict[str, List[float]] = {}
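The replacement drops ActionType in favor of positional (continuous size, discrete branches) arguments. A hedged sketch of that shape, using a NamedTuple stand-in rather than the real BehaviorSpec class:

    from typing import NamedTuple, Tuple

    class BehaviorSpecSketch(NamedTuple):
        observation_shapes: list
        continuous_action_size: int                 # 0 when the behavior is discrete
        discrete_action_branches: Tuple[int, ...]   # () when continuous

    action_size = 2
    discrete_spec = BehaviorSpecSketch([(4,)], 0, tuple(2 for _ in range(action_size)))
    continuous_spec = BehaviorSpecSketch([(4,)], action_size, tuple())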

  )
  super().__init__(
      brain_names,
      True,
      action_size=discrete_action_size,  # This is needed for env to generate masks correctly
  )
  self.behavior_spec = BehaviorSpec(
-     self._make_obs_spec(), continuous_action_size, tuple(2 for _ in range(discrete_action_size))
+     self._make_obs_spec(),
+     continuous_action_size,
+     tuple(2 for _ in range(discrete_action_size)),
  )
  self.continuous_action_size = continuous_action_size
  self.discrete_action_size = discrete_action_size
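Unlike the discrete-or-continuous specs above, the hybrid environment populates both fields at once. Continuing the same illustrative sketch (assumed shapes, not the real class):

    # Both parts populated: one continuous dimension plus one two-way
    # discrete branch per discrete action.
    continuous_action_size = 1
    discrete_action_size = 1
    hybrid_spec = (
        [(4,)],                                         # observation shapes
        continuous_action_size,                         # continuous part
        tuple(2 for _ in range(discrete_action_size)),  # discrete branches
    )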

  all_done = cont_done and disc_done
  if all_done:
      reward = 0
-     for _pos in self.continuous_env.positions[name] + self.discrete_env.positions[name]:
+     for _pos in (
+         self.continuous_env.positions[name]
+         + self.discrete_env.positions[name]
+     ):
-         self.continuous_env.positions[name] + self.discrete_env.positions[name]
+         self.continuous_env.positions[name]
+         + self.discrete_env.positions[name]
- self.step_result[name] = self._make_batched_step(
-     name, all_done, reward
- )
+ self.step_result[name] = self._make_batched_step(name, all_done, reward)

  def reset(self) -> None:  # type: ignore
      super().reset()

  self.discrete_env.goal = self.goal

  def set_actions(self, behavior_name: BehaviorName, action) -> None:
-     #print(action, self.goal[behavior_name])
-     continuous_action = action[:, :self.continuous_action_size]
-     discrete_action = action[:, self.continuous_action_size:]
+     # print(action, self.goal[behavior_name])
+     continuous_action = action[:, : self.continuous_action_size]
+     discrete_action = action[:, self.continuous_action_size :]
      self.continuous_env.set_actions(behavior_name, continuous_action)
      self.discrete_env.set_actions(behavior_name, discrete_action)
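The reformatted slicing splits one batched hybrid action into its two halves. A small runnable example of the same split, with made-up numbers:

    import numpy as np

    continuous_action_size = 2
    # One batched hybrid action: two continuous values, then two discrete choices.
    action = np.array([[0.3, -0.7, 1.0, 0.0]])

    continuous_action = action[:, :continuous_action_size]  # [[ 0.3 -0.7]]
    discrete_action = action[:, continuous_action_size:]    # [[ 1.  0.]]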
