
removed abstract class

/develop/actionmodel-csharp
Andrew Cohen, 4 years ago
Current commit
e686a785
3 files changed, 67 insertions and 55 deletions
  1. ml-agents/mlagents/trainers/tests/simple_test_envs.py (1 change)
  2. ml-agents/mlagents/trainers/tests/torch/test_simple_rl.py (56 changes)
  3. ml-agents/mlagents/trainers/torch/action_models.py (65 changes)

ml-agents/mlagents/trainers/tests/simple_test_envs.py (1 change)


        self.discrete_env.goal = self.goal

    def set_actions(self, behavior_name: BehaviorName, action) -> None:
        # print(action, self.goal[behavior_name])
        continuous_action = action[:, :self.continuous_action_size]
        discrete_action = action[:, self.continuous_action_size:]
        self.continuous_env.set_actions(behavior_name, continuous_action)
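
The updated set_actions in the hybrid test environment takes one flat action array and slices it column-wise into its continuous and discrete parts before forwarding each piece to the wrapped environment. A minimal sketch of that slicing on its own, with made-up sizes and values rather than the actual test code:

import numpy as np

continuous_action_size = 2  # hypothetical sizes, mirroring the HybridEnvironment arguments
discrete_action_size = 2

# One row per agent: continuous values first, then discrete branch choices
action = np.array([[0.1, -0.3, 1.0, 0.0],
                   [0.5, 0.2, 0.0, 1.0]], dtype=np.float32)

continuous_action = action[:, :continuous_action_size]  # shape (2, 2): the float part
discrete_action = action[:, continuous_action_size:]    # shape (2, 2): the branch indices

print(continuous_action.shape, discrete_action.shape)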

ml-agents/mlagents/trainers/tests/torch/test_simple_rl.py (56 changes)


    assert all(not math.isnan(reward) for reward in processed_rewards)
    assert all(reward > success_threshold for reward in processed_rewards)
#@pytest.mark.parametrize("use_discrete", [True, False])
#def test_simple_ppo(use_discrete):
# env = SimpleEnvironment([BRAIN_NAME], use_discrete=use_discrete)

#def test_hybrid_ppo():
# env = HybridEnvironment([BRAIN_NAME], action_size=1, step_size=0.2)
# config = attr.evolve(PPO_CONFIG, max_steps=10000)
# _check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=5.0)
def test_2dhybrid_ppo():
    env = HybridEnvironment([BRAIN_NAME], continuous_action_size=1, discrete_action_size=2, step_size=0.8)
def test_hybrid_ppo():
    env = HybridEnvironment([BRAIN_NAME], continuous_action_size=2, discrete_action_size=2, step_size=0.8)
        PPO_CONFIG.hyperparameters, batch_size=128, buffer_size=1280
        PPO_CONFIG.hyperparameters, batch_size=32, buffer_size=1280
    config = attr.evolve(PPO_CONFIG, hyperparameters=new_hyperparams, max_steps=100000)
    _check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=5.0)
    config = attr.evolve(PPO_CONFIG, hyperparameters=new_hyperparams, max_steps=10000)
    _check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=1.0)
#def test_conthybrid_ppo():
# env = HybridEnvironment([BRAIN_NAME], continuous_action_size=1, discrete_action_size=0, step_size=0.8)
# new_hyperparams = attr.evolve(
# PPO_CONFIG.hyperparameters, batch_size=128, buffer_size=1280
# )
# config = attr.evolve(PPO_CONFIG, hyperparameters=new_hyperparams, max_steps=10000)
# _check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=1.0)
#def test_dischybrid_ppo():
# env = HybridEnvironment([BRAIN_NAME], continuous_action_size=0, discrete_action_size=1, step_size=0.8)
# new_hyperparams = attr.evolve(
# PPO_CONFIG.hyperparameters, batch_size=128, buffer_size=1280
# )
# config = attr.evolve(PPO_CONFIG, hyperparameters=new_hyperparams, max_steps=10000)
# _check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=1.0)
#jdef test_2dhybrid_ppo():
#j env = HybridEnvironment([BRAIN_NAME], continuous_action_size=2, discrete_action_size=2, step_size=0.8)
#j new_hyperparams = attr.evolve(
#j PPO_CONFIG.hyperparameters, batch_size=256, buffer_size=2560, beta=.05
#j )
#j config = attr.evolve(PPO_CONFIG, hyperparameters=new_hyperparams, max_steps=10000)
#j _check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=1.0)
#j
#jdef test_3chybrid_ppo():
#j env = HybridEnvironment([BRAIN_NAME], continuous_action_size=2, discrete_action_size=1, step_size=0.8)
#j new_hyperparams = attr.evolve(
#j PPO_CONFIG.hyperparameters, batch_size=128, buffer_size=1280, beta=.01
#j )
#j config = attr.evolve(PPO_CONFIG, hyperparameters=new_hyperparams, max_steps=10000)
#j _check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=1.0)
#def test_3ddhybrid_ppo():
# env = HybridEnvironment([BRAIN_NAME], continuous_action_size=1, discrete_action_size=2, step_size=0.8)
# new_hyperparams = attr.evolve(
# PPO_CONFIG.hyperparameters, batch_size=128, buffer_size=1280, beta=.05
# )
# config = attr.evolve(PPO_CONFIG, hyperparameters=new_hyperparams, max_steps=10000)
# _check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=1.0)
#@pytest.mark.parametrize("use_discrete", [True, False])
#def test_2d_ppo(use_discrete):
# env = SimpleEnvironment(
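
All of these tests, live or commented out, follow the same pattern: build a HybridEnvironment, derive a trainer config from PPO_CONFIG with attr.evolve, and assert that training reaches the success threshold. A minimal sketch of the attr.evolve copy-and-override pattern, using simplified stand-in settings classes rather than the real ML-Agents config types:

import attr

@attr.s(auto_attribs=True)
class Hyperparameters:
    batch_size: int = 1024
    buffer_size: int = 10240
    beta: float = 5e-3

@attr.s(auto_attribs=True)
class TrainerConfig:
    hyperparameters: Hyperparameters = attr.Factory(Hyperparameters)
    max_steps: int = 500000

BASE_CONFIG = TrainerConfig()

# attr.evolve returns a modified copy, so the shared base config is never mutated
new_hyperparams = attr.evolve(BASE_CONFIG.hyperparameters, batch_size=128, buffer_size=1280)
config = attr.evolve(BASE_CONFIG, hyperparameters=new_hyperparams, max_steps=10000)

assert BASE_CONFIG.hyperparameters.batch_size == 1024  # base left untouched
assert config.hyperparameters.batch_size == 128        # override applied to the copy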

ml-agents/mlagents/trainers/torch/action_models.py (65 changes)


EPSILON = 1e-7  # Small value to avoid divide by zero

class ActionModel(nn.Module, abc.ABC):
    def _sample_action(self, dists: List[DistInstance]) -> List[torch.Tensor]:
        """
        Samples actions from list of distribution instances
        """
        actions = []
        for action_dist in dists:
            action = action_dist.sample()
            actions.append(action)
        return actions

    @abc.abstractmethod
    def evaluate(self, inputs: torch.Tensor, masks: torch.Tensor, actions: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
        """
        Returns the log_probs and entropies of actions
        """
        pass

    @abc.abstractmethod
    def get_action_out(self, inputs: torch.Tensor, masks: torch.Tensor) -> torch.Tensor:
        """
        Returns the tensor to be exported to ONNX for the distribution
        """
        pass

    @abc.abstractmethod
    def forward(self, inputs: torch.Tensor, masks: torch.Tensor):
        """
        Returns the actions, log probs and entropies for given input
        """
        pass

class HybridActionModel(ActionModel):
class HybridActionModel(nn.Module):
    def __init__(
        self,
        hidden_size: int,

        )
        )
        self._split_list.append(continuous_act_size)

    def _sample_action(self, dists: List[DistInstance]) -> List[torch.Tensor]:
        """
        Samples actions from list of distribution instances
        """
        actions = []
        for action_dist in dists:
            action = action_dist.sample()
            actions.append(action)
        return actions

    def _get_dists(self, inputs: torch.Tensor, masks: torch.Tensor) -> Tuple[List[DistInstance], List[DiscreteDistInstance]]:
        distribution_instances: List[DistInstance] = []
        for distribution in self._distributions:
            dist_instances = distribution(inputs, masks)
            for dist_instance in dist_instances:
                distribution_instances.append(dist_instance)
        return distribution_instances

    def evaluate(self, inputs: torch.Tensor, masks: torch.Tensor, actions: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
        dists = self._get_dists(inputs, masks)
        split_actions = torch.split(actions, self._split_list, dim=1)

        dists = self._get_dists(inputs, masks)
        return torch.cat([dist.exported_model_output() for dist in dists], dim=1)

    def _get_dists(self, inputs: torch.Tensor, masks: torch.Tensor) -> Tuple[List[DistInstance], List[DiscreteDistInstance]]:
        distribution_instances: List[DistInstance] = []
        for distribution in self._distributions:
            dist_instances = distribution(inputs, masks)
            for dist_instance in dist_instances:
                distribution_instances.append(dist_instance)
        return distribution_instances

    def forward(self, inputs: torch.Tensor, masks: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
        dists = self._get_dists(inputs, masks)
        action_outs : List[torch.Tensor] = []
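
In evaluate, torch.split with self._split_list undoes the concatenation that produced the flat hybrid action tensor, handing each distribution back the slice of actions it is responsible for. A minimal sketch of that split with hypothetical sizes (one continuous block plus two discrete branches), separate from the actual model code:

import torch

split_list = [2, 1, 1]  # hypothetical: 2 continuous actions, then two discrete branches
actions = torch.tensor([[0.1, -0.3, 1.0, 0.0],
                        [0.5, 0.2, 0.0, 1.0]])

# torch.split with a list of sizes returns one chunk per entry, in order,
# so each chunk lines up with the distribution that produced it
continuous_part, branch_0, branch_1 = torch.split(actions, split_list, dim=1)

print(continuous_part.shape)  # torch.Size([2, 2])
print(branch_0.shape)         # torch.Size([2, 1])
print(branch_1.shape)         # torch.Size([2, 1])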
