fix set_actions_for_agent

4 年前 · f2c9d184
--- a/Project/Assets/ML-Agents/Examples/PushBlock/Demos/ExpertPush.demo.meta
+++ b/Project/Assets/ML-Agents/Examples/PushBlock/Demos/ExpertPush.demo.meta
 guid: 7f11f35191533404c9957443a681aaee
 ScriptedImporter:
  fileIDToRecycleName:
-    11400000: Assets/ML-Agents/Examples/PushBlock/Demos/ExpertPush.demo
+    11400002: Assets/ML-Agents/Examples/Pushblock/Demos/ExpertPush.demo
  externalObjects: {}
  userData: ' (Unity.MLAgents.Demonstrations.DemonstrationSummary)'
  assetBundleName: 
--- a/ml-agents-envs/mlagents_envs/base_env.py
+++ b/ml-agents-envs/mlagents_envs/base_env.py
        step. The Actions must be in the same order as the order received in
        the DecisionSteps.
        :param behavior_name: The name of the behavior the agents are part of
-        :param action: ActionTuple tuple of continuous and/or discrete action
+        :param action: ActionTuple tuple of continuous and/or discrete action.
+        Actions are np.arrays with dimensions (n_agents, continuous_size) and
+        (n_agents, discrete_size), respectively.
        """

    @abstractmethod
        :param behavior_name: The name of the behavior the agent is part of
        :param agent_id: The id of the agent the action is set for
        :param action: ActionTuple tuple of continuous and/or discrete action
+        Actions are np.arrays with dimensions (1, continuous_size) and
+        (1, discrete_size), respectively. Note that the leading dimension of 1 is
+        because this action is meant for a single agent.
        """

    @abstractmethod
--- a/ml-agents-envs/mlagents_envs/environment.py
+++ b/ml-agents-envs/mlagents_envs/environment.py
                    agent_id
                )
            ) from ie
-        self._env_actions[behavior_name][index] = action
+        if action_spec.continuous_size > 0:
+            self._env_actions[behavior_name].continuous[index] = action.continuous[0]
+        if action_spec.discrete_size > 0:
+            self._env_actions[behavior_name].discrete[index] = action.discrete[0]

    def get_steps(
        self, behavior_name: BehaviorName
--- a/ml-agents/mlagents/trainers/torch/utils.py
+++ b/ml-agents/mlagents/trainers/torch/utils.py
    discrete log probs of individual actions as well as all the log probs for an entire branch.
    Utility functions provide numpy <=> tensor conversions to be used by the optimizers.
    :param continuous_tensor: Torch tensor corresponding to log probs of continuous actions
-    :param discrete_list: List of Torch tensors each corresponding to log probs of the discrete actions that were sampled.
+    :param discrete_list: List of Torch tensors each corresponding to log probs of the discrete actions that were
+    sampled.
-    each Tensor corresponds to one discrete branch log probabilities. 
+    each Tensor corresponds to the log probabilities of one discrete branch.
    """

    continuous_tensor: torch.Tensor