Add additional logic to avoid load being called on every advance (#4934)

4 years ago · 5022d710
--- a/com.unity.ml-agents/CHANGELOG.md
+++ b/com.unity.ml-agents/CHANGELOG.md
  while waiting for a connection, and raises a better error message if it crashes. (#4880)
 - Passing a `-logfile` option in the `--env-args` option to `mlagents-learn` is
  no longer overwritten. (#4880)
+- The `load_weights` function was being called unnecessarily often in the Ghost Trainer, leading to training slowdowns. (#4934)


 ## [1.7.2-preview] - 2020-12-22
--- a/ml-agents/mlagents/trainers/ghost/trainer.py
+++ b/ml-agents/mlagents/trainers/ghost/trainer.py

        next_learning_team = self.controller.get_learning_team

-        # CASE 1: Current learning team is managed by this GhostTrainer.
-        # If the learning team changes, the following loop over queues will push the
-        # new policy into the policy queue for the new learning agent if
-        # that policy is managed by this GhostTrainer. Otherwise, it will save the current snapshot.
-        # CASE 2: Current learning team is managed by a different GhostTrainer.
-        # If the learning team changes to a team managed by this GhostTrainer, this loop
-        # will push the current_snapshot into the correct queue.  Otherwise,
-        # it will continue skipping and swap_snapshot will continue to handle
-        # pushing fixed snapshots
-        # Case 3: No team change. The if statement just continues to push the policy
+        # CASE 1: No team change. The if statement just continues to push the policy
        # into the correct queue (or not if not learning team).
        for brain_name in self._internal_policy_queues:
            internal_policy_queue = self._internal_policy_queues[brain_name]
            except AgentManagerQueue.Empty:
-                pass
-            if next_learning_team in self._team_to_name_to_policy_queue:
+                continue
+            if (
+                self._learning_team == next_learning_team
+                and next_learning_team in self._team_to_name_to_policy_queue
+            ):
                name_to_policy_queue = self._team_to_name_to_policy_queue[
                    next_learning_team
                ]
                    policy = self.get_policy(behavior_id)
                    policy.load_weights(self.current_policy_snapshot[brain_name])
                    name_to_policy_queue[brain_name].put(policy)
+
+        # CASE 2: Current learning team is managed by this GhostTrainer.
+        # If the learning team changes, the following loop over queues will push the
+        # new policy into the policy queue for the new learning agent if
+        # that policy is managed by this GhostTrainer. Otherwise, it will save the current snapshot.
+        # CASE 3: Current learning team is managed by a different GhostTrainer.
+        # If the learning team changes to a team managed by this GhostTrainer, this loop
+        # will push the current_snapshot into the correct queue. Otherwise,
+        # it will continue skipping and swap_snapshot will continue to handle
+        # pushing fixed snapshots.
+        if (
+            self._learning_team != next_learning_team
+            and next_learning_team in self._team_to_name_to_policy_queue
+        ):
+            name_to_policy_queue = self._team_to_name_to_policy_queue[
+                next_learning_team
+            ]
+            for brain_name in name_to_policy_queue:
+                behavior_id = create_name_behavior_id(brain_name, next_learning_team)
+                policy = self.get_policy(behavior_id)
+                policy.load_weights(self.current_policy_snapshot[brain_name])
+                name_to_policy_queue[brain_name].put(policy)

        # Note save and swap should be on different step counters.
        # We don't want to save unless the policy is learning.
--- a/ml-agents/mlagents/trainers/tests/torch/test_ghost.py
+++ b/ml-agents/mlagents/trainers/tests/torch/test_ghost.py
 VECTOR_ACTION_SPACE = 1
 VECTOR_OBS_SPACE = 8
 DISCRETE_ACTION_SPACE = [3, 3, 3, 2]
-BUFFER_INIT_SAMPLES = 513
+BUFFER_INIT_SAMPLES = 10241
 NUM_AGENTS = 12


    assert policy_queue0.empty() and not policy_queue1.empty()
    # clear
    policy_queue1.get_nowait()
-
-    mock_specs = mb.setup_test_behavior_specs(
-        False,
-        False,
-        vector_action_space=VECTOR_ACTION_SPACE,
-        vector_obs_space=VECTOR_OBS_SPACE,
-    )

    buffer = mb.simulate_rollout(BUFFER_INIT_SAMPLES, mock_specs)
    # Mock out reward signal eval