Fix training not quitting when play button is unchecked (#2376)

This fixes an issue where stopping the game while training in the Editor did not end training, due to the new asynchronous SubprocessEnvManager changes. Another minor change moves the `env_manager.close()` call in TrainerController to the end of `start_learning`, so that we are more likely to save the model if something goes wrong during the environment shutdown (this occurs sometimes on Windows machines).
5 年前 · 98297be9
--- a/ml-agents-envs/mlagents/envs/environment.py
+++ b/ml-agents-envs/mlagents/envs/environment.py
 from .brain import AllBrainInfo, BrainInfo, BrainParameters
 from .exception import (
    UnityEnvironmentException,
+    UnityCommunicationException,
    UnityActionException,
    UnityTimeOutException,
 )
                self._generate_reset_input(train_mode, config, custom_reset_parameters)
            )
            if outputs is None:
-                raise KeyboardInterrupt
+                raise UnityCommunicationException("Communicator has stopped.")
            rl_output = outputs.rl_output
            s = self._get_state(rl_output)
            self._global_done = s[1]
            with hierarchical_timer("communicator.exchange"):
                outputs = self.communicator.exchange(step_input)
            if outputs is None:
-                raise KeyboardInterrupt
+                raise UnityCommunicationException("Communicator has stopped.")
            rl_output = outputs.rl_output
            state = self._get_state(rl_output)
            self._global_done = state[1]
--- a/ml-agents-envs/mlagents/envs/exception.py
+++ b/ml-agents-envs/mlagents/envs/exception.py
    pass


+class UnityCommunicationException(UnityException):
+    """
+    Related to errors with the communicator.
+
+    Raised when an exchange with the Unity communicator returns no output
+    (the communicator has stopped), or when sending to or receiving from an
+    environment worker fails.
+    """
+
+    pass
+
+
 class UnityActionException(UnityException):
    """
    Related to errors with sending actions.
--- a/ml-agents-envs/mlagents/envs/subprocess_env_manager.py
+++ b/ml-agents-envs/mlagents/envs/subprocess_env_manager.py
 import cloudpickle

 from mlagents.envs import UnityEnvironment
+from mlagents.envs.exception import UnityCommunicationException
 from multiprocessing import Process, Pipe, Queue
 from multiprocessing.connection import Connection
 from queue import Empty as EmptyQueueException
            cmd = EnvironmentCommand(name, payload)
            self.conn.send(cmd)
        except (BrokenPipeError, EOFError):
-            raise KeyboardInterrupt
+            raise UnityCommunicationException("UnityEnvironment worker: send failed.")

    def recv(self) -> EnvironmentResponse:
        try:
-            raise KeyboardInterrupt
+            raise UnityCommunicationException("UnityEnvironment worker: recv failed.")

    def close(self):
        try:
                _send_response("global_done", env.global_done)
            elif cmd.name == "close":
                break
-    except KeyboardInterrupt:
-        print("UnityEnvironment worker: keyboard interrupt")
+    except (KeyboardInterrupt, UnityCommunicationException):
+        print("UnityEnvironment worker: environment stopping.")
+        step_queue.put(EnvironmentResponse("env_close", worker_id, None))
    finally:
        step_queue.close()
        env.close()
            try:
                while True:
                    step = self.step_queue.get_nowait()
+                    if step.name == "env_close":
+                        raise UnityCommunicationException(
+                            "At least one of the environments has closed."
+                        )
                    self.env_workers[step.worker_id].waiting = False
                    if step.worker_id not in step_workers:
                        worker_steps.append(step)
--- a/ml-agents/mlagents/trainers/trainer_controller.py
+++ b/ml-agents/mlagents/trainers/trainer_controller.py
 from mlagents.envs import BrainParameters
 from mlagents.envs.env_manager import StepInfo
 from mlagents.envs.env_manager import EnvManager
-from mlagents.envs.exception import UnityEnvironmentException
+from mlagents.envs.exception import (
+    UnityEnvironmentException,
+    UnityCommunicationException,
+)
 from mlagents.envs.sampler_class import SamplerManager
 from mlagents.envs.timers import hierarchical_timer, get_timer_tree, timed
 from mlagents.trainers import Trainer, TrainerMetrics
            # Final save Tensorflow model
            if global_step != 0 and self.train_model:
                self._save_model()
-        except KeyboardInterrupt:
+        except (KeyboardInterrupt, UnityCommunicationException):
-        env_manager.close()
+        env_manager.close()

    def end_trainer_episodes(
        self, env: BaseUnityEnvironment, lessons_incremented: Dict[str, bool]