ml-agents/ml-agents/mlagents/trainers/subprocess_env_manager.py


								from typing import Dict, NamedTuple, List, Any, Optional, Callable, Set

								import cloudpickle

								import enum

								import time


								from mlagents_envs.environment import UnityEnvironment

								from mlagents_envs.exception import (

								    UnityCommunicationException,

								    UnityTimeOutException,

								    UnityEnvironmentException,

								    UnityCommunicatorStoppedException,

								)

								from multiprocessing import Process, Pipe, Queue

								from multiprocessing.connection import Connection

								from queue import Empty as EmptyQueueException

								from mlagents_envs.base_env import BaseEnv, BehaviorName, BehaviorSpec

								from mlagents_envs import logging_util

								from mlagents.trainers.env_manager import EnvManager, EnvironmentStep, AllStepResult

								from mlagents.trainers.settings import TrainerSettings

								from mlagents_envs.timers import (

								    TimerNode,

								    timed,

								    hierarchical_timer,

								    reset_timers,

								    get_timer_root,

								)

								from mlagents.trainers.settings import ParameterRandomizationSettings, RunOptions

								from mlagents.trainers.action_info import ActionInfo

								from mlagents_envs.side_channel.environment_parameters_channel import (

								    EnvironmentParametersChannel,

								)

								from mlagents_envs.side_channel.engine_configuration_channel import (

								    EngineConfigurationChannel,

								    EngineConfig,

								)

								from mlagents_envs.side_channel.stats_side_channel import (

								    EnvironmentStats,

								    StatsSideChannel,

								)

								from mlagents.training_analytics_side_channel import TrainingAnalyticsSideChannel

								from mlagents_envs.side_channel.side_channel import SideChannel


								# Module-level logger used by both the manager and the worker entry point below.
								logger = logging_util.get_logger(__name__)

								# Number of seconds SubprocessEnvManager.close() keeps polling for CLOSED
								# messages before it gives up and terminates workers that are still alive.
								WORKER_SHUTDOWN_TIMEOUT_S = 10


								class EnvironmentCommand(enum.Enum):
								    """
								    Message tags exchanged between SubprocessEnvManager and its worker processes.

								    A tag travels either inside an EnvironmentRequest (manager -> worker) or
								    inside an EnvironmentResponse (worker -> manager).
								    """

								    # Request: apply the supplied actions and advance the environment.
								    # The result is put on the step queue with the same tag.
								    STEP = 1

								    # Request/response: fetch the environment's behavior specs.
								    BEHAVIOR_SPECS = 2

								    # Request only: apply environment parameter settings; no response is sent.
								    ENVIRONMENT_PARAMETERS = 3

								    # Request/response: reset the environment and return the fresh step results.
								    RESET = 4

								    # Request only: leave the worker's command loop.
								    CLOSE = 5

								    # Response only: the worker hit an exception; the payload is that exception.
								    ENV_EXITED = 6

								    # Response only: last message a worker puts on the step queue before exiting.
								    CLOSED = 7

								    # Request only: payload is a (behavior_name, trainer_config) pair forwarded
								    # to the training analytics side channel when one is active.
								    TRAINING_STARTED = 8


								class EnvironmentRequest(NamedTuple):
								    """A command sent from the manager to a worker over that worker's pipe."""

								    # Operation the worker should perform.
								    cmd: EnvironmentCommand

								    # Command-specific argument; left as None when the command needs none.
								    payload: Any = None


								class EnvironmentResponse(NamedTuple):
								    """A message sent from a worker back to the manager (via pipe or step queue)."""

								    # Tag describing what this message answers or reports.
								    cmd: EnvironmentCommand

								    # Id of the worker that produced the message; the manager uses it to
								    # index into its list of workers.
								    worker_id: int

								    # Command-specific data: a StepResponse for STEP, the raised exception
								    # for ENV_EXITED, None for CLOSED.
								    payload: Any


								class StepResponse(NamedTuple):
								    """Payload of a STEP response produced by a worker after env.step()."""

								    # Per-behavior step results gathered with env.get_steps().
								    all_step_result: AllStepResult

								    # Root of the worker process's timer tree at the time of the step, if any.
								    timer_root: Optional[TimerNode]

								    # Stats drained from the worker's StatsSideChannel for this step.
								    environment_stats: EnvironmentStats


								class UnityEnvWorker:
								    """
								    Manager-side handle for a single environment subprocess.

								    Holds the process object, the manager's end of the command pipe and the
								    per-worker bookkeeping (last step, last actions, waiting/closed flags).
								    """

								    def __init__(self, process: Process, worker_id: int, conn: Connection):
								        """
								        :param process: The subprocess running the environment.
								        :param worker_id: Index identifying this worker.
								        :param conn: Manager-side end of the pipe to the subprocess.
								        """
								        self.process = process
								        self.worker_id = worker_id
								        self.conn = conn
								        # Latest step received from this worker; starts out empty.
								        self.previous_step: EnvironmentStep = EnvironmentStep.empty(worker_id)
								        # Actions that accompanied the most recent STEP request.
								        self.previous_all_action_info: Dict[str, ActionInfo] = {}
								        # True while a STEP request is outstanding for this worker.
								        self.waiting = False
								        # Set once the worker's CLOSED message has been seen.
								        self.closed = False

								    def send(self, cmd: EnvironmentCommand, payload: Any = None) -> None:
								        """
								        Send a request to the worker process.

								        :param cmd: Command to execute.
								        :param payload: Optional command argument.
								        :raises UnityCommunicationException: if the pipe is broken.
								        """
								        try:
								            req = EnvironmentRequest(cmd, payload)
								            self.conn.send(req)
								        except (BrokenPipeError, EOFError) as err:
								            # Chain the low-level error so the root cause stays visible.
								            raise UnityCommunicationException(
								                "UnityEnvironment worker: send failed."
								            ) from err

								    def recv(self) -> EnvironmentResponse:
								        """
								        Block until the worker sends a response over the pipe.

								        :return: The response received.
								        :raises UnityCommunicationException: if the pipe is broken.
								        :raises Exception: the worker's own exception when the response is
								            tagged ENV_EXITED.
								        """
								        try:
								            response: EnvironmentResponse = self.conn.recv()
								            if response.cmd == EnvironmentCommand.ENV_EXITED:
								                # The worker shipped its exception as the payload; re-raise it here.
								                env_exception: Exception = response.payload
								                raise env_exception
								            return response
								        except (BrokenPipeError, EOFError) as err:
								            raise UnityCommunicationException(
								                "UnityEnvironment worker: recv failed."
								            ) from err

								    def request_close(self) -> None:
								        """
								        Ask the worker to shut down. Best effort: a broken pipe is only
								        logged, since the worker is presumably gone already.
								        """
								        try:
								            self.conn.send(EnvironmentRequest(EnvironmentCommand.CLOSE))
								        except (BrokenPipeError, EOFError):
								            logger.debug(
								                f"UnityEnvWorker {self.worker_id} got exception trying to close."
								            )


								def worker(
								    parent_conn: Connection,
								    step_queue: Queue,
								    pickled_env_factory: bytes,
								    worker_id: int,
								    run_options: RunOptions,
								    log_level: int = logging_util.INFO,
								) -> None:
								    """
								    Entry point of an environment subprocess.

								    Builds the environment from the pickled factory, then serves
								    EnvironmentRequest messages read from ``parent_conn`` until a CLOSE
								    request arrives or an exception is raised.

								    :param parent_conn: Pipe end connected to the manager. Requests are read
								        from it; BEHAVIOR_SPECS, RESET and ENV_EXITED responses are written to it.
								    :param step_queue: Queue that receives STEP results, ENV_EXITED
								        notifications and the final CLOSED marker.
								    :param pickled_env_factory: cloudpickle-serialized callable taking
								        ``(worker_id, side_channels)`` and returning the environment.
								    :param worker_id: Id of this worker. Worker 0 additionally creates the
								        training analytics side channel.
								    :param run_options: Run configuration; its engine settings are applied to
								        the engine configuration channel.
								    :param log_level: Log level to apply inside this process.
								    """
								    env_factory: Callable[
								        [int, List[SideChannel]], UnityEnvironment
								    ] = cloudpickle.loads(pickled_env_factory)
								    env_parameters = EnvironmentParametersChannel()

								    engine_config = EngineConfig(
								        width=run_options.engine_settings.width,
								        height=run_options.engine_settings.height,
								        quality_level=run_options.engine_settings.quality_level,
								        time_scale=run_options.engine_settings.time_scale,
								        target_frame_rate=run_options.engine_settings.target_frame_rate,
								        capture_frame_rate=run_options.engine_settings.capture_frame_rate,
								    )
								    engine_configuration_channel = EngineConfigurationChannel()
								    engine_configuration_channel.set_configuration(engine_config)

								    stats_channel = StatsSideChannel()
								    training_analytics_channel: Optional[TrainingAnalyticsSideChannel] = None
								    if worker_id == 0:
								        training_analytics_channel = TrainingAnalyticsSideChannel()
								    env: UnityEnvironment = None
								    # Set log level. On some platforms, the logger isn't common with the
								    # main process, so we need to set it again.
								    logging_util.set_log_level(log_level)

								    def _send_response(cmd_name: EnvironmentCommand, payload: Any) -> None:
								        # Reply to the manager over the pipe, tagged with this worker's id.
								        parent_conn.send(EnvironmentResponse(cmd_name, worker_id, payload))

								    def _generate_all_results() -> AllStepResult:
								        # Collect the current steps of every behavior known to the environment.
								        all_step_result: AllStepResult = {}
								        for brain_name in env.behavior_specs:
								            all_step_result[brain_name] = env.get_steps(brain_name)
								        return all_step_result

								    def _report_env_exited(ex: BaseException) -> None:
								        # Publish the failure on both channels: the manager checks for
								        # ENV_EXITED on the step queue and on the pipe.
								        step_queue.put(
								            EnvironmentResponse(EnvironmentCommand.ENV_EXITED, worker_id, ex)
								        )
								        _send_response(EnvironmentCommand.ENV_EXITED, ex)

								    try:
								        side_channels = [env_parameters, engine_configuration_channel, stats_channel]
								        if training_analytics_channel is not None:
								            side_channels.append(training_analytics_channel)

								        env = env_factory(worker_id, side_channels)
								        if (
								            not env.academy_capabilities
								            or not env.academy_capabilities.trainingAnalytics
								        ):
								            # Make sure we don't try to send training analytics if the environment doesn't know how to process
								            # them. This wouldn't be catastrophic, but would result in unknown SideChannel UUIDs being used.
								            training_analytics_channel = None
								        if training_analytics_channel:
								            training_analytics_channel.environment_initialized(run_options)

								        while True:
								            req: EnvironmentRequest = parent_conn.recv()
								            if req.cmd == EnvironmentCommand.STEP:
								                all_action_info = req.payload
								                for brain_name, action_info in all_action_info.items():
								                    # Only forward actions for behaviors that actually have agents.
								                    if len(action_info.agent_ids) > 0:
								                        env.set_actions(brain_name, action_info.env_action)
								                env.step()
								                all_step_result = _generate_all_results()
								                # The timers in this process are independent from all the processes and the "main" process
								                # So after we send back the root timer, we can safely clear them.
								                # Note that we could randomly return timers a fraction of the time if we wanted to reduce
								                # the data transferred.
								                # TODO get gauges from the workers and merge them in the main process too.
								                env_stats = stats_channel.get_and_reset_stats()
								                step_response = StepResponse(
								                    all_step_result, get_timer_root(), env_stats
								                )
								                step_queue.put(
								                    EnvironmentResponse(
								                        EnvironmentCommand.STEP, worker_id, step_response
								                    )
								                )
								                reset_timers()
								            elif req.cmd == EnvironmentCommand.BEHAVIOR_SPECS:
								                _send_response(EnvironmentCommand.BEHAVIOR_SPECS, env.behavior_specs)
								            elif req.cmd == EnvironmentCommand.ENVIRONMENT_PARAMETERS:
								                # The payload is None when the caller supplied no parameters;
								                # treat that as "nothing to apply" instead of crashing the worker.
								                for k, v in (req.payload or {}).items():
								                    if isinstance(v, ParameterRandomizationSettings):
								                        v.apply(k, env_parameters)
								            elif req.cmd == EnvironmentCommand.TRAINING_STARTED:
								                behavior_name, trainer_config = req.payload
								                if training_analytics_channel:
								                    training_analytics_channel.training_started(
								                        behavior_name, trainer_config
								                    )
								            elif req.cmd == EnvironmentCommand.RESET:
								                env.reset()
								                all_step_result = _generate_all_results()
								                _send_response(EnvironmentCommand.RESET, all_step_result)
								            elif req.cmd == EnvironmentCommand.CLOSE:
								                break
								    except (
								        KeyboardInterrupt,
								        UnityCommunicationException,
								        UnityTimeOutException,
								        UnityEnvironmentException,
								        UnityCommunicatorStoppedException,
								    ) as ex:
								        # Expected ways for an environment to stop: log quietly.
								        logger.debug(f"UnityEnvironment worker {worker_id}: environment stopping.")
								        _report_env_exited(ex)
								    except Exception as ex:
								        # Anything else is unexpected: log with traceback before reporting.
								        logger.exception(
								            f"UnityEnvironment worker {worker_id}: environment raised an unexpected exception."
								        )
								        _report_env_exited(ex)
								    finally:
								        logger.debug(f"UnityEnvironment worker {worker_id} closing.")
								        if env is not None:
								            env.close()
								        logger.debug(f"UnityEnvironment worker {worker_id} done.")
								        parent_conn.close()
								        # CLOSED is the last message the manager will see from this worker.
								        step_queue.put(EnvironmentResponse(EnvironmentCommand.CLOSED, worker_id, None))
								        step_queue.close()


								class SubprocessEnvManager(EnvManager):

								    def __init__(
								        self,
								        env_factory: Callable[[int, List[SideChannel]], BaseEnv],
								        run_options: RunOptions,
								        n_env: int = 1,
								    ):
								        """
								        Start ``n_env`` worker processes that all report to one shared step queue.

								        :param env_factory: Callable that builds an environment from a worker id
								            and a list of side channels.
								        :param run_options: Run configuration handed to every worker.
								        :param n_env: Number of worker processes to start.
								        """
								        super().__init__()
								        self.env_workers: List[UnityEnvWorker] = []
								        self.step_queue: Queue = Queue()
								        # Count of workers that have not yet reported CLOSED.
								        self.workers_alive = 0
								        for env_index in range(n_env):
								            new_worker = self.create_worker(
								                env_index, self.step_queue, env_factory, run_options
								            )
								            self.env_workers.append(new_worker)
								            self.workers_alive += 1


								    @staticmethod
								    def create_worker(
								        worker_id: int,
								        step_queue: Queue,
								        env_factory: Callable[[int, List[SideChannel]], BaseEnv],
								        run_options: RunOptions,
								    ) -> UnityEnvWorker:
								        """
								        Start one environment subprocess and return the handle used to talk to it.

								        :param worker_id: Id assigned to the new worker.
								        :param step_queue: Queue the worker reports step results to.
								        :param env_factory: Callable that builds the environment inside the worker.
								        :param run_options: Run configuration passed through to the worker.
								        :return: Handle wrapping the started process and the manager's pipe end.
								        """
								        parent_conn, child_conn = Pipe()

								        # Need to use cloudpickle for the env factory function since function objects aren't picklable
								        # on Windows as of Python 3.6.
								        pickled_env_factory = cloudpickle.dumps(env_factory)
								        child_process = Process(
								            target=worker,
								            args=(
								                child_conn,
								                step_queue,
								                pickled_env_factory,
								                worker_id,
								                run_options,
								                logger.level,
								            ),
								        )
								        child_process.start()
								        # The subprocess holds its own handle to the child end once start()
								        # returns. Drop ours, otherwise the pipe never looks closed from this
								        # side and recv() on parent_conn cannot raise EOFError when the
								        # subprocess dies without sending anything.
								        child_conn.close()
								        return UnityEnvWorker(child_process, worker_id, parent_conn)


								    def _queue_steps(self) -> None:
								        """
								        Send a STEP request to every worker that has no step outstanding.

								        Actions are computed from the worker's previous step, remembered on the
								        worker, and the worker is then marked as waiting.
								        """
								        for idle_candidate in self.env_workers:
								            if idle_candidate.waiting:
								                # A step is already in flight for this worker.
								                continue
								            actions = self._take_step(idle_candidate.previous_step)
								            idle_candidate.previous_all_action_info = actions
								            idle_candidate.send(EnvironmentCommand.STEP, actions)
								            idle_candidate.waiting = True


								    def _step(self) -> List[EnvironmentStep]:
								        """
								        Advance the environments and return the steps that have completed.

								        Polls the step queue without blocking until at least one worker has
								        reported, keeping only the first response per worker for this call.

								        :return: One EnvironmentStep per worker that reported.
								        :raises Exception: the worker's exception when an ENV_EXITED message is read.
								        """
								        # Hand new actions to every worker that is not already mid-step.
								        self._queue_steps()

								        collected: List[EnvironmentResponse] = []
								        seen_worker_ids: Set[int] = set()
								        # Keep draining the queue until at least one response has been gathered.
								        while not collected:
								            try:
								                while True:
								                    response: EnvironmentResponse = self.step_queue.get_nowait()
								                    if response.cmd == EnvironmentCommand.ENV_EXITED:
								                        # The payload is the exception raised inside the worker.
								                        worker_error: Exception = response.payload
								                        raise worker_error
								                    self.env_workers[response.worker_id].waiting = False
								                    if response.worker_id in seen_worker_ids:
								                        continue
								                    collected.append(response)
								                    seen_worker_ids.add(response.worker_id)
								            except EmptyQueueException:
								                # Nothing more queued right now; the outer loop decides whether to retry.
								                pass

								        return self._postprocess_steps(collected)


								    def _reset_env(self, config: Optional[Dict] = None) -> List[EnvironmentStep]:
								        """
								        Reset every worker's environment and return the resulting steps.

								        :param config: Environment parameters to apply before resetting.
								        :return: The post-reset EnvironmentStep of each worker, in worker order.
								        """
								        # Wait for in-flight steps to finish, discarding their results.
								        while any(w.waiting for w in self.env_workers):
								            if self.step_queue.empty():
								                continue
								            stale = self.step_queue.get_nowait()
								            self.env_workers[stale.worker_id].waiting = False

								        # Push the parameters first so the reset picks them up.
								        self.set_env_parameters(config)

								        # Issue all RESET requests up front so the workers reset in parallel ...
								        for w in self.env_workers:
								            w.send(EnvironmentCommand.RESET, config)

								        # ... then gather the replies one worker at a time.
								        for w in self.env_workers:
								            reset_reply = w.recv()
								            w.previous_step = EnvironmentStep(
								                reset_reply.payload, w.worker_id, {}, {}
								            )
								        return [w.previous_step for w in self.env_workers]


								    def set_env_parameters(self, config: Optional[Dict] = None) -> None:
								        """
								        Sends environment parameter settings to C# via the
								        EnvironmentParametersChannel for each worker.

								        :param config: Dict of environment parameter keys and values. ``None``
								            is treated as an empty dict.
								        """
								        # The receiving side iterates the payload's items, so never forward
								        # None: send an empty mapping when no parameters were supplied.
								        params: Dict = {} if config is None else config
								        for ew in self.env_workers:
								            ew.send(EnvironmentCommand.ENVIRONMENT_PARAMETERS, params)


								    def on_training_started(
								        self, behavior_name: str, trainer_settings: TrainerSettings
								    ) -> None:
								        """
								        Tell every worker that training has started for a behavior.

								        :param behavior_name: Name of the behavior whose training started.
								        :param trainer_settings: Trainer settings used for that behavior.
								        :return: None
								        """
								        announcement = (behavior_name, trainer_settings)
								        for env_worker in self.env_workers:
								            env_worker.send(EnvironmentCommand.TRAINING_STARTED, announcement)


								    @property
								    def training_behaviors(self) -> Dict[BehaviorName, BehaviorSpec]:
								        """
								        Behavior specs reported by the workers, merged into one dict.

								        Each worker is queried in turn; when several workers report the same
								        behavior name, the entry from the later worker wins.
								        """
								        merged_specs: Dict[BehaviorName, BehaviorSpec] = {}
								        for env_worker in self.env_workers:
								            env_worker.send(EnvironmentCommand.BEHAVIOR_SPECS)
								            reply = env_worker.recv()
								            merged_specs.update(reply.payload)
								        return merged_specs


								    def close(self) -> None:
								        """
								        Shut down all workers.

								        Requests every worker to close, then polls the step queue for their
								        CLOSED messages for up to WORKER_SHUTDOWN_TIMEOUT_S seconds. Workers
								        that have not reported by then and are still alive are terminated.
								        """
								        logger.debug("SubprocessEnvManager closing.")
								        for env_worker in self.env_workers:
								            env_worker.request_close()

								        # Drain the queue until all workers have said CLOSED or time runs out.
								        deadline = time.time() + WORKER_SHUTDOWN_TIMEOUT_S
								        while self.workers_alive > 0 and time.time() < deadline:
								            try:
								                message: EnvironmentResponse = self.step_queue.get_nowait()
								            except EmptyQueueException:
								                continue
								            sender = self.env_workers[message.worker_id]
								            if message.cmd != EnvironmentCommand.CLOSED or sender.closed:
								                # Anything other than a first CLOSED is thrown away.
								                continue
								            sender.closed = True
								            self.workers_alive -= 1

								        self.step_queue.close()

								        # Safety net: terminate workers that never signalled shutdown and report it.
								        if self.workers_alive > 0:
								            logger.error("SubprocessEnvManager had workers that didn't signal shutdown")
								            for straggler in self.env_workers:
								                if straggler.closed or not straggler.process.is_alive():
								                    continue
								                straggler.process.terminate()
								                logger.error(
								                    "A SubprocessEnvManager worker did not shut down correctly so it was forcefully terminated."
								                )
								        self.step_queue.join_thread()


								    def _postprocess_steps(
								        self, env_steps: List[EnvironmentResponse]
								    ) -> List[EnvironmentStep]:
								        """
								        Turn raw STEP responses into EnvironmentStep objects.

								        Each new step is also stored as its worker's ``previous_step``, and any
								        timer trees shipped by the workers are merged under a "workers" timer.

								        :param env_steps: STEP responses whose payloads are StepResponse tuples.
								        :return: One EnvironmentStep per response, in the same order.
								        """
								        converted: List[EnvironmentStep] = []
								        worker_timers = []
								        for response in env_steps:
								            step_payload: StepResponse = response.payload
								            origin = self.env_workers[response.worker_id]
								            latest = EnvironmentStep(
								                step_payload.all_step_result,
								                response.worker_id,
								                origin.previous_all_action_info,
								                step_payload.environment_stats,
								            )
								            converted.append(latest)
								            origin.previous_step = latest

								            if step_payload.timer_root:
								                worker_timers.append(step_payload.timer_root)

								        if not worker_timers:
								            return converted

								        # Fold every worker's timer tree into this process's timers.
								        with hierarchical_timer("workers") as main_timer_node:
								            for node in worker_timers:
								                main_timer_node.merge(
								                    node, root_name="worker_root", is_parallel=True
								                )
								        return converted


								    @timed
								    def _take_step(self, last_step: EnvironmentStep) -> Dict[BehaviorName, ActionInfo]:
								        """
								        Ask each known policy for actions based on a worker's last step.

								        Behaviors without an entry in ``self.policies`` are skipped.

								        :param last_step: Most recent step of the worker to act for.
								        :return: Mapping from behavior name to the ActionInfo its policy produced.
								        """
								        # steps[0] is the first element of the per-behavior result
								        # (presumably the decision steps -- confirm against env.get_steps).
								        return {
								            name: self.policies[name].get_action(steps[0], last_step.worker_id)
								            for name, steps in last_step.current_all_step_result.items()
								            if name in self.policies
								        }