import atexit
import glob
import io
import logging
import numpy as np
import os
import shlex
import subprocess

from .brain import BrainInfo, BrainParameters, AllBrainInfo
from .exception import UnityEnvironmentException, UnityActionException, UnityTimeOutException
from .communicator_objects import (
    UnityRLInput, UnityRLOutput, AgentActionProto,
    EnvironmentParametersProto, UnityRLInitializationInput, UnityRLInitializationOutput,
    UnityInput, UnityOutput)
from .rpc_communicator import RpcCommunicator
from .socket_communicator import SocketCommunicator

from sys import platform
from PIL import Image

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("mlagents.envs")

# Scalar numeric types accepted as actions / memories / values. The abstract
# NumPy classes are used because the `np.float_` alias was removed in NumPy 2.0.
_SCALAR_TYPES = (int, float, np.integer, np.floating)
# Anything that is "one brain's worth" of numeric data rather than a
# {brain_name: data} dictionary.
_ARRAY_TYPES = _SCALAR_TYPES + (list, np.ndarray)


class UnityEnvironment(object):
    """
    Python-side handle on a Unity environment: optionally launches the
    executable, talks to it through a communicator and exposes reset/step.
    """

    def __init__(self, file_name=None, worker_id=0,
                 base_port=5005, seed=0,
                 docker_training=False, no_graphics=False):
        """
        Starts a new unity environment and establishes a connection with the environment.
        Notice: Currently communication between Unity and Python takes place over an open socket without authentication.
        Ensure that the network where training takes place is secure.

        :string file_name: Name of Unity environment binary. If None, connects to a running Unity Editor.
        :int base_port: Baseline port number to connect to Unity environment over. worker_id increments over this.
        :int worker_id: Number to add to communication port (5005) [0]. Used for asynchronous agent scenarios.
        :int seed: Seed sent to the environment in the initialization message.
        :param docker_training: Informs this class whether the process is being run within a container.
        :param no_graphics: Whether to run the Unity simulator in no-graphics mode
        :raises UnityEnvironmentException: if file_name is None with a non-zero worker_id, if the
            executable cannot be found, or if the API versions of Unity and Python differ.
        """
        # Registered first so that a launched process is always cleaned up at exit;
        # _close() tolerates attributes that have not been assigned yet.
        atexit.register(self._close)
        self.port = base_port + worker_id
        self._buffer_size = 12000
        self._version_ = "API-4"
        self._loaded = False  # If true, this means the environment was successfully loaded
        self.proc1 = None  # The process that is started. If None, no process was started
        self.communicator = self.get_communicator(worker_id, base_port)

        # If the environment name is None, a new environment will not be launched
        # and the communicator will directly try to connect to an existing unity environment.
        # If the worker-id is not 0 and the environment name is None, an error is thrown
        if file_name is None and worker_id != 0:
            raise UnityEnvironmentException(
                "If the environment name is None, the worker-id must be 0 in order to connect with the Editor.")
        if file_name is not None:
            self.executable_launcher(file_name, docker_training, no_graphics)
        else:
            logger.info("Start training by pressing the Play button in the Unity Editor.")
        self._loaded = True

        rl_init_parameters_in = UnityRLInitializationInput(
            seed=seed
        )
        try:
            aca_params = self.send_academy_parameters(rl_init_parameters_in)
        except UnityTimeOutException:
            self._close()
            raise
        # TODO : think of a better way to expose the academyParameters
        self._unity_version = aca_params.version
        if self._unity_version != self._version_:
            # Shut the environment down now instead of leaving the launched
            # process running until interpreter exit.
            self._close()
            raise UnityEnvironmentException(
                "The API number is not compatible between Unity and python. Python API : {0}, Unity API : "
                "{1}.\nPlease go to https://github.com/Unity-Technologies/ml-agents to download the latest version "
                "of ML-Agents.".format(self._version_, self._unity_version))
        self._n_agents = {}
        self._global_done = None
        self._academy_name = aca_params.name
        self._log_path = aca_params.log_path
        self._brains = {}
        self._brain_names = []
        self._external_brain_names = []
        for brain_param in aca_params.brain_parameters:
            self._brain_names += [brain_param.brain_name]
            resolution = [{
                "height": x.height,
                "width": x.width,
                "blackAndWhite": x.gray_scale
            } for x in brain_param.camera_resolutions]
            self._brains[brain_param.brain_name] = \
                BrainParameters(brain_param.brain_name, {
                    "vectorObservationSize": brain_param.vector_observation_size,
                    "numStackedVectorObservations": brain_param.num_stacked_vector_observations,
                    "cameraResolutions": resolution,
                    "vectorActionSize": brain_param.vector_action_size,
                    "vectorActionDescriptions": brain_param.vector_action_descriptions,
                    "vectorActionSpaceType": brain_param.vector_action_space_type
                })
            # brain_type 2 is what this class treats as an "external" brain.
            if brain_param.brain_type == 2:
                self._external_brain_names += [brain_param.brain_name]
        self._num_brains = len(self._brain_names)
        self._num_external_brains = len(self._external_brain_names)
        self._resetParameters = dict(aca_params.environment_parameters.float_parameters)  # TODO
        logger.info("\n'{0}' started successfully!\n{1}".format(self._academy_name, str(self)))
        if self._num_external_brains == 0:
            logger.warning(" No External Brains found in the Unity Environment. "
                           "You will not be able to pass actions to your agent(s).")

    @property
    def logfile_path(self):
        return self._log_path

    @property
    def brains(self):
        return self._brains

    @property
    def global_done(self):
        return self._global_done

    @property
    def academy_name(self):
        return self._academy_name

    @property
    def number_brains(self):
        return self._num_brains

    @property
    def number_external_brains(self):
        return self._num_external_brains

    @property
    def brain_names(self):
        return self._brain_names

    @property
    def external_brain_names(self):
        return self._external_brain_names

    @staticmethod
    def _first_glob_match(patterns):
        """Return the first glob match of the first pattern that matches anything, else None."""
        for pattern in patterns:
            matches = glob.glob(pattern)
            if matches:
                return matches[0]
        return None

    def executable_launcher(self, file_name, docker_training, no_graphics):
        """
        Locates the environment executable for the current platform and starts it.

        The platform-specific extension is stripped from file_name and the executable is
        searched for relative to the current working directory first, then relative to
        file_name as given. The started process is stored in self.proc1.

        :param file_name: Name or path of the environment binary, with or without extension.
        :param docker_training: If true, the executable is started through xvfb-run.
        :param no_graphics: If true (and not docker_training), passes -nographics -batchmode.
        :raises UnityEnvironmentException: if no matching executable is found.
        """
        cwd = os.getcwd()
        file_name = (file_name.strip()
                     .replace('.app', '').replace('.exe', '').replace('.x86_64', '').replace('.x86', ''))
        true_filename = os.path.basename(os.path.normpath(file_name))
        logger.debug('The true file name is {}'.format(true_filename))

        launch_string = None
        if platform in ("linux", "linux2"):
            launch_string = self._first_glob_match([
                os.path.join(cwd, file_name) + '.x86_64',
                os.path.join(cwd, file_name) + '.x86',
                file_name + '.x86_64',
                file_name + '.x86',
            ])
        elif platform == 'darwin':
            launch_string = self._first_glob_match([
                os.path.join(cwd, file_name + '.app', 'Contents', 'MacOS', true_filename),
                os.path.join(file_name + '.app', 'Contents', 'MacOS', true_filename),
                os.path.join(cwd, file_name + '.app', 'Contents', 'MacOS', '*'),
                os.path.join(file_name + '.app', 'Contents', 'MacOS', '*'),
            ])
        elif platform == 'win32':
            launch_string = self._first_glob_match([
                os.path.join(cwd, file_name + '.exe'),
                file_name + '.exe',
            ])

        if launch_string is None:
            self._close()
            raise UnityEnvironmentException("Couldn't launch the {0} environment. "
                                            "Provided filename does not match any environments."
                                            .format(true_filename))

        logger.debug("This is the launch string {}".format(launch_string))
        # Launch Unity environment
        if not docker_training:
            if no_graphics:
                self.proc1 = subprocess.Popen(
                    [launch_string, '-nographics', '-batchmode',
                     '--port', str(self.port)])
            else:
                self.proc1 = subprocess.Popen(
                    [launch_string, '--port', str(self.port)])
        else:
            # Comments for future maintenance:
            #     xvfb-run is a wrapper around Xvfb, a virtual xserver where all
            #     rendering is done to virtual memory. It automatically creates a
            #     new virtual server automatically picking a server number `auto-servernum`.
            #     The server is passed the arguments using `server-args`, we are telling
            #     Xvfb to create Screen number 0 with width 640, height 480 and depth 24 bits.
            #     Note that 640 X 480 are the default width and height. The main reason for
            #     us to add this is because we'd like to change the depth from the default
            #     of 8 bits to 24.
            #     Unfortunately, this means that we will need to pass the arguments through
            #     a shell which is why we set `shell=True`. Now, this adds its own
            #     complications. E.g SIGINT can bounce off the shell and not get propagated
            #     to the child processes. This is why we add `exec`, so that the shell gets
            #     launched, the arguments are passed to `xvfb-run`. `exec` replaces the shell
            #     we created with `xvfb`.
            # The path is shell-quoted so that executables whose path contains
            # spaces or shell metacharacters are launched correctly.
            docker_ls = ("exec xvfb-run --auto-servernum"
                         " --server-args='-screen 0 640x480x24'"
                         " {0} --port {1}").format(shlex.quote(launch_string), str(self.port))
            # NOTE(review): stdout/stderr are piped but never read in this file;
            # a very chatty child could fill the pipe buffers - confirm this is intended.
            self.proc1 = subprocess.Popen(docker_ls,
                                          stdout=subprocess.PIPE,
                                          stderr=subprocess.PIPE,
                                          shell=True)

    def get_communicator(self, worker_id, base_port):
        """Creates the communicator used to exchange messages with Unity."""
        return RpcCommunicator(worker_id, base_port)
        # return SocketCommunicator(worker_id, base_port)

    def __str__(self):
        reset_parameters = "\n\t\t".join(
            [str(k) + " -> " + str(self._resetParameters[k]) for k in self._resetParameters])
        brain_descriptions = '\n'.join([str(self._brains[b]) for b in self._brains])
        return '''Unity Academy name: {0}
        Number of Brains: {1}
        Number of External Brains : {2}
        Reset Parameters :\n\t\t{3}'''.format(
            self._academy_name, str(self._num_brains),
            str(self._num_external_brains), reset_parameters) + '\n' + brain_descriptions

    def _apply_output(self, outputs) -> AllBrainInfo:
        """Updates the done flag and per-brain agent counts from a Unity reply; returns its AllBrainInfo."""
        all_brain_info, global_done = self._get_state(outputs.rl_output)
        self._global_done = global_done
        for _b in self._external_brain_names:
            self._n_agents[_b] = len(all_brain_info[_b].agents)
        return all_brain_info

    def reset(self, config=None, train_mode=True) -> AllBrainInfo:
        """
        Sends a signal to reset the unity environment.
        :param config: Optional dictionary of reset parameter names to int/float values. Valid entries
            are remembered and re-sent on later resets that pass no config.
        :param train_mode: Forwarded to the environment as the `is_training` flag.
        :return: AllBrainInfo  : A Data structure corresponding to the initial reset state of the environment.
        :raises UnityEnvironmentException: on a non-numeric value, an unknown parameter name, or when
            no environment is loaded.
        """
        if config is None:
            config = self._resetParameters
        elif config:
            logger.info("Academy reset with parameters: {0}"
                        .format(', '.join([str(x) + ' -> ' + str(config[x]) for x in config])))
        for k in config:
            # The type error takes precedence over the unknown-name error.
            if not isinstance(config[k], (int, float)):
                raise UnityEnvironmentException(
                    "The value for parameter '{0}' must be an Integer or a Float.".format(k))
            if k not in self._resetParameters:
                raise UnityEnvironmentException("The parameter '{0}' is not a valid parameter.".format(k))
            self._resetParameters[k] = config[k]

        if not self._loaded:
            raise UnityEnvironmentException("No Unity environment is loaded.")

        outputs = self.communicator.exchange(
            self._generate_reset_input(train_mode, config)
        )
        if outputs is None:
            raise KeyboardInterrupt
        return self._apply_output(outputs)

    def _wrap_single_brain_input(self, data, accepted_types, many_brains_message, input_name):
        """
        Turns a bare (non-dictionary) input into {only_external_brain_name: data}.

        Inputs that are not an instance of accepted_types are returned untouched.
        :raises UnityActionException: if there is not exactly one external brain.
        """
        if not isinstance(data, accepted_types):
            return data
        if self._num_external_brains == 1:
            return {self._external_brain_names[0]: data}
        if self._num_external_brains > 1:
            raise UnityActionException(many_brains_message.format(self._num_brains))
        raise UnityActionException(
            "There are no external brains in the environment, "
            "step cannot take a {0} input".format(input_name))

    def step(self, vector_action=None, memory=None, text_action=None, value=None) -> AllBrainInfo:
        """
        Provides the environment with an action, moves the environment dynamics forward accordingly,
        and returns observation, state, and reward information to the agent.

        Each argument is either a dictionary keyed by external brain name or, when there is exactly
        one external brain, the bare data for that brain. Dictionaries passed in are not modified.

        :param vector_action: Agent's vector action to send to environment. Can be a scalar or vector of int/floats.
        :param memory: Vector corresponding to memory used for RNNs, frame-stacking, or other auto-regressive process.
        :param text_action: Text action to send to environment: one string (sent to every agent) or one
            string per agent. None or an empty list means no text.
        :param value: State/action value estimates: one scalar (used for every agent) or one per agent.
        :return: AllBrainInfo  : A Data structure corresponding to the new state of the environment.
        :raises UnityEnvironmentException: if no environment is loaded.
        :raises UnityActionException: if reset() has not been called, the episode is completed, a brain
            name is unknown, or the size of an input does not match what the environment expects.
        """
        if not self._loaded:
            raise UnityEnvironmentException("No Unity environment is loaded.")
        if self._global_done:
            raise UnityActionException("The episode is completed. Reset the environment with 'reset()'")
        if self._global_done is None:
            raise UnityActionException(
                "You cannot conduct step without first calling reset. Reset the environment with 'reset()'")

        vector_action = {} if vector_action is None else vector_action
        memory = {} if memory is None else memory
        text_action = {} if text_action is None else text_action
        value = {} if value is None else value

        vector_action = self._wrap_single_brain_input(
            vector_action, _ARRAY_TYPES,
            "You have {0} brains, you need to feed a dictionary of brain names as keys, "
            "and vector_actions as values",
            "vector_action")
        memory = self._wrap_single_brain_input(
            memory, _ARRAY_TYPES,
            "You have {0} brains, you need to feed a dictionary of brain names as keys "
            "and memories as values",
            "memory")
        text_action = self._wrap_single_brain_input(
            text_action, (str, list, np.ndarray),
            "You have {0} brains, you need to feed a dictionary of brain names as keys "
            "and text_actions as values",
            "text_action")
        value = self._wrap_single_brain_input(
            value, _ARRAY_TYPES,
            "You have {0} brains, you need to feed a dictionary of brain names as keys "
            "and state/action value estimates as values",
            "value")

        for brain_name in list(vector_action.keys()) + list(memory.keys()) + list(text_action.keys()):
            if brain_name not in self._external_brain_names:
                raise UnityActionException(
                    "The name {0} does not correspond to an external brain "
                    "in the environment".format(brain_name))

        # Work on shallow copies so the caller's dictionaries are left untouched.
        vector_action = dict(vector_action)
        memory = dict(memory)
        text_action = dict(text_action)

        for b in self._external_brain_names:
            n_agent = self._n_agents[b]
            brain = self._brains[b]
            space_type = brain.vector_action_space_type
            if space_type == "discrete":
                # One entry per action branch and agent.
                expected_len = n_agent * len(brain.vector_action_space_size)
            else:
                expected_len = n_agent * brain.vector_action_space_size[0]

            if b not in vector_action:
                # A brain without an action gets an all-zero action.
                vector_action[b] = [0.0] * expected_len
            else:
                vector_action[b] = self._flatten(vector_action[b])

            if memory.get(b) is None:
                memory[b] = []
            else:
                memory[b] = self._flatten(memory[b])

            brain_text = text_action.get(b)
            if brain_text is None:
                brain_text = [""] * n_agent
            elif isinstance(brain_text, str):
                brain_text = [brain_text] * n_agent
            elif len(brain_text) == 0:
                # An empty sequence means "no text"; expand it so that every
                # agent can be indexed when the message is built.
                brain_text = [""] * n_agent
            text_action[b] = brain_text

            if len(text_action[b]) != n_agent:
                raise UnityActionException(
                    "There was a mismatch between the provided text_action and environment's expectation: "
                    "The brain {0} expected {1} text_action but was given {2}".format(
                        b, n_agent, len(text_action[b])))
            if not (space_type in ("discrete", "continuous")
                    and len(vector_action[b]) == expected_len):
                raise UnityActionException(
                    "There was a mismatch between the provided action and environment's expectation: "
                    "The brain {0} expected {1} {2} action(s), but was provided: {3}"
                    .format(b, str(expected_len), space_type, str(vector_action[b])))

        outputs = self.communicator.exchange(
            self._generate_step_input(vector_action, memory, text_action, value)
        )
        if outputs is None:
            raise KeyboardInterrupt
        return self._apply_output(outputs)

    def close(self):
        """
        Sends a shutdown signal to the unity environment, and closes the socket connection.
        :raises UnityEnvironmentException: if no environment is loaded.
        """
        if self._loaded:
            self._close()
        else:
            raise UnityEnvironmentException("No Unity environment is loaded.")

    def _close(self):
        """Marks the environment as unloaded, closes the communicator and kills the launched process."""
        self._loaded = False
        # Also runs as an atexit hook, possibly after a constructor that failed
        # early, so the attributes may not exist yet.
        communicator = getattr(self, "communicator", None)
        if communicator is not None:
            communicator.close()
        process = getattr(self, "proc1", None)
        if process is not None:
            process.kill()

    @staticmethod
    def _flatten(arr):
        """
        Converts a scalar, list or numpy array (at most one level of nesting) to a flat list of floats.
        :param arr: scalar, list, list of lists/arrays or numpy array.
        :return: flattened list of floats (an empty input is returned as is).
        """
        if isinstance(arr, _SCALAR_TYPES):
            arr = [float(arr)]
        if isinstance(arr, np.ndarray):
            arr = arr.tolist()
        if len(arr) == 0:
            return arr
        if isinstance(arr[0], np.ndarray):
            arr = [item for sublist in arr for item in sublist.tolist()]
        if isinstance(arr[0], list):
            arr = [item for sublist in arr for item in sublist]
        arr = [float(x) for x in arr]
        return arr

    @staticmethod
    def _process_pixels(image_bytes, gray_scale):
        """
        Converts byte array observation image into numpy array scaled by 1/255
        and optionally converts it to grey scale
        :param image_bytes: input byte array corresponding to image
        :param gray_scale: if true, the channels are averaged into a single one
        :return: processed numpy array of observation from environment
        """
        s = bytearray(image_bytes)
        image = Image.open(io.BytesIO(s))
        s = np.array(image) / 255.0
        if gray_scale:
            s = np.mean(s, axis=2)
            s = np.reshape(s, [s.shape[0], s.shape[1], 1])
        return s

    def _get_state(self, output: UnityRLOutput) -> (AllBrainInfo, bool):
        """
        Collects experience information from all brains present in the output message.
        :param output: the rl_output part of a reply from Unity.
        :return: a dictionary of BrainInfo objects keyed by brain name, and the global done flag.
        """
        _data = {}
        global_done = output.global_done
        for b in output.agentInfos:
            agent_info_list = output.agentInfos[b].value
            vis_obs = []
            for i in range(self.brains[b].number_visual_observations):
                obs = [self._process_pixels(x.visual_observations[i],
                                            self.brains[b].camera_resolutions[i]['blackAndWhite'])
                       for x in agent_info_list]
                vis_obs += [np.array(obs)]

            memory_size = max((len(x.memories) for x in agent_info_list), default=0)
            if memory_size == 0:
                memory = np.zeros((0, 0))
            else:
                # Zero-pad every agent's memory to the longest one so they stack.
                for x in agent_info_list:
                    x.memories.extend([0] * (memory_size - len(x.memories)))
                memory = np.array([x.memories for x in agent_info_list])

            total_num_actions = sum(self.brains[b].vector_action_space_size)
            mask_actions = np.ones((len(agent_info_list), total_num_actions))
            for agent_index, agent_info in enumerate(agent_info_list):
                if agent_info.action_mask is not None:
                    if len(agent_info.action_mask) == total_num_actions:
                        # The incoming mask is inverted: a truthy entry becomes 0, a falsy one 1.
                        mask_actions[agent_index, :] = [
                            0 if agent_info.action_mask[k] else 1 for k in range(total_num_actions)]

            if any([np.isnan(x.reward) for x in agent_info_list]):
                logger.warning("An agent had a NaN reward for brain " + b)
            if any([np.isnan(x.stacked_vector_observation).any() for x in agent_info_list]):
                logger.warning("An agent had a NaN observation for brain " + b)

            _data[b] = BrainInfo(
                visual_observation=vis_obs,
                vector_observation=np.nan_to_num(
                    np.array([x.stacked_vector_observation for x in agent_info_list])),
                text_observations=[x.text_observation for x in agent_info_list],
                memory=memory,
                reward=[x.reward if not np.isnan(x.reward) else 0 for x in agent_info_list],
                agents=[x.id for x in agent_info_list],
                local_done=[x.done for x in agent_info_list],
                vector_action=np.array([x.stored_vector_actions for x in agent_info_list]),
                text_action=[x.stored_text_actions for x in agent_info_list],
                max_reached=[x.max_step_reached for x in agent_info_list],
                action_mask=mask_actions
            )
        return _data, global_done

    def _generate_step_input(self, vector_action, memory, text_action, value) -> UnityInput:
        """
        Builds the step message: the flat per-brain action and memory lists are cut into
        equally sized per-agent slices, one AgentActionProto per agent.
        """
        rl_in = UnityRLInput()
        for b in vector_action:
            n_agents = self._n_agents[b]
            if n_agents == 0:
                continue
            _a_s = len(vector_action[b]) // n_agents
            _m_s = len(memory[b]) // n_agents
            brain_values = value.get(b)
            if isinstance(brain_values, _SCALAR_TYPES):
                # A single estimate applies to every agent, like a single text action.
                brain_values = [brain_values] * n_agents
            for i in range(n_agents):
                action = AgentActionProto(
                    vector_actions=vector_action[b][i * _a_s: (i + 1) * _a_s],
                    memories=memory[b][i * _m_s: (i + 1) * _m_s],
                    text_actions=text_action[b][i],
                )
                if brain_values is not None:
                    action.value = float(brain_values[i])
                rl_in.agent_actions[b].value.extend([action])
        rl_in.command = 0
        return self.wrap_unity_input(rl_in)

    def _generate_reset_input(self, training, config) -> UnityInput:
        """Builds the reset message carrying the training flag and the reset parameters."""
        rl_in = UnityRLInput()
        rl_in.is_training = training
        rl_in.environment_parameters.CopyFrom(EnvironmentParametersProto())
        for key in config:
            rl_in.environment_parameters.float_parameters[key] = config[key]
        rl_in.command = 1
        return self.wrap_unity_input(rl_in)

    def send_academy_parameters(self, init_parameters: UnityRLInitializationInput) -> UnityRLInitializationOutput:
        """Sends the initialization message and returns the initialization output of the reply."""
        inputs = UnityInput()
        inputs.rl_initialization_input.CopyFrom(init_parameters)
        return self.communicator.initialize(inputs).rl_initialization_output

    def wrap_unity_input(self, rl_input: UnityRLInput) -> UnityInput:
        """Wraps an RL input message into the top-level UnityInput message."""
        result = UnityInput()
        result.rl_input.CopyFrom(rl_input)
        return result