|
|
|
|
|
|
import atexit
import glob
import io
import logging
import os
import subprocess
from sys import platform

import numpy as np
from PIL import Image

from .brain import AllBrainInfo, BrainInfo, BrainParameters
from .communicator_objects import UnityRLInput, UnityRLOutput, AgentActionProto, \
    EnvironmentParametersProto, UnityRLInitializationInput, UnityRLInitializationOutput, \
    UnityInput, UnityOutput
from .exception import UnityEnvironmentException, UnityActionException
from .socket_communicator import SocketCommunicator
from .utilities import process_pixels

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("mlagents.envs")
|
|
|
|
|
|
class UnityEnvironment(object):
    def __init__(self, file_name=None, worker_id=0, base_port=5005, seed=0,
                 docker_training=False, no_graphics=False):
        # Each concurrent worker gets its own port so several environments can
        # run side by side on one machine.
        self.port = base_port + worker_id
        self._buffer_size = 12000
        self._version_ = "API-5"
        self._loaded = False  # If true, this means the environment was successfully loaded
        self.proc1 = None  # The process that is started. If None, no process was started
        if file_name is None and worker_id != 0:
            raise UnityEnvironmentException(
                "If the environment name is None, "
                "the worker-id must be 0 in order to connect with the Editor.")
        if file_name is not None:
            self.executable_launcher(file_name, docker_training, no_graphics)
        else:
            logger.info("Start training by pressing the Play button in the Unity Editor.")
|
|
|
|
|
|
        self._brains = {}
        self._brain_names = []
        self._external_brain_names = []
        for brain_param in aca_params.brain_parameters:
            self._brain_names += [brain_param.brain_name]
            resolution = [{
                "height": x.height,
                "width": x.width,
                "blackAndWhite": x.gray_scale
            } for x in brain_param.camera_resolutions]
            self._brains[brain_param.brain_name] = \
                BrainParameters(brain_param.brain_name, {
                    "vectorObservationSize": brain_param.vector_observation_size,
                    "numStackedVectorObservations": brain_param.num_stacked_vector_observations,
                    "cameraResolutions": resolution,
                    "vectorActionSize": brain_param.vector_action_size,
                    "vectorActionDescriptions": brain_param.vector_action_descriptions,
                    "vectorActionSpaceType": brain_param.vector_action_space_type
                })
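            # For a hypothetical continuous-control brain, the dictionary above
            # might hold (illustrative values only):
            #   {"vectorObservationSize": 8, "numStackedVectorObservations": 1,
            #    "cameraResolutions": [], "vectorActionSize": [2],
            #    "vectorActionDescriptions": ["", ""],
            #    "vectorActionSpaceType": "continuous"}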
|
|
|
        self._resetParameters = dict(aca_params.environment_parameters.float_parameters)
        logger.info("\n'{0}' started successfully!\n{1}".format(self._academy_name, str(self)))
        if self._num_external_brains == 0:
            logger.warning(" No Learning Brains set to train found in the Unity Environment. "
                           "You will not be able to pass actions to your agent(s).")
|
|
|
|
|
|
    def executable_launcher(self, file_name, docker_training, no_graphics):
        cwd = os.getcwd()
        file_name = (file_name.strip()
                     .replace('.app', '').replace('.exe', '')
                     .replace('.x86_64', '').replace('.x86', ''))
        true_filename = os.path.basename(os.path.normpath(file_name))
        logger.debug('The true file name is {}'.format(true_filename))
        launch_string = None
        if platform == 'linux' or platform == 'linux2':
            candidates = glob.glob(os.path.join(cwd, file_name) + '.x86_64')
            if len(candidates) == 0:
                candidates = glob.glob(os.path.join(cwd, file_name) + '.x86')
            if len(candidates) == 0:
                candidates = glob.glob(file_name + '.x86_64')
            if len(candidates) == 0:
                candidates = glob.glob(file_name + '.x86')
            if len(candidates) > 0:
                launch_string = candidates[0]
|
|
|
|
|
|
|
        elif platform == 'darwin':
            # Try progressively looser globs until a Mac app binary is found.
            candidates = glob.glob(
                os.path.join(cwd, file_name + '.app', 'Contents', 'MacOS', true_filename))
            if len(candidates) == 0:
                candidates = glob.glob(
                    os.path.join(file_name + '.app', 'Contents', 'MacOS', true_filename))
            if len(candidates) == 0:
                candidates = glob.glob(
                    os.path.join(cwd, file_name + '.app', 'Contents', 'MacOS', '*'))
            if len(candidates) == 0:
                candidates = glob.glob(
                    os.path.join(file_name + '.app', 'Contents', 'MacOS', '*'))
            if len(candidates) > 0:
                launch_string = candidates[0]
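        # Unity players accept the standard -batchmode and -nographics flags;
        # passing both (as the no_graphics branch below does) runs the build
        # headless, which avoids any rendering requirement during training.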
|
|
|
|
|
|
        if not docker_training:
            if no_graphics:
                self.proc1 = subprocess.Popen(
                    [launch_string, '-nographics', '-batchmode',
                     '--port', str(self.port)])
            else:
                self.proc1 = subprocess.Popen(
                    [launch_string, '--port', str(self.port)])
|
|
|
|
|
|
    def __str__(self):
        return '''Unity Academy name: {0}
        Number of Brains: {1}
        Number of Training Brains : {2}
        Reset Parameters :\n\t\t{3}'''.format(self._academy_name, str(self._num_brains),
                                              str(self._num_external_brains),
                                              "\n\t\t".join(
                                                  [str(k) + " -> " + str(self._resetParameters[k])
                                                   for k in self._resetParameters])) + '\n' + \
               '\n'.join([str(self._brains[b]) for b in self._brains])
|
|
|
    def reset(self, config=None, train_mode=True) -> AllBrainInfo:
        """
        Sends a signal to reset the environment.
        :return: AllBrainInfo : A data structure corresponding to the initial reset state of the environment.
        """
|
|
|
        if config is None:
            config = self._resetParameters
        for k in config:
            if (k in self._resetParameters) and isinstance(config[k], (int, float)):
                self._resetParameters[k] = config[k]
            elif not isinstance(config[k], (int, float)):
                raise UnityEnvironmentException(
                    "The value for parameter '{0}' must be an Integer or a Float.".format(k))
            else:
                raise UnityEnvironmentException(
                    "The parameter '{0}' is not a valid parameter.".format(k))
|
|
|
|
|
|
|
        if self._loaded:
            outputs = self.communicator.exchange(
                self._generate_reset_input(train_mode, config))
            s = self._get_state(outputs.rl_output)
            self._global_done = s[1]
            for _b in self._external_brain_names:
                self._n_agents[_b] = len(s[0][_b].agents)
            return s[0]
        else:
            raise UnityEnvironmentException("No Unity environment is loaded.")
|
|
|
|
|
|
|
    def step(self, vector_action=None, memory=None, text_action=None, value=None) -> AllBrainInfo:
        """
        Provides the environment with an action, moves the environment dynamics forward
        accordingly, and returns observation, state, and reward information to the agent.
        :param vector_action: Agent's vector action. Can be a scalar or vector of int/floats.
        :param memory: Vector corresponding to memory used for recurrent policies.
        :param text_action: Text action to send to the environment.
        :param value: Value estimates provided by agents.
        :return: AllBrainInfo : A data structure corresponding to the new state of the environment.
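
        Example (illustrative; assumes a single external brain in a hypothetical
        "3DBall" build with a two-dimensional continuous action space):
            env = UnityEnvironment(file_name="3DBall")
            info = env.reset(train_mode=True)
            info = env.step(vector_action=[0.1, -0.2])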
|
|
|
""" |
|
|
|
|
|
|
        vector_action = {} if vector_action is None else vector_action
        memory = {} if memory is None else memory
        text_action = {} if text_action is None else text_action
        value = {} if value is None else value

        # Check that environment is loaded, and episode is currently running.
        if self._loaded and not self._global_done and self._global_done is not None:
|
|
|
            if isinstance(vector_action, (int, np.int_, float, np.float_, list, np.ndarray)):
                if self._num_external_brains == 1:
                    vector_action = {self._external_brain_names[0]: vector_action}
                else:
                    raise UnityActionException(
                        "There are no external brains in the environment, "
                        "step cannot take a vector_action input")

            if isinstance(memory, (int, np.int_, float, np.float_, list, np.ndarray)):
                if self._num_external_brains == 1:
                    memory = {self._external_brain_names[0]: memory}
                else:
                    raise UnityActionException(
                        "There are no external brains in the environment, "
                        "step cannot take a memory input")

            if isinstance(text_action, (str, list, np.ndarray)):
                if self._num_external_brains == 1:
                    text_action = {self._external_brain_names[0]: text_action}
                else:
                    raise UnityActionException(
                        "There are no external brains in the environment, "
                        "step cannot take a text_action input")

            if isinstance(value, (int, np.int_, float, np.float_, list, np.ndarray)):
                if self._num_external_brains == 1:
                    value = {self._external_brain_names[0]: value}
                else:
                    raise UnityActionException(
                        "There are no external brains in the environment, "
                        "step cannot take a value input")
|
|
|
|
|
|
|
            for brain_name in list(vector_action.keys()) + list(memory.keys()) + list(
                    text_action.keys()):
                if brain_name not in self._external_brain_names:
                    raise UnityActionException(
                        "The name {0} does not correspond to an external brain "
                        "in the environment".format(brain_name))
|
|
|
            for brain_name in self._external_brain_names:
                n_agent = self._n_agents[brain_name]
                if brain_name not in vector_action:
                    # A missing action is filled with zeros rather than raising.
                    if self._brains[brain_name].vector_action_space_type == "discrete":
                        vector_action[brain_name] = [0.0] * n_agent * len(
                            self._brains[brain_name].vector_action_space_size)
                    else:
                        vector_action[brain_name] = [0.0] * n_agent * \
                            self._brains[brain_name].vector_action_space_size[0]
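                    # e.g., a discrete brain with branch sizes [3, 2] (hypothetical)
                    # and n_agent == 4 defaults to 4 * 2 == 8 zeros: one entry per
                    # agent per branch.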
|
|
|
                else:
                    vector_action[brain_name] = self._flatten(vector_action[brain_name])
                if brain_name not in memory:
                    memory[brain_name] = []
                else:
                    if memory[brain_name] is None:
                        memory[brain_name] = []
                    else:
                        memory[brain_name] = self._flatten(memory[brain_name])
                if brain_name not in text_action:
                    text_action[brain_name] = [""] * n_agent
                else:
                    if text_action[brain_name] is None:
                        text_action[brain_name] = [""] * n_agent
                    if isinstance(text_action[brain_name], str):
                        text_action[brain_name] = [text_action[brain_name]] * n_agent

                number_text_actions = len(text_action[brain_name])
                if not ((number_text_actions == n_agent) or number_text_actions == 0):
|
|
|
"There was a mismatch between the provided text_action and environment's expectation: " |
|
|
|
"There was a mismatch between the provided text_action and " |
|
|
|
"the environment's expectation: " |
|
|
|
b, n_agent, len(text_action[b]))) |
|
|
|
if not ((self._brains[b].vector_action_space_type == "discrete" and len( |
|
|
|
vector_action[b]) == n_agent * len(self._brains[b].vector_action_space_size)) or |
|
|
|
(self._brains[b].vector_action_space_type == "continuous" and len( |
|
|
|
vector_action[b]) == self._brains[b].vector_action_space_size[0] * n_agent)): |
|
|
|
brain_name, n_agent, number_text_actions)) |
|
|
|
|
|
|
|

                discrete_check = self._brains[brain_name].vector_action_space_type == "discrete"
                expected_discrete_size = n_agent * len(
                    self._brains[brain_name].vector_action_space_size)
                continuous_check = self._brains[brain_name].vector_action_space_type == "continuous"
                expected_continuous_size = n_agent * \
                    self._brains[brain_name].vector_action_space_size[0]
                if not ((discrete_check and
                         len(vector_action[brain_name]) == expected_discrete_size) or
                        (continuous_check and
                         len(vector_action[brain_name]) == expected_continuous_size)):
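                    # e.g., a continuous brain with vector_action_space_size == [2]
                    # (hypothetical) and n_agent == 3 must be given exactly 6 floats.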
|
|
|
"There was a mismatch between the provided action and environment's expectation: " |
|
|
|
"There was a mismatch between the provided action and " |
|
|
|
"the environment's expectation: " |
|
|
|
.format(b, str(len(self._brains[b].vector_action_space_size) * n_agent) |
|
|
|
if self._brains[b].vector_action_space_type == "discrete" |
|
|
|
else str(self._brains[b].vector_action_space_size[0] * n_agent), |
|
|
|
self._brains[b].vector_action_space_type, |
|
|
|
str(vector_action[b]))) |
|
|
|
.format(brain_name, str(expected_discrete_size) |
|
|
|
if discrete_check |
|
|
|
else str(expected_continuous_size), |
|
|
|
self._brains[brain_name].vector_action_space_type, |
|
|
|
str(vector_action[brain_name]))) |
|
|
|
            outputs = self.communicator.exchange(
                self._generate_step_input(vector_action, memory, text_action, value))
            rl_output = outputs.rl_output
            state = self._get_state(rl_output)
            self._global_done = state[1]
            for _b in self._external_brain_names:
                self._n_agents[_b] = len(state[0][_b].agents)
            return state[0]
|
|
|
raise UnityActionException("The episode is completed. Reset the environment with 'reset()'") |
|
|
|
raise UnityActionException( |
|
|
|
"The episode is completed. Reset the environment with 'reset()'") |
|
|
|
"You cannot conduct step without first calling reset. Reset the environment with 'reset()'") |
|
|
|
"You cannot conduct step without first calling reset. " |
|
|
|
"Reset the environment with 'reset()'") |
|
|
|
|
|
|
|
    def close(self):
        """
        Sends a shutdown signal to the unity environment, and closes the socket connection.
        """
|
|
|
    def _get_state(self, output: UnityRLOutput) -> (AllBrainInfo, bool):
        """
        Collects experience information from all external brains in environment at current step.
        :return: a dictionary of BrainInfo objects, and the global_done flag.
        """
        _data = {}
        global_done = output.global_done
        for b in output.agentInfos:
            agent_info_list = output.agentInfos[b].value
            vis_obs = []
            for i in range(self.brains[b].number_visual_observations):
                obs = [process_pixels(x.visual_observations[i],
                                      self.brains[b].camera_resolutions[i]['blackAndWhite'])
                       for x in agent_info_list]
                vis_obs += [np.array(obs)]
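            # Each vis_obs entry stacks one camera across all agents; assuming
            # process_pixels returns a (height, width, channels) array, the stacked
            # shape is (n_agents, height, width, channels).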
|
|
|
            if len(agent_info_list) == 0:
                memory_size = 0
            else:
                memory_size = max([len(x.memories) for x in agent_info_list])
            if memory_size == 0:
                memory = np.zeros((0, 0))
            else:
                # Zero-pad every agent's memories to the longest length so they
                # form a rectangular array.
                [x.memories.extend([0] * (memory_size - len(x.memories)))
                 for x in agent_info_list]
                memory = np.array([x.memories for x in agent_info_list])
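            # e.g., memories of lengths [2, 4] (hypothetical) are zero-padded to
            # length 4, yielding a (2, 4) memory matrix.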
|
|
|
            total_num_actions = sum(self.brains[b].vector_action_space_size)
            mask_actions = np.ones((len(agent_info_list), total_num_actions))
            for agent_index, agent_info in enumerate(agent_info_list):
                if agent_info.action_mask is not None:
                    if len(agent_info.action_mask) == total_num_actions:
                        # The proto marks masked actions with True; flip to 0/1,
                        # where 0 disallows the action.
                        mask_actions[agent_index, :] = [
                            0 if agent_info.action_mask[k] else 1
                            for k in range(total_num_actions)]
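            # e.g., branch sizes [3, 2] (hypothetical) give total_num_actions == 5;
            # a proto mask of [True, False, False, True, False] becomes the row
            # [0, 1, 1, 0, 1].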
|
|
|
            if any([np.isnan(x.reward) for x in agent_info_list]):
                logger.warning("An agent had a NaN reward for brain " + b)
            if any([np.isnan(x.stacked_vector_observation).any() for x in agent_info_list]):
                logger.warning("An agent had a NaN observation for brain " + b)
            _data[b] = BrainInfo(
                visual_observation=vis_obs,
                vector_observation=np.nan_to_num(
                    np.array([x.stacked_vector_observation for x in agent_info_list])),
                text_observations=[x.text_observation for x in agent_info_list],
                memory=memory,
                reward=[x.reward if not np.isnan(x.reward) else 0 for x in agent_info_list],
                agents=[x.id for x in agent_info_list],
                local_done=[x.done for x in agent_info_list],
                vector_action=np.array([x.stored_vector_actions for x in agent_info_list]),
                text_action=[x.stored_text_actions for x in agent_info_list],
                max_reached=[x.max_step_reached for x in agent_info_list],
                action_mask=mask_actions
            )
|
|
|
|
|
|
        return _data, global_done
|
|
|
|
|
|
|
    def _generate_step_input(self, vector_action, memory, text_action, value) -> UnityRLInput:
        rl_in = UnityRLInput()
        for b in vector_action:
            n_agents = self._n_agents[b]
            # Per-agent slice lengths within the flattened action/memory vectors.
            _a_s = len(vector_action[b]) // n_agents
            _m_s = len(memory[b]) // n_agents
            for i in range(n_agents):
                action = AgentActionProto(
                    vector_actions=vector_action[b][i * _a_s: (i + 1) * _a_s],
                    memories=memory[b][i * _m_s: (i + 1) * _m_s],
                    text_actions=text_action[b][i],
                )
                if b in value:
                    if value[b] is not None:
                        action.value = float(value[b][i])
                rl_in.agent_actions[b].value.extend([action])
        rl_in.command = 0
        return self.wrap_unity_input(rl_in)
|
|
|
|
|
|
    def _generate_reset_input(self, training, config) -> UnityRLInput:
        rl_in = UnityRLInput()
        rl_in.is_training = training
        rl_in.environment_parameters.CopyFrom(EnvironmentParametersProto())
        for key in config:
            rl_in.environment_parameters.float_parameters[key] = config[key]
        rl_in.command = 1
        return self.wrap_unity_input(rl_in)
|
|
|
|
|
|
|
    def send_academy_parameters(self,
                                init_parameters: UnityRLInitializationInput) -> UnityRLInitializationOutput:
        inputs = UnityInput()
        inputs.rl_initialization_input.CopyFrom(init_parameters)
        return self.communicator.initialize(inputs).rl_initialization_output
|
|
|