|
|
|
|
|
|
import subprocess |
|
|
|
import struct |
|
|
|
|
|
|
|
from .brain import BrainInfo, BrainParameters |
|
|
|
from .brain import BrainInfo, BrainParameters, AllBrainInfo |
|
|
|
from .exception import UnityEnvironmentException, UnityActionException, UnityTimeOutException |
|
|
|
from .curriculum import Curriculum |
|
|
|
|
|
|
|
|
|
|
self._global_done = None |
|
|
|
self._academy_name = p["AcademyName"] |
|
|
|
self._log_path = p["logPath"] |
|
|
|
self._brains = {} |
|
|
|
self._brains = AllBrainInfo() |
|
|
|
self._brain_names = p["brainNames"] |
|
|
|
self._external_brain_names = p["externalBrainNames"] |
|
|
|
self._external_brain_names = [] if self._external_brain_names is None else self._external_brain_names |
|
|
|
|
|
|
self._brains[self._brain_names[i]] = BrainParameters(self._brain_names[i], p["brainParameters"][i]) |
|
|
|
self._loaded = True |
|
|
|
logger.info("\n'{}' started successfully!".format(self._academy_name)) |
|
|
|
if (self._num_external_brains == 0): |
|
|
|
if self._num_external_brains == 0: |
|
|
|
logger.warning(" No External Brains found in the Unity Environment. " |
|
|
|
"You will not be able to pass actions to your agent(s).") |
|
|
|
except UnityEnvironmentException: |
|
|
|
|
|
|
|
|
|
|
@property |
|
|
|
def curriculum(self): |
|
|
|
return self._curriculum |
|
|
|
|
|
|
@staticmethod |
|
|
|
def _process_pixels(image_bytes=None, bw=False): |
|
|
|
""" |
|
|
|
Converts bytearray observation image into numpy array, resizes it, and optionally converts it to greyscale |
|
|
|
:param image_bytes: input bytearray corresponding to image |
|
|
|
Converts byte array observation image into numpy array, re-sizes it, and optionally converts it to grey scale |
|
|
|
:param image_bytes: input byte array corresponding to image |
|
|
|
:return: processed numpy array of observation from environment |
|
|
|
""" |
|
|
|
s = bytearray(image_bytes) |
|
|
|
|
|
|
state_dict = json.loads(state) |
|
|
|
return state_dict |
|
|
|
|
|
|
|
def reset(self, train_mode=True, config=None, lesson=None): |
|
|
|
def reset(self, train_mode=True, config=None, lesson=None) -> AllBrainInfo: |
|
|
|
:return: A Data structure corresponding to the initial reset state of the environment. |
|
|
|
:return: AllBrainInfo : A Data structure corresponding to the initial reset state of the environment. |
|
|
|
|
|
|
|
|
|
|
|
elif config != {}: |
|
|
|
logger.info("\nAcademy Reset with parameters : \t{0}" |
|
|
|
.format(', '.join([str(x) + ' -> ' + str(config[x]) for x in config]))) |
|
|
|
|
|
|
else: |
|
|
|
raise UnityEnvironmentException("No Unity environment is loaded.") |
|
|
|
|
|
|
|
def _get_state(self): |
|
|
|
def _get_state(self) -> AllBrainInfo: |
|
|
|
:return: a dictionary BrainInfo objects. |
|
|
|
:return: a dictionary of BrainInfo objects. |
|
|
|
self._data = {} |
|
|
|
self._data = AllBrainInfo() |
|
|
|
for index in range(self._num_brains): |
|
|
|
state_dict = self._get_state_dict() |
|
|
|
b = state_dict["brain_name"] |
|
|
|
|
|
|
|
|
|
|
observations.append(np.array(obs_n)) |
|
|
|
|
|
|
|
self._data[b] = BrainInfo(observations, states, memories, rewards, agents, dones, actions, max_reached=maxes) |
|
|
|
self._data[b] = BrainInfo(observations, states, memories, rewards, agents, |
|
|
|
dones, actions, max_reached=maxes) |
|
|
|
|
|
|
|
try: |
|
|
|
self._global_done = self._conn.recv(self._buffer_size).decode('utf-8') == 'True' |
|
|
|
|
|
|
arr = [float(x) for x in arr] |
|
|
|
return arr |
|
|
|
|
|
|
|
def step(self, action=None, memory=None, value=None): |
|
|
|
def step(self, action=None, memory=None, value=None) -> AllBrainInfo: |
|
|
|
""" |
|
|
|
Provides the environment with an action, moves the environment dynamics forward accordingly, and returns |
|
|
|
observation, state, and reward information to the agent. |
|
|
|
|
|
|
:return: A Data structure corresponding to the new state of the environment. |
|
|
|
:return: AllBrainInfo : A Data structure corresponding to the new state of the environment. |
|
|
|
""" |
|
|
|
action = {} if action is None else action |
|
|
|
memory = {} if memory is None else memory |
|
|
|
|
|
|
raise UnityActionException( |
|
|
|
"There was a mismatch between the provided memory and environment's expectation: " |
|
|
|
"The brain {0} expected {1} memories but was given {2}" |
|
|
|
.format(b, self._brains[b].memory_space_size * n_agent, len(memory[b]))) |
|
|
|
.format(b, self._brains[b].memory_space_size * n_agent, len(memory[b]))) |
|
|
|
(self._brains[b].action_space_type == "continuous" and len( |
|
|
|
(self._brains[b].action_space_type == "continuous" and len( |
|
|
|
.format(b, n_agent if self._brains[b].action_space_type == "discrete" else |
|
|
|
str(self._brains[b].action_space_size * n_agent), self._brains[b].action_space_type, |
|
|
|
str(action[b]))) |
|
|
|
.format(b, n_agent if self._brains[b].action_space_type == "discrete" else |
|
|
|
str(self._brains[b].action_space_size * n_agent), self._brains[b].action_space_type, |
|
|
|
str(action[b]))) |
|
|
|
self._conn.send(b"STEP") |
|
|
|
self._send_action(action, memory, value) |
|
|
|
return self._get_state() |
|
|
|