|
|
|
|
|
|
continuous: np.ndarray # dims (n_agents, cont_size) |
|
|
|
discrete: np.ndarray # dims (n_agents, disc_size) |
|
|
|
|
|
|
|
@staticmethod
def from_numpy_dict(action_dict: Dict[str, np.ndarray]) -> "ActionBuffers":
    """
    Build an ActionBuffers from a dictionary of action arrays.

    :param action_dict: Mapping that may contain the keys
        "continuous_action" and/or "discrete_action".
    :return: An ActionBuffers whose first argument is the value stored under
        "continuous_action" and whose second argument is the value stored
        under "discrete_action". A key that is absent from ``action_dict``
        is replaced by the placeholder ``[[]]``.
    """
    # Continuous is resolved before discrete, and each placeholder is a fresh
    # list object, so no state is shared between calls.
    continuous = (
        action_dict["continuous_action"]
        if "continuous_action" in action_dict
        else [[]]
    )
    discrete = (
        action_dict["discrete_action"]
        if "discrete_action" in action_dict
        else [[]]
    )
    return ActionBuffers(continuous, discrete)
|
|
|
|
|
|
|
|
|
|
|
class ActionSpec(NamedTuple): |
|
|
|
""" |
|
|
|
|
|
|
""" |
|
|
|
return len(self.discrete_branches) |
|
|
|
|
|
|
|
def empty_action(self, n_agents: int) -> Dict[str, np.ndarray]: |
|
|
|
def empty_action(self, n_agents: int) -> ActionBuffers: |
|
|
|
action_dict: Dict[str, np.ndarray] = {} |
|
|
|
continuous: np.ndarray = None |
|
|
|
discrete: np.ndarray = None |
|
|
|
action_dict["continuous_action"] = np.zeros( |
|
|
|
(n_agents, self.continuous_size), dtype=np.float32 |
|
|
|
) |
|
|
|
continuous = np.zeros((n_agents, self.continuous_size), dtype=np.float32) |
|
|
|
action_dict["discrete_action"] = np.zeros( |
|
|
|
(n_agents, self.discrete_size), dtype=np.int32 |
|
|
|
) |
|
|
|
return action_dict |
|
|
|
discrete = np.zeros((n_agents, self.discrete_size), dtype=np.int32) |
|
|
|
return ActionBuffers(continuous, discrete) |
|
|
|
# return ActionBuffers( |
|
|
|
# np.zeros((n_agents, self.continuous_size), dtype=np.float32), |
|
|
|
# np.zeros((n_agents, self.discrete_size), dtype=np.int32), |
|
|
|
# ) |
|
|
|
|
|
|
|
def random_action(self, n_agents: int) -> Dict[str, np.ndarray]: |
|
|
|
def random_action(self, n_agents: int) -> ActionBuffers: |
|
|
|
action_dict: Dict[str, np.ndarray] = {} |
|
|
|
continuous: np.ndarray = None |
|
|
|
discrete: np.ndarray = None |
|
|
|
continuous_action = np.random.uniform( |
|
|
|
continuous = np.random.uniform( |
|
|
|
action_dict["continuous_action"] = continuous_action |
|
|
|
discrete_action = np.column_stack( |
|
|
|
discrete = np.column_stack( |
|
|
|
[ |
|
|
|
np.random.randint( |
|
|
|
0, |
|
|
|
|
|
|
for i in range(self.discrete_size) |
|
|
|
] |
|
|
|
) |
|
|
|
action_dict["discrete_action"] = discrete_action |
|
|
|
return action_dict |
|
|
|
# return ActionBuffers(continuous_action, discrete_action) |
|
|
|
return ActionBuffers(continuous, discrete) |
|
|
|
|
|
|
|
def _validate_action( |
|
|
|
self, actions: ActionBuffers, n_agents: int, name: str |
|
|
|
|
|
|
for the correct number of agents and ensures the type. |
|
|
|
""" |
|
|
|
_expected_shape = (n_agents, self.continuous_size) |
|
|
|
if actions.continuous.shape != _expected_shape: |
|
|
|
if self.continuous_size > 0 and actions.continuous.shape != _expected_shape: |
|
|
|
f"received input of dimension {actions.shape}" |
|
|
|
f"received input of dimension {actions.continuous.shape}" |
|
|
|
if actions.continuous.dtype != np.float32: |
|
|
|
actions.continuous = actions.continuous.astype(np.float32) |
|
|
|
|
|
|
|
if actions.discrete.shape != _expected_shape: |
|
|
|
if self.discrete_size > 0 and actions.discrete.shape != _expected_shape: |
|
|
|
f"received input of dimension {actions.shape}" |
|
|
|
f"received input of dimension {actions.discrete.shape}" |
|
|
|
if actions.continuous.dtype != np.float32: |
|
|
|
actions.continuous = actions.continuous.astype(np.float32) |
|
|
|
if actions.discrete.dtype != np.int32: |
|
|
|
actions.discrete = actions.discrete.astype(np.int32) |
|
|
|
|
|
|
|
if actions.discrete.dtype != np.int32: |
|
|
|
actions.discrete = actions.discrete.astype(np.int32) |
|
|
|
return actions |
|
|
|
|
|
|
|
@staticmethod |
|
|
|