|
|
|
|
|
|
) |
|
|
|
|
|
|
|
|
|
|
|
class ActionTuple: |
|
|
|
class _ActionTupleBase(ABC): |
|
|
|
An object whose fields correspond to actions of different types. |
|
|
|
Continuous and discrete actions are numpy arrays of type float32 and |
|
|
|
int32, respectively and are type checked on construction. |
|
|
|
Dimensions are of (n_agents, continuous_size) and (n_agents, discrete_size), |
|
|
|
respectively. |
|
|
|
An object whose fields correspond to action data of continuous and discrete |
|
|
|
spaces. Dimensions are of (n_agents, continuous_size) and (n_agents, discrete_size), |
|
|
|
respectively. Note, this also holds when continuous or discrete size is |
|
|
|
zero. |
|
|
|
""" |
|
|
|
|
|
|
|
def __init__( |
|
|
|
|
|
|
): |
|
|
|
if continuous is not None and continuous.dtype != np.float32: |
|
|
|
continuous = continuous.astype(np.float32, copy=False) |
|
|
|
self._continuous = continuous |
|
|
|
if discrete is not None and discrete.dtype != np.int32: |
|
|
|
discrete = discrete.astype(np.int32, copy=False) |
|
|
|
self._discrete = discrete |
|
|
|
self._continuous: Optional[np.ndarray] = None |
|
|
|
self._discrete: Optional[np.ndarray] = None |
|
|
|
if continuous is not None: |
|
|
|
self.add_continuous(continuous) |
|
|
|
if discrete is not None: |
|
|
|
self.add_discrete(discrete) |
|
|
|
|
|
|
|
@property |
|
|
|
def continuous(self) -> np.ndarray: |
|
|
|
|
|
|
def discrete(self) -> np.ndarray: |
|
|
|
return self._discrete |
|
|
|
|
|
|
|
def add_continuous(self, continuous: np.ndarray) -> None:
    """
    Store the continuous actions, converting them to float32 when needed.

    If no discrete actions have been set yet, an empty discrete array with
    the same number of rows and zero columns is created, using the dtype
    reported by ``get_discrete_dtype()``.

    :param continuous: Array of continuous actions; its first dimension
        determines the row count of the discrete placeholder.
    """
    as_float32 = (
        continuous
        if continuous.dtype == np.float32
        else continuous.astype(np.float32, copy=False)
    )
    if self._discrete is None:
        # Keep both fields populated: pair the continuous actions with a
        # zero-width discrete array of matching row count.
        row_count = as_float32.shape[0]
        self._discrete = np.zeros(
            (row_count, 0), dtype=self.get_discrete_dtype()
        )
    self._continuous = as_float32
|
|
|
|
|
|
|
def add_discrete(self, discrete: np.ndarray) -> None:
    """
    Store the discrete actions, converting them to the declared dtype.

    The target dtype is whatever ``get_discrete_dtype()`` returns. If no
    continuous actions have been set yet, an empty float32 array with the
    same number of rows and zero columns is created.

    :param discrete: Array of discrete actions; its first dimension
        determines the row count of the continuous placeholder.
    """
    discrete_dtype = self.get_discrete_dtype()
    if discrete.dtype != discrete_dtype:
        # Cast to the dtype the check above compares against. The previous
        # hard-coded np.int32 was only correct when the declared dtype
        # happened to be int32.
        discrete = discrete.astype(discrete_dtype, copy=False)
    if self._continuous is None:
        # Keep both fields populated: pair the discrete actions with a
        # zero-width continuous array of matching row count.
        self._continuous = np.zeros((discrete.shape[0], 0), dtype=np.float32)
    self._discrete = discrete
|
|
|
|
|
|
|
@abstractmethod
def get_discrete_dtype(self) -> np.dtype:
    """
    Return the numpy dtype used for discrete actions.

    Concrete subclasses must override this; the value is consulted when
    discrete actions are added and when an empty discrete placeholder is
    created.
    """
|
|
|
|
|
|
|
|
|
|
|
class ActionTuple(_ActionTupleBase):
    """
    Container whose fields hold the actions of each type.

    Continuous actions are a float32 numpy array and discrete actions an
    int32 numpy array; both are type checked on construction. Their
    dimensions are (n_agents, continuous_size) and (n_agents, discrete_size)
    respectively, including when continuous_size or discrete_size is zero.
    """

    def get_discrete_dtype(self) -> np.dtype:
        """
        Return the dtype of a discrete action, which is ``np.int32``.
        """
        return np.int32
|
|
|
|
|
|
|
|
|
|
|
class ActionSpec(NamedTuple): |
|
|
|
""" |
|
|
|
|
|
|
for a number of agents. |
|
|
|
:param n_agents: The number of agents that will have actions generated |
|
|
|
""" |
|
|
|
continuous = np.zeros((n_agents, self.continuous_size), dtype=np.float32) |
|
|
|
discrete = np.zeros((n_agents, self.discrete_size), dtype=np.int32) |
|
|
|
return ActionTuple(continuous, discrete) |
|
|
|
_continuous = np.zeros((n_agents, self.continuous_size), dtype=np.float32) |
|
|
|
_discrete = np.zeros((n_agents, self.discrete_size), dtype=np.int32) |
|
|
|
return ActionTuple(continuous=_continuous, discrete=_discrete) |
|
|
|
|
|
|
|
def random_action(self, n_agents: int) -> ActionTuple: |
|
|
|
""" |
|
|
|
|
|
|
""" |
|
|
|
continuous = np.random.uniform( |
|
|
|
_continuous = np.random.uniform( |
|
|
|
discrete = np.zeros((n_agents, self.discrete_size), dtype=np.int32) |
|
|
|
_discrete = np.zeros((n_agents, self.discrete_size), dtype=np.int32) |
|
|
|
discrete = np.column_stack( |
|
|
|
_discrete = np.column_stack( |
|
|
|
[ |
|
|
|
np.random.randint( |
|
|
|
0, |
|
|
|
|
|
|
for i in range(self.discrete_size) |
|
|
|
] |
|
|
|
) |
|
|
|
return ActionTuple(continuous, discrete) |
|
|
|
return ActionTuple(continuous=_continuous, discrete=_discrete) |
|
|
|
|
|
|
|
def _validate_action( |
|
|
|
self, actions: ActionTuple, n_agents: int, name: str |
|
|
|