浏览代码

Added a random action creator on the BehaviorSpecs (#4030)

* Added a random action creator on the BehaviorSpecs

* Bumping numpy version

* Bumping numpy version

* Not using np.random.Generator as it seems to still be under development
/MLA-1734-demo-provider
GitHub 5 年前
当前提交
0d75f3da
共有 2 个文件被更改,包括 66 次插入0 次删除
  1. 32
      ml-agents-envs/mlagents_envs/base_env.py
  2. 34
      ml-agents-envs/mlagents_envs/tests/test_steps.py

32
ml-agents-envs/mlagents_envs/base_env.py


return None
def create_empty_action(self, n_agents: int) -> np.ndarray:
"""
Generates a numpy array corresponding to an empty action (all zeros)
for a number of agents.
:param n_agents: The number of agents that will have actions generated
"""
def create_random_action(self, n_agents: int) -> np.ndarray:
    """
    Generates a numpy array corresponding to a random action (either discrete
    or continuous) for a number of agents.

    Sampling uses numpy's global random state (``np.random``), so results are
    reproducible after ``np.random.seed``.

    :param n_agents: The number of agents that will have actions generated
    :return: An array of shape (n_agents, action_size). Continuous actions are
        float32 values drawn uniformly from [-1, 1). Discrete actions are int32
        values where column i is drawn from [0, discrete_action_branches[i]).
    """
    if self.is_action_continuous():
        action = np.random.uniform(
            low=-1.0, high=1.0, size=(n_agents, self.action_size)
        ).astype(np.float32)
        return action
    elif self.is_action_discrete():
        branch_size = self.discrete_action_branches
        # Fill a preallocated array instead of stacking a list of columns so
        # that a spec with zero branches yields an empty (n_agents, 0) array
        # rather than failing on an empty stack.
        action = np.empty((n_agents, self.action_size), dtype=np.int32)
        # One draw per branch, in branch order, keeps the sequence of values
        # consumed from the global random state the same as stacking columns.
        for i in range(self.action_size):
            action[:, i] = np.random.randint(
                0,
                branch_size[i],  # type: ignore
                size=n_agents,
                dtype=np.int32,
            )
        return action
    # When neither predicate holds, execution falls through and the method
    # implicitly returns None.
class BehaviorMapping(Mapping):

34
ml-agents-envs/mlagents_envs/tests/test_steps.py


assert specs.action_size == 1
assert specs.create_empty_action(5).shape == (5, 1)
assert specs.create_empty_action(5).dtype == np.int32
def test_action_generator():
    """
    Checks the empty and random action creators of a BehaviorSpec for both a
    continuous and a discrete action space: dtype, shape and value bounds.
    """
    n_agents = 4

    # Continuous
    continuous_size = 30
    continuous_specs = BehaviorSpec(
        observation_shapes=[(5,)],
        action_type=ActionType.CONTINUOUS,
        action_shape=continuous_size,
    )
    expected_zeros = np.zeros((n_agents, continuous_size), dtype=np.float32)
    assert np.array_equal(
        continuous_specs.create_empty_action(n_agents), expected_zeros
    )
    sampled = continuous_specs.create_random_action(n_agents)
    assert sampled.dtype == np.float32
    assert sampled.shape == (n_agents, continuous_size)
    # Continuous actions must stay within [-1, 1]
    assert np.min(sampled) >= -1
    assert np.max(sampled) <= 1

    # Discrete
    branches = (10, 20, 30)
    n_branches = len(branches)
    discrete_specs = BehaviorSpec(
        observation_shapes=[(5,)],
        action_type=ActionType.DISCRETE,
        action_shape=branches,
    )
    expected_zeros = np.zeros((n_agents, n_branches), dtype=np.int32)
    assert np.array_equal(
        discrete_specs.create_empty_action(n_agents), expected_zeros
    )
    sampled = discrete_specs.create_random_action(n_agents)
    assert sampled.dtype == np.int32
    assert sampled.shape == (n_agents, n_branches)
    assert np.min(sampled) >= 0
    # Each column holds one branch and must stay below that branch's size
    for column, limit in zip(sampled.T, branches):
        assert np.max(column) < limit
正在加载...
取消
保存