
Merge branch 'develop-action-spec' into develop-action-buffer

Branch: /develop/action-spec-gym
Author: Andrew Cohen, 4 years ago
Commit: f6355ba9
6 changed files, 35 insertions(+), 32 deletions(-)
1. ml-agents-envs/mlagents_envs/base_env.py (31 changes)
2. ml-agents-envs/mlagents_envs/environment.py (6 changes)
3. ml-agents-envs/mlagents_envs/tests/test_envs.py (6 changes)
4. ml-agents-envs/mlagents_envs/tests/test_steps.py (16 changes)
5. ml-agents/mlagents/trainers/tests/tensorflow/test_simple_rl.py (6 changes)
6. ml-agents/mlagents/trainers/tests/torch/test_reward_providers/utils.py (2 changes)

ml-agents-envs/mlagents_envs/base_env.py (31 changes)


A NamedTuple whose fields correspond to actions of different types.
Continuous and discrete actions are numpy arrays.
"""
continuous: np.ndarray
discrete: np.ndarray

"""
return len(self.discrete_branches)
- def create_empty(self, n_agents: int) -> ActionBuffers:
+ def empty_action(self, n_agents: int) -> ActionBuffers:
- return ActionBuffers(np.zeros((n_agents, self.continuous_size), dtype=np.float32),
- np.zeros((n_agents, self.discrete_size), dtype=np.int32))
+ return ActionBuffers(
+ np.zeros((n_agents, self.continuous_size), dtype=np.float32),
+ np.zeros((n_agents, self.discrete_size), dtype=np.int32),
+ )
- def create_random(self, n_agents: int) -> ActionBuffers:
+ def random_action(self, n_agents: int) -> ActionBuffers:
"""
Generates ActionBuffers corresponding to a random action (either discrete
or continuous) for a number of agents.

).astype(np.float32)
discrete_action = np.column_stack(
[
np.random.randint(
0,
self.discrete_branches[i], # type: ignore
size=(n_agents),
dtype=np.int32,
)
for i in range(self.discrete_size)
]
)
return ActionBuffers(continuous_action, discrete_action)
def _validate_action(
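
For context, a minimal sketch of how the renamed helpers are meant to be called after this change. ActionSpec.create_continuous is assumed to mirror the create_discrete factory used in test_steps.py below; the rest follows the hunk above.

from mlagents_envs.base_env import ActionSpec

# Assumed factory (mirrors ActionSpec.create_discrete used in test_steps.py):
# a spec with 3 continuous dimensions and no discrete branches.
spec = ActionSpec.create_continuous(3)

zeros = spec.empty_action(5)     # was create_empty before this rename
sampled = spec.random_action(5)  # was create_random before this rename

# Per the new base_env.py, both return an ActionBuffers NamedTuple whose
# continuous field is float32 and whose discrete field is int32.
print(zeros.continuous.shape, zeros.continuous.dtype)
print(sampled.continuous.shape, sampled.discrete.shape)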

ml-agents-envs/mlagents_envs/environment.py (6 changes)


n_agents = len(self._env_state[group_name][0])
self._env_actions[group_name] = self._env_specs[
group_name
- ].action_spec.create_empty(n_agents)
+ ].action_spec.empty_action(n_agents)
step_input = self._generate_step_input(self._env_actions)
with hierarchical_timer("communicator.exchange"):
outputs = self._communicator.exchange(step_input)

num_agents = len(self._env_state[behavior_name][0])
action = action_spec._validate_action(action, num_agents, behavior_name)
if behavior_name not in self._env_actions:
- self._env_actions[behavior_name] = action_spec.create_empty(num_agents)
+ self._env_actions[behavior_name] = action_spec.empty_action(num_agents)
try:
index = np.where(self._env_state[behavior_name][0].agent_id == agent_id)[0][
0

if n_agents == 0:
continue
for i in range(n_agents):
- #TODO: extend to AgentBuffers
+ # TODO: extend to AgentBuffers
action = AgentActionProto(vector_actions=vector_action[b][i])
rl_in.agent_actions[b].value.extend([action])
rl_in.command = STEP
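
A hedged sketch of the driver loop this fallback supports: when set_actions was not called for a behavior, step() now fills in action_spec.empty_action(n_agents) itself. Only calls visible in these hunks plus the public behavior_specs mapping are used; file_name=None (attach to a running editor) is illustrative.

from mlagents_envs.environment import UnityEnvironment

env = UnityEnvironment(file_name=None)  # illustrative: attach to a running Unity editor
env.reset()
for behavior_name, spec in env.behavior_specs.items():
    decision_steps, _ = env.get_steps(behavior_name)
    # Setting the zero action explicitly is equivalent to letting step()
    # fall back to action_spec.empty_action(n_agents) as in the hunk above.
    env.set_actions(
        behavior_name, spec.action_spec.empty_action(len(decision_steps))
    )
env.step()
env.close()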

ml-agents-envs/mlagents_envs/tests/test_envs.py (6 changes)


env.step()
decision_steps, terminal_steps = env.get_steps("RealFakeBrain")
n_agents = len(decision_steps)
env.set_actions("RealFakeBrain", spec.action_spec.create_empty(n_agents))
env.set_actions("RealFakeBrain", spec.action_spec.empty_action(n_agents))
env.set_actions("RealFakeBrain", spec.action_spec.create_empty(n_agents - 1))
env.set_actions("RealFakeBrain", spec.action_spec.empty_action(n_agents - 1))
env.set_actions("RealFakeBrain", spec.action_spec.create_empty(n_agents) - 1)
env.set_actions("RealFakeBrain", spec.action_spec.empty_action(n_agents) - 1)
env.step()
env.close()

ml-agents-envs/mlagents_envs/tests/test_steps.py (16 changes)


assert specs.discrete_branches == ()
assert specs.discrete_size == 0
assert specs.continuous_size == 3
- assert specs.create_empty(5).shape == (5, 3)
- assert specs.create_empty(5).dtype == np.float32
+ assert specs.empty_action(5).shape == (5, 3)
+ assert specs.empty_action(5).dtype == np.float32
- assert specs.create_empty(5).shape == (5, 1)
- assert specs.create_empty(5).dtype == np.int32
+ assert specs.empty_action(5).shape == (5, 1)
+ assert specs.empty_action(5).dtype == np.int32
def test_action_generator():

- zero_action = specs.create_empty(4)
+ zero_action = specs.empty_action(4)
- random_action = specs.create_random(4)
+ random_action = specs.random_action(4)
assert random_action.dtype == np.float32
assert random_action.shape == (4, action_len)
assert np.min(random_action) >= -1

action_shape = (10, 20, 30)
specs = ActionSpec.create_discrete(action_shape)
- zero_action = specs.create_empty(4)
+ zero_action = specs.empty_action(4)
- random_action = specs.create_random(4)
+ random_action = specs.random_action(4)
assert random_action.dtype == np.int32
assert random_action.shape == (4, len(action_shape))
assert np.min(random_action) >= 0
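
For clarity, a self-contained numpy sketch of the per-branch sampling that random_action performs for discrete actions (see the column_stack in the base_env.py hunk above); it reproduces the shape and dtype these assertions expect.

import numpy as np

# One random integer per branch per agent, stacked into an (n_agents, n_branches) array.
discrete_branches = (10, 20, 30)
n_agents = 4
discrete_action = np.column_stack(
    [
        np.random.randint(0, branch, size=(n_agents,), dtype=np.int32)
        for branch in discrete_branches
    ]
)
assert discrete_action.shape == (n_agents, len(discrete_branches))
assert discrete_action.dtype == np.int32
assert discrete_action.min() >= 0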

ml-agents/mlagents/trainers/tests/tensorflow/test_simple_rl.py (6 changes)


)
new_networksettings = attr.evolve(
SAC_TF_CONFIG.network_settings,
- memory=NetworkSettings.MemorySettings(memory_size=16, sequence_length=16),
+ memory=NetworkSettings.MemorySettings(memory_size=16),
- batch_size=256,
+ batch_size=128,
learning_rate=1e-3,
buffer_init_steps=1000,
steps_per_update=2,

hyperparameters=new_hyperparams,
network_settings=new_networksettings,
- max_steps=2000,
+ max_steps=4000,
framework=FrameworkType.TENSORFLOW,
)
_check_environment_trains(env, {BRAIN_NAME: config})
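
To show how these fragments fit together, a hedged sketch of the attr.evolve pattern this test uses; SAC_TF_CONFIG, BRAIN_NAME, env and _check_environment_trains are fixtures of this test module, and the import path of NetworkSettings/FrameworkType is an assumption.

import attr
from mlagents.trainers.settings import NetworkSettings, FrameworkType  # assumed module path

# Evolve the default SAC config instead of rebuilding it from scratch.
new_hyperparams = attr.evolve(
    SAC_TF_CONFIG.hyperparameters,
    batch_size=128,
    learning_rate=1e-3,
    buffer_init_steps=1000,
    steps_per_update=2,
)
new_networksettings = attr.evolve(
    SAC_TF_CONFIG.network_settings,
    memory=NetworkSettings.MemorySettings(memory_size=16),
)
config = attr.evolve(
    SAC_TF_CONFIG,
    hyperparameters=new_hyperparams,
    network_settings=new_networksettings,
    max_steps=4000,
    framework=FrameworkType.TENSORFLOW,
)
_check_environment_trains(env, {BRAIN_NAME: config})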

ml-agents/mlagents/trainers/tests/torch/test_reward_providers/utils.py (2 changes)


next_observations = [
np.random.normal(size=shape) for shape in behavior_spec.observation_shapes
]
- action = behavior_spec.action_spec.create_random(1)[0, :]
+ action = behavior_spec.action_spec.random_action(1)[0, :]
for _ in range(number):
curr_split_obs = SplitObservations.from_observations(curr_observations)
next_split_obs = SplitObservations.from_observations(next_observations)
