
ml-agents-envs pass

Branch: /develop/action-spec-gym
Andrew Cohen, 4 years ago
Current commit 4ebc6c44
13 changed files with 96 additions and 72 deletions
  1. Project/Assets/ML-Agents/Examples/PushBlock/Demos/ExpertPush.demo.meta (2 changes)
  2. ml-agents-envs/mlagents_envs/base_env.py (62 changes)
  3. ml-agents-envs/mlagents_envs/environment.py (6 changes)
  4. ml-agents-envs/mlagents_envs/tests/test_envs.py (5 changes)
  5. ml-agents-envs/mlagents_envs/tests/test_steps.py (25 changes)
  6. ml-agents/mlagents/trainers/env_manager.py (15 changes)
  7. ml-agents/mlagents/trainers/optimizer/tf_optimizer.py (4 changes)
  8. ml-agents/mlagents/trainers/policy/policy.py (27 changes)
  9. ml-agents/mlagents/trainers/simple_env_manager.py (4 changes)
  10. ml-agents/mlagents/trainers/subprocess_env_manager.py (6 changes)
  11. ml-agents/mlagents/trainers/tests/torch/test_reward_providers/utils.py (8 changes)
  12. ml-agents/mlagents/trainers/tf/components/reward_signals/curiosity/signal.py (2 changes)
  13. ml-agents/mlagents/trainers/tf/components/reward_signals/gail/signal.py (2 changes)

Project/Assets/ML-Agents/Examples/PushBlock/Demos/ExpertPush.demo.meta (2 changes)


guid: 7f11f35191533404c9957443a681aaee
ScriptedImporter:
fileIDToRecycleName:
11400000: Assets/ML-Agents/Examples/PushBlock/Demos/ExpertPush.demo
11400002: Assets/ML-Agents/Examples/Pushblock/Demos/ExpertPush.demo
externalObjects: {}
userData: ' (Unity.MLAgents.Demonstrations.DemonstrationSummary)'
assetBundleName:

ml-agents-envs/mlagents_envs/base_env.py (62 changes)


continuous: np.ndarray # dims (n_agents, cont_size)
discrete: np.ndarray # dims (n_agents, disc_size)
@staticmethod
def from_numpy_dict(action_dict: Dict[str, np.ndarray]) -> "ActionBuffers":
continuous: List[np.ndarray] = [[]]
discrete: List[np.ndarray] = [[]]
if "continuous_action" in action_dict:
continuous = action_dict["continuous_action"]
if "discrete_action" in action_dict:
discrete = action_dict["discrete_action"]
return ActionBuffers(continuous, discrete)
class ActionSpec(NamedTuple):
"""

"""
return len(self.discrete_branches)
def empty_action(self, n_agents: int) -> Dict[str, np.ndarray]:
def empty_action(self, n_agents: int) -> ActionBuffers:
action_dict: Dict[str, np.ndarray] = {}
continuous: np.ndarray = None
discrete: np.ndarray = None
action_dict["continuous_action"] = np.zeros(
(n_agents, self.continuous_size), dtype=np.float32
)
continuous = np.zeros((n_agents, self.continuous_size), dtype=np.float32)
action_dict["discrete_action"] = np.zeros(
(n_agents, self.discrete_size), dtype=np.int32
)
return action_dict
discrete = np.zeros((n_agents, self.discrete_size), dtype=np.int32)
return ActionBuffers(continuous, discrete)
# return ActionBuffers(
# np.zeros((n_agents, self.continuous_size), dtype=np.float32),
# np.zeros((n_agents, self.discrete_size), dtype=np.int32),
# )
def random_action(self, n_agents: int) -> Dict[str, np.ndarray]:
def random_action(self, n_agents: int) -> ActionBuffers:
action_dict: Dict[str, np.ndarray] = {}
continuous: np.ndarray = None
discrete: np.ndarray = None
continuous_action = np.random.uniform(
continuous = np.random.uniform(
action_dict["continuous_action"] = continuous_action
discrete_action = np.column_stack(
discrete = np.column_stack(
[
np.random.randint(
0,

for i in range(self.discrete_size)
]
)
action_dict["discrete_action"] = discrete_action
return action_dict
# return ActionBuffers(continuous_action, discrete_action)
return ActionBuffers(continuous, discrete)
def _validate_action(
self, actions: ActionBuffers, n_agents: int, name: str

for the correct number of agents and ensures the type.
"""
_expected_shape = (n_agents, self.continuous_size)
if actions.continuous.shape != _expected_shape:
if self.continuous_size > 0 and actions.continuous.shape != _expected_shape:
f"received input of dimension {actions.shape}"
f"received input of dimension {actions.continuous.shape}"
if actions.continuous.dtype != np.float32:
actions.continuous = actions.continuous.astype(np.float32)
if actions.discrete.shape != _expected_shape:
if self.discrete_size > 0 and actions.discrete.shape != _expected_shape:
f"received input of dimension {actions.shape}"
f"received input of dimension {actions.discrete.shape}"
if actions.continuous.dtype != np.float32:
actions.continuous = actions.continuous.astype(np.float32)
if actions.discrete.dtype != np.int32:
actions.discrete = actions.discrete.astype(np.int32)
if actions.discrete.dtype != np.int32:
actions.discrete = actions.discrete.astype(np.int32)
return actions
@staticmethod
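
The hunk above appears to move ActionSpec.empty_action and random_action away from returning a Dict[str, np.ndarray] and toward returning an ActionBuffers tuple with separate continuous and discrete arrays. Below is a minimal, self-contained sketch of that shape of API based only on what is visible in the diff; SketchActionSpec is a hypothetical stand-in, not the library class.

    from typing import NamedTuple, Tuple
    import numpy as np

    class ActionBuffers(NamedTuple):
        continuous: np.ndarray  # dims (n_agents, continuous_size)
        discrete: np.ndarray    # dims (n_agents, discrete_size)

    class SketchActionSpec(NamedTuple):
        # Hypothetical stand-in for the ActionSpec in base_env.py, reduced to the
        # two fields this hunk relies on.
        continuous_size: int
        discrete_branches: Tuple[int, ...]

        @property
        def discrete_size(self) -> int:
            return len(self.discrete_branches)

        def empty_action(self, n_agents: int) -> ActionBuffers:
            # All-zeros buffers with the dtypes used in the diff.
            return ActionBuffers(
                np.zeros((n_agents, self.continuous_size), dtype=np.float32),
                np.zeros((n_agents, self.discrete_size), dtype=np.int32),
            )

        def random_action(self, n_agents: int) -> ActionBuffers:
            # Continuous actions in [-1, 1]; one random index per discrete branch.
            continuous = np.random.uniform(
                -1.0, 1.0, (n_agents, self.continuous_size)
            ).astype(np.float32)
            if self.discrete_size > 0:
                discrete = np.column_stack(
                    [
                        np.random.randint(
                            0, self.discrete_branches[i], size=n_agents, dtype=np.int32
                        )
                        for i in range(self.discrete_size)
                    ]
                )
            else:
                discrete = np.zeros((n_agents, 0), dtype=np.int32)
            return ActionBuffers(continuous, discrete)

    spec = SketchActionSpec(continuous_size=3, discrete_branches=(2, 5))
    buf = spec.random_action(n_agents=4)
    assert buf.continuous.shape == (4, 3) and buf.discrete.shape == (4, 2)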

ml-agents-envs/mlagents_envs/environment.py (6 changes)


continue
for i in range(n_agents):
# TODO: extend to AgentBuffers
action = AgentActionProto(vector_actions=vector_action[b][i])
if vector_action[b].continuous is not None:
_act = vector_action[b].continuous[i]
else:
_act = vector_action[b].discrete[i]
action = AgentActionProto(vector_actions=_act)
rl_in.agent_actions[b].value.extend([action])
rl_in.command = STEP
rl_in.side_channel = bytes(
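
In the hunk above, the proto packing picks each agent's row from the continuous buffer when one is present and falls back to the discrete buffer otherwise (the TODO notes that sending both awaits an AgentBuffers-capable proto). A runnable sketch of just that selection, with the AgentActionProto wrapper replaced by a plain list so it runs standalone; pack_actions is a hypothetical name.

    from typing import List, Optional
    import numpy as np

    def pack_actions(
        continuous: Optional[np.ndarray], discrete: Optional[np.ndarray], n_agents: int
    ) -> List[np.ndarray]:
        per_agent = []
        for i in range(n_agents):
            # Prefer the continuous buffer; fall back to the discrete one.
            act = continuous[i] if continuous is not None else discrete[i]
            per_agent.append(act)
        return per_agent

    rows = pack_actions(np.zeros((2, 3), dtype=np.float32), None, n_agents=2)
    assert len(rows) == 2 and rows[0].shape == (3,)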

ml-agents-envs/mlagents_envs/tests/test_envs.py (5 changes)


env.step()
with pytest.raises(UnityActionException):
env.set_actions("RealFakeBrain", spec.action_spec.empty_action(n_agents - 1))
decision_steps, terminal_steps = env.get_steps("RealFakeBrain")
n_agents = len(decision_steps)
env.set_actions("RealFakeBrain", spec.action_spec.empty_action(n_agents) - 1)
env.step()
env.close()
assert isinstance(decision_steps, DecisionSteps)
assert isinstance(terminal_steps, TerminalSteps)
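
The updated test builds its actions with action_spec.empty_action and still expects a UnityActionException when the agent count is wrong. A standalone sketch of that shape check; the validate function below is a local stand-in for the _validate_action path, not the library code.

    import numpy as np

    def validate(continuous: np.ndarray, n_agents: int, continuous_size: int) -> None:
        expected = (n_agents, continuous_size)
        if continuous.shape != expected:
            raise ValueError(
                f"expected {expected}, received input of dimension {continuous.shape}"
            )

    try:
        # Actions built for 2 agents, but 3 agents requested a decision.
        validate(np.zeros((2, 3), dtype=np.float32), n_agents=3, continuous_size=3)
    except ValueError as err:
        print("rejected:", err)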

ml-agents-envs/mlagents_envs/tests/test_steps.py (25 changes)


assert specs.discrete_branches == ()
assert specs.discrete_size == 0
assert specs.continuous_size == 3
assert specs.empty_action(5).shape == (5, 3)
assert specs.empty_action(5).dtype == np.float32
assert specs.empty_action(5).continuous.shape == (5, 3)
assert specs.empty_action(5).continuous.dtype == np.float32
assert specs.empty_action(5).shape == (5, 1)
assert specs.empty_action(5).dtype == np.int32
assert specs.empty_action(5).discrete.shape == (5, 1)
assert specs.empty_action(5).discrete.dtype == np.int32
specs = ActionSpec(3, (3,))
assert specs.continuous_size == 3
assert specs.discrete_branches == (3,)
assert specs.discrete_size == 1
assert specs.empty_action(5).continuous.shape == (5, 3)
assert specs.empty_action(5).continuous.dtype == np.float32
assert specs.empty_action(5).discrete.shape == (5, 1)
assert specs.empty_action(5).discrete.dtype == np.int32
def test_action_generator():

zero_action = specs.empty_action(4)
zero_action = specs.empty_action(4).continuous
random_action = specs.random_action(4)
random_action = specs.random_action(4).continuous
assert random_action.dtype == np.float32
assert random_action.shape == (4, action_len)
assert np.min(random_action) >= -1

action_shape = (10, 20, 30)
specs = ActionSpec.create_discrete(action_shape)
zero_action = specs.empty_action(4)
zero_action = specs.empty_action(4).discrete
random_action = specs.random_action(4)
random_action = specs.random_action(4).discrete
assert random_action.dtype == np.int32
assert random_action.shape == (4, len(action_shape))
assert np.min(random_action) >= 0
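
These assertions pin down the dtype and shape contract per buffer. For the purely discrete case with branches (10, 20, 30), the expectations reduce to the following plain-numpy sketch; branches and n_agents are illustrative local values, not library identifiers.

    import numpy as np

    branches = (10, 20, 30)
    n_agents = 4
    zero_action = np.zeros((n_agents, len(branches)), dtype=np.int32)
    random_action = np.column_stack(
        [np.random.randint(0, b, size=n_agents, dtype=np.int32) for b in branches]
    )
    assert zero_action.shape == random_action.shape == (n_agents, 3)
    assert random_action.dtype == np.int32
    assert np.min(random_action) >= 0 and np.all(random_action < np.array(branches))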

ml-agents/mlagents/trainers/env_manager.py (15 changes)


from abc import ABC, abstractmethod
import numpy as np
from typing import List, Dict, NamedTuple, Iterable, Tuple
from mlagents_envs.base_env import (
DecisionSteps,

ActionBuffers,
)
from mlagents_envs.side_channel.stats_side_channel import EnvironmentStats

step_info.environment_stats, step_info.worker_id
)
return len(step_infos)
@staticmethod
def action_buffers_from_numpy_dict(
action_dict: Dict[str, np.ndarray]
) -> ActionBuffers:
continuous: np.ndarray = [np.array([])]
discrete: np.ndarray = [np.array([])]
if "continuous_action" in action_dict:
continuous = action_dict["continuous_action"]
if "discrete_action" in action_dict:
discrete = action_dict["discrete_action"]
return ActionBuffers(continuous, discrete)
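
The action_buffers_from_numpy_dict helper on EnvManager is the trainer-side bridge: a dict keyed by "continuous_action" / "discrete_action" is repacked into ActionBuffers, with empty defaults for missing keys. A self-contained sketch of the same conversion; the NamedTuple here is a local stand-in, and np.array([]) replaces the diff's list-wrapped defaults.

    from typing import Dict, NamedTuple
    import numpy as np

    class ActionBuffers(NamedTuple):
        continuous: np.ndarray
        discrete: np.ndarray

    def action_buffers_from_numpy_dict(action_dict: Dict[str, np.ndarray]) -> ActionBuffers:
        continuous = action_dict.get("continuous_action", np.array([]))
        discrete = action_dict.get("discrete_action", np.array([]))
        return ActionBuffers(continuous, discrete)

    buf = action_buffers_from_numpy_dict(
        {"discrete_action": np.zeros((2, 1), dtype=np.int32)}
    )
    assert buf.discrete.shape == (2, 1) and buf.continuous.size == 0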

ml-agents/mlagents/trainers/optimizer/tf_optimizer.py (4 changes)


[self.value_heads, self.policy.memory_out, self.memory_out], feed_dict
)
prev_action = (
batch["actions"][-1] if not self.policy.use_continuous_act else None
batch["discrete_action"][-1]
if not self.policy.use_continuous_act
else None
)
else:
value_estimates = self.sess.run(self.value_heads, feed_dict)
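
With the action buffer split into typed keys, the recurrent value estimate reads its previous action from the "discrete_action" column rather than a generic "actions" key, and only when the policy is not continuous. A toy sketch of that selection; batch is an illustrative stand-in for the trainer's AgentBuffer.

    import numpy as np

    use_continuous_act = False
    batch = {"discrete_action": np.array([[0], [1], [2]], dtype=np.int32)}
    prev_action = batch["discrete_action"][-1] if not use_continuous_act else None
    assert prev_action.tolist() == [2]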

ml-agents/mlagents/trainers/policy/policy.py (27 changes)


from mlagents_envs.exception import UnityException
from mlagents.trainers.action_info import ActionInfo
from mlagents_envs.base_env import BehaviorSpec, ActionBuffers
from mlagents_envs.base_env import BehaviorSpec
from mlagents.trainers.settings import TrainerSettings, NetworkSettings

1 for shape in behavior_spec.observation_shapes if len(shape) == 3
)
self.use_continuous_act = self.behavior_spec.action_spec.is_continuous()
self.previous_action_dict: Dict[str, ActionBuffers] = {}
self.previous_action_dict: Dict[str, Dict[str, np.ndarray]] = {}
self.memory_dict: Dict[str, np.ndarray] = {}
self.normalize = trainer_settings.network_settings.normalize
self.use_recurrent = self.network_settings.memory is not None

if agent_id in self.memory_dict:
self.memory_dict.pop(agent_id)
def make_empty_previous_action(self, num_agents) -> ActionBuffers:
def make_empty_previous_action(self, num_agents: int) -> Dict[str, np.ndarray]:
:return: ActionBuffers .
:return: Dict of action type to np.ndarray
return self.behavior_spec.action_spec.empty_action(num_agents)
act_dict: Dict[str, np.ndarray] = {}
action_buffer = self.behavior_spec.action_spec.empty_action(num_agents)
if action_buffer.continuous is not None:
act_dict["continuous_action"] = action_buffer.continuous
if action_buffer.discrete is not None:
act_dict["discrete_action"] = action_buffer.discrete
return act_dict
def save_previous_action(
self, agent_ids: List[str], action_dict: Dict[str, np.ndarray]

for index, agent_id in enumerate(agent_ids):
self.previous_action_dict[agent_id] = action_dict
agent_action_dict: Dict[str, np.ndarray] = {}
for act_type in action_dict:
agent_action_dict[act_type] = action_dict[act_type][index, :]
self.previous_action_dict[agent_id] = agent_action_dict
action_dict = self.behavior_spec.action_spec.empty_action(len(agent_ids))
action_dict = self.make_empty_previous_action(len(agent_ids))
action_dict[act_type][index, :] = self.previous_action_dict[agent_id][act_type]
action_dict[act_type][index, :] = self.previous_action_dict[
agent_id
][act_type]
return action_dict
def remove_previous_action(self, agent_ids):
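
The hunk above keeps the previous-action bookkeeping dict-based on the trainer side: saving stores a per-agent slice of each action type, and retrieval re-stacks those slices into a fresh empty dict. A minimal sketch of that bookkeeping with hypothetical helper names (make_empty, retrieve_previous_action) and a single discrete branch.

    from typing import Dict, List
    import numpy as np

    previous_action_dict: Dict[str, Dict[str, np.ndarray]] = {}

    def make_empty(num_agents: int) -> Dict[str, np.ndarray]:
        return {"discrete_action": np.zeros((num_agents, 1), dtype=np.int32)}

    def save_previous_action(agent_ids: List[str], action_dict: Dict[str, np.ndarray]) -> None:
        for index, agent_id in enumerate(agent_ids):
            # Keep only this agent's row for each action type.
            previous_action_dict[agent_id] = {
                act_type: action_dict[act_type][index, :] for act_type in action_dict
            }

    def retrieve_previous_action(agent_ids: List[str]) -> Dict[str, np.ndarray]:
        action_dict = make_empty(len(agent_ids))
        for index, agent_id in enumerate(agent_ids):
            if agent_id in previous_action_dict:
                for act_type in action_dict:
                    action_dict[act_type][index, :] = previous_action_dict[agent_id][act_type]
        return action_dict

    save_previous_action(["a", "b"], {"discrete_action": np.array([[3], [7]], dtype=np.int32)})
    assert retrieve_previous_action(["b", "a"])["discrete_action"].tolist() == [[7], [3]]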

ml-agents/mlagents/trainers/simple_env_manager.py (4 changes)


from typing import Dict, List
from mlagents_envs.base_env import BaseEnv, BehaviorName, BehaviorSpec, ActionBuffers
from mlagents_envs.base_env import BaseEnv, BehaviorName, BehaviorSpec
from mlagents.trainers.env_manager import EnvManager, EnvironmentStep, AllStepResult
from mlagents_envs.timers import timed
from mlagents.trainers.action_info import ActionInfo

self.previous_all_action_info = all_action_info
for brain_name, action_info in all_action_info.items():
_action = ActionBuffers.from_numpy_dict(action_info.action)
_action = EnvManager.action_buffers_from_numpy_dict(action_info.action)
self.env.set_actions(brain_name, _action)
self.env.step()
all_step_result = self._generate_all_results()

ml-agents/mlagents/trainers/subprocess_env_manager.py (6 changes)


from multiprocessing import Process, Pipe, Queue
from multiprocessing.connection import Connection
from queue import Empty as EmptyQueueException
from mlagents_envs.base_env import BaseEnv, BehaviorName, BehaviorSpec, ActionBuffers
from mlagents_envs.base_env import BaseEnv, BehaviorName, BehaviorSpec
from mlagents_envs import logging_util
from mlagents.trainers.env_manager import EnvManager, EnvironmentStep, AllStepResult
from mlagents_envs.timers import (

all_action_info = req.payload
for brain_name, action_info in all_action_info.items():
if len(action_info.action) != 0:
_action = ActionBuffers.from_numpy_dict(action_info.action)
_action = EnvManager.action_buffers_from_numpy_dict(
action_info.action
)
env.set_actions(brain_name, _action)
env.step()
all_step_result = _generate_all_results()

ml-agents/mlagents/trainers/tests/torch/test_reward_providers/utils.py (8 changes)


next_observations = [
np.random.normal(size=shape) for shape in behavior_spec.observation_shapes
]
action = behavior_spec.action_spec.random_action(1)
action_buffer = behavior_spec.action_spec.random_action(1)
action = {}
if action_buffer.continuous is not None:
action["continuous_action"] = action_buffer.continuous
if action_buffer.discrete is not None:
action["discrete_action"] = action_buffer.discrete
for _ in range(number):
curr_split_obs = SplitObservations.from_observations(curr_observations)
next_split_obs = SplitObservations.from_observations(next_observations)
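
The test helper now calls action_spec.random_action and unpacks the resulting buffers back into the dict keys the AgentBuffer-based code still consumes. A standalone sketch of that unpacking; Buf is a local stand-in for ActionBuffers and the sizes are illustrative.

    from typing import Dict, NamedTuple, Optional
    import numpy as np

    class Buf(NamedTuple):
        continuous: Optional[np.ndarray]
        discrete: Optional[np.ndarray]

    action_buffer = Buf(np.random.uniform(-1, 1, (1, 2)).astype(np.float32), None)
    action: Dict[str, np.ndarray] = {}
    if action_buffer.continuous is not None:
        action["continuous_action"] = action_buffer.continuous
    if action_buffer.discrete is not None:
        action["discrete_action"] = action_buffer.discrete
    assert list(action) == ["continuous_action"]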

ml-agents/mlagents/trainers/tf/components/reward_signals/curiosity/signal.py (2 changes)


def evaluate_batch(self, mini_batch: AgentBuffer) -> RewardSignalResult:
feed_dict: Dict[tf.Tensor, Any] = {
self.policy.batch_size_ph: len(mini_batch["actions"]),
self.policy.batch_size_ph: len(mini_batch["vector_obs"]),
self.policy.sequence_length_ph: self.policy.sequence_length,
}
if self.policy.use_vec_obs:
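
Keying the batch size off "vector_obs" instead of "actions" matters because, once actions live under separate "continuous_action" / "discrete_action" entries, there is no single "actions" key whose length gives the batch size, while the observation entry always does. A toy illustration; mini_batch is an illustrative stand-in for the AgentBuffer. The same change appears in the GAIL signal below.

    import numpy as np

    mini_batch = {
        "vector_obs": np.zeros((6, 8), dtype=np.float32),
        "continuous_action": np.zeros((6, 2), dtype=np.float32),
    }
    batch_size = len(mini_batch["vector_obs"])  # 6, the number of experiences in the batch
    assert batch_size == 6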

ml-agents/mlagents/trainers/tf/components/reward_signals/gail/signal.py (2 changes)


def evaluate_batch(self, mini_batch: AgentBuffer) -> RewardSignalResult:
feed_dict: Dict[tf.Tensor, Any] = {
self.policy.batch_size_ph: len(mini_batch["actions"]),
self.policy.batch_size_ph: len(mini_batch["vector_obs"]),
self.policy.sequence_length_ph: self.policy.sequence_length,
}
if self.model.use_vail:
