浏览代码

Add test env for hybrid actions, clean up BehaviorSpec (#4522)

/develop/actionmodel-csharp
GitHub 4 年前
当前提交
827525f9
共有 2 个文件被更改,包括 78 次插入5 次删除
  1. 28
      ml-agents-envs/mlagents_envs/base_env.py
  2. 55
      ml-agents/mlagents/trainers/tests/simple_test_envs.py

28
ml-agents-envs/mlagents_envs/base_env.py


BehaviorName = str
class HybridAction(NamedTuple):
    """
    Contains continuous and discrete actions as numpy arrays.

    Fields, in positional order:
     - continuous: numpy array holding the continuous part of the action.
     - discrete: numpy array holding the discrete part of the action.
    """

    continuous: np.ndarray
    discrete: np.ndarray
class DecisionStep(NamedTuple):
"""
Contains the data a single Agent collected since the last

class ActionType(Enum):
    """
    Tags the kind of action a behavior uses.

    The integer values are fixed (DISCRETE=0, CONTINUOUS=1, HYBRID=2).
    """

    DISCRETE = 0
    CONTINUOUS = 1
    # NOTE(review): presumably "both continuous and discrete at once",
    # matching HybridAction -- confirm against the users of this enum.
    HYBRID = 2
class BehaviorSpec(NamedTuple):

def discrete_action_branches(self) -> Optional[Tuple[int, ...]]:
return self.discrete_action_shape # type: ignore
def create_empty_action(self, n_agents: int) -> np.ndarray:
return np.zeros((n_agents, self.action_size), dtype=np.float32)
def create_empty_action(self, n_agents: int) -> Tuple[np.ndarray, np.ndarray]:
return HybridAction(
np.zeros((n_agents, self.continuous_action_size), dtype=np.float32),
np.zeros((n_agents, self.discrete_action_size), dtype=np.int32),
)
def create_random_action(self, n_agents: int) -> np.ndarray:
continuous_action = np.random.uniform(

for i in range(self.discrete_action_size)
]
)
return np.concatenate(discrete_action, continuous_action)
return HybridAction(continuous_action, discrete_action)
class BehaviorSpec(NamedTuple):

"""
@abstractmethod
def set_actions(self, behavior_name: BehaviorName, action: np.ndarray) -> None:
def set_actions(
self, behavior_name: BehaviorName, action: Union[HybridAction, np.ndarray]
) -> None:
"""
Sets the action for all of the agents in the simulation for the next
step. The Actions must be in the same order as the order received in

@abstractmethod
def set_action_for_agent(
self, behavior_name: BehaviorName, agent_id: AgentId, action: np.ndarray
self,
behavior_name: BehaviorName,
agent_id: AgentId,
action: Union[HybridAction, np.ndarray],
) -> None:
"""
Sets the action for one of the agents in the simulation for the next

55
ml-agents/mlagents/trainers/tests/simple_test_envs.py


BaseEnv,
BehaviorSpec,
DecisionSteps,
HybridBehaviorSpec,
BehaviorName,
HybridAction,
)
from mlagents_envs.tests.test_rpc_utils import proto_from_steps_and_action
from mlagents_envs.communicator_objects.agent_info_action_pair_pb2 import (

def close(self):
pass
class HybridEnvironment(SimpleEnvironment):
    """
    Test environment for hybrid (continuous + discrete) actions.

    Wraps two SimpleEnvironment instances built from the same arguments,
    differing only in the second positional argument (False for
    ``continuous_env``, True for ``discrete_env``). ``step``, ``reset`` and
    ``set_actions`` are forwarded to both wrapped environments; a
    HybridAction is split so each one receives its own part.
    """

    def __init__(
        self,
        brain_names,
        step_size=STEP_SIZE,
        num_visual=0,
        num_vector=1,
        vis_obs_size=VIS_OBS_SIZE,
        vec_obs_size=OBS_SIZE,
        action_size=1,
    ):
        """
        Build the two wrapped environments and the hybrid behavior spec.

        All arguments except ``brain_names`` have defaults and are passed
        unchanged, positionally, to both SimpleEnvironment constructors.
        ``action_size`` is also used for the HybridBehaviorSpec: it is passed
        as the second argument, and sets the length of the tuple of 2s passed
        as the third argument.
        """
        # NOTE(review): SimpleEnvironment.__init__ is not called here, so any
        # attribute the parent initializer sets up (possibly including what
        # self._make_obs_spec() reads) is absent on this instance -- confirm
        # this is intended.
        self.continuous_env = SimpleEnvironment(
            brain_names,
            False,
            step_size,
            num_visual,
            num_vector,
            vis_obs_size,
            vec_obs_size,
            action_size,
        )
        self.discrete_env = SimpleEnvironment(
            brain_names,
            True,
            step_size,
            num_visual,
            num_vector,
            vis_obs_size,
            vec_obs_size,
            action_size,
        )
        # Hybrid spec: the second argument is action_size, the third is a
        # tuple of action_size entries, each equal to 2.
        self.behavior_spec = HybridBehaviorSpec(
            self._make_obs_spec(), action_size, tuple(2 for _ in range(action_size))
        )
        self.continuous_action = {}
        self.discrete_action = {}

    def step(self) -> None:
        """Advance both wrapped environments by one step."""
        self.continuous_env.step()
        self.discrete_env.step()

    def reset(self) -> None:  # type: ignore
        """Reset both wrapped environments."""
        self.continuous_env.reset()
        self.discrete_env.reset()

    def set_actions(self, behavior_name: BehaviorName, action: HybridAction) -> None:
        """
        Split a hybrid action and hand each part to its wrapped environment.

        :param behavior_name: behavior the action applies to; forwarded to
            both wrapped environments.
        :param action: HybridAction whose ``continuous`` part goes to
            ``continuous_env`` and whose ``discrete`` part goes to
            ``discrete_env``.
        """
        # set_actions takes (behavior_name, action); forwarding only the array
        # would bind it to behavior_name and leave action unset.
        self.continuous_env.set_actions(behavior_name, action.continuous)
        self.discrete_env.set_actions(behavior_name, action.discrete)
class MemoryEnvironment(SimpleEnvironment):

正在加载...
取消
保存