|
|
|
|
|
|
from mlagents_envs.communicator_objects.agent_action_pb2 import AgentActionProto |
|
|
|
from mlagents_envs.base_env import ( |
|
|
|
BehaviorSpec, |
|
|
|
ActionType, |
|
|
|
ActionSpec, |
|
|
|
DecisionSteps, |
|
|
|
TerminalSteps, |
|
|
|
) |
|
|
|
|
|
|
def test_batched_step_result_from_proto(): |
|
|
|
n_agents = 10 |
|
|
|
shapes = [(3,), (4,)] |
|
|
|
spec = BehaviorSpec(shapes, ActionType.CONTINUOUS, 3) |
|
|
|
spec = BehaviorSpec(shapes, ActionSpec(3, ())) |
|
|
|
ap_list = generate_list_agent_proto(n_agents, shapes) |
|
|
|
decision_steps, terminal_steps = steps_from_proto(ap_list, spec) |
|
|
|
for agent_id in range(n_agents): |
|
|
|
|
|
|
def test_action_masking_discrete(): |
|
|
|
n_agents = 10 |
|
|
|
shapes = [(3,), (4,)] |
|
|
|
behavior_spec = BehaviorSpec(shapes, ActionType.DISCRETE, (7, 3)) |
|
|
|
behavior_spec = BehaviorSpec(shapes, ActionSpec(0, (7, 3))) |
|
|
|
ap_list = generate_list_agent_proto(n_agents, shapes) |
|
|
|
decision_steps, terminal_steps = steps_from_proto(ap_list, behavior_spec) |
|
|
|
masks = decision_steps.action_mask |
|
|
|
|
|
|
def test_action_masking_discrete_1(): |
|
|
|
n_agents = 10 |
|
|
|
shapes = [(3,), (4,)] |
|
|
|
behavior_spec = BehaviorSpec(shapes, ActionType.DISCRETE, (10,)) |
|
|
|
behavior_spec = BehaviorSpec(shapes, ActionSpec(0, (10,))) |
|
|
|
ap_list = generate_list_agent_proto(n_agents, shapes) |
|
|
|
decision_steps, terminal_steps = steps_from_proto(ap_list, behavior_spec) |
|
|
|
masks = decision_steps.action_mask |
|
|
|
|
|
|
def test_action_masking_discrete_2(): |
|
|
|
n_agents = 10 |
|
|
|
shapes = [(3,), (4,)] |
|
|
|
behavior_spec = BehaviorSpec(shapes, ActionType.DISCRETE, (2, 2, 6)) |
|
|
|
behavior_spec = BehaviorSpec(shapes, ActionSpec(0, (2, 2, 6))) |
|
|
|
ap_list = generate_list_agent_proto(n_agents, shapes) |
|
|
|
decision_steps, terminal_steps = steps_from_proto(ap_list, behavior_spec) |
|
|
|
masks = decision_steps.action_mask |
|
|
|
|
|
|
def test_action_masking_continuous(): |
|
|
|
n_agents = 10 |
|
|
|
shapes = [(3,), (4,)] |
|
|
|
behavior_spec = BehaviorSpec(shapes, ActionType.CONTINUOUS, 10) |
|
|
|
behavior_spec = BehaviorSpec(shapes, ActionSpec(10, ())) |
|
|
|
ap_list = generate_list_agent_proto(n_agents, shapes) |
|
|
|
decision_steps, terminal_steps = steps_from_proto(ap_list, behavior_spec) |
|
|
|
masks = decision_steps.action_mask |
|
|
|
|
|
|
bp.vector_action_size.extend([5, 4]) |
|
|
|
bp.vector_action_space_type = 0 |
|
|
|
behavior_spec = behavior_spec_from_proto(bp, agent_proto) |
|
|
|
assert behavior_spec.is_action_discrete() |
|
|
|
assert not behavior_spec.is_action_continuous() |
|
|
|
assert behavior_spec.action_spec.is_action_discrete() |
|
|
|
assert not behavior_spec.action_spec.is_action_continuous() |
|
|
|
assert behavior_spec.discrete_action_branches == (5, 4) |
|
|
|
assert behavior_spec.action_size == 2 |
|
|
|
assert behavior_spec.action_spec.discrete_action_branches == (5, 4) |
|
|
|
assert behavior_spec.action_spec.action_size == 2 |
|
|
|
assert not behavior_spec.is_action_discrete() |
|
|
|
assert behavior_spec.is_action_continuous() |
|
|
|
assert behavior_spec.action_size == 6 |
|
|
|
assert not behavior_spec.action_spec.is_action_discrete() |
|
|
|
assert behavior_spec.action_spec.is_action_continuous() |
|
|
|
assert behavior_spec.action_spec.action_size == 6 |
|
|
|
behavior_spec = BehaviorSpec(shapes, ActionType.CONTINUOUS, 3) |
|
|
|
behavior_spec = BehaviorSpec(shapes, ActionSpec(3, ())) |
|
|
|
ap_list = generate_list_agent_proto(n_agents, shapes, infinite_rewards=True) |
|
|
|
with pytest.raises(RuntimeError): |
|
|
|
steps_from_proto(ap_list, behavior_spec) |
|
|
|
|
|
|
n_agents = 10 |
|
|
|
shapes = [(3,), (4,)] |
|
|
|
behavior_spec = BehaviorSpec(shapes, ActionType.CONTINUOUS, 3) |
|
|
|
behavior_spec = BehaviorSpec(shapes, ActionSpec(3, ())) |
|
|
|
ap_list = generate_list_agent_proto(n_agents, shapes, nan_observations=True) |
|
|
|
with pytest.raises(RuntimeError): |
|
|
|
steps_from_proto(ap_list, behavior_spec) |