from typing import List, Tuple

import numpy as np

from mlagents.trainers.buffer import AgentBuffer
from mlagents.trainers.trajectory import Trajectory, AgentExperience
from mlagents_envs.base_env import (
    DecisionSteps,
    TerminalSteps,
    BehaviorSpec,
    ActionSpec,
)
|
|
def create_mock_steps(
    num_agents: int,
    observation_shapes: List[Tuple],
    action_spec: ActionSpec,
    done: bool = False,
) -> Tuple[DecisionSteps, TerminalSteps]:
    """
    Creates a mock Tuple[DecisionSteps, TerminalSteps] with constant observations
    and rewards for num_agents agents.

    :int num_agents: Number of "agents" to imitate.
    :List observation_shapes: List of observation shapes for the behavior.
    :ActionSpec action_spec: ActionSpec describing the action space.
    :bool done: Whether all the agents in the batch are done
    """
    obs_list = []
    for _shape in observation_shapes:
        obs_list.append(np.ones((num_agents,) + _shape, dtype=np.float32))
    action_mask = None
    if action_spec.is_discrete():
        # One mask array per discrete branch, with every action unmasked.
        action_mask = [
            np.array(num_agents * [action_size * [False]])
            for action_size in action_spec.discrete_branches  # type: ignore
        ]
    reward = np.array(num_agents * [1.0], dtype=np.float32)
    interrupted = np.array(num_agents * [False], dtype=bool)
    agent_id = np.arange(num_agents, dtype=np.int32)
    behavior_spec = BehaviorSpec(observation_shapes, action_spec)
    if done:
        return (
            DecisionSteps.empty(behavior_spec),
            TerminalSteps(obs_list, reward, interrupted, agent_id),
        )
    else:
        return (
            DecisionSteps(obs_list, reward, agent_id, action_mask),
            TerminalSteps.empty(behavior_spec),
        )
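

# Illustrative usage sketch (not referenced by the tests): builds mock steps for
# two agents with a single 8-dim vector observation and a two-branch discrete
# action space. The shapes, branch sizes, and helper name are example values
# introduced here, not fixtures from this module.
def _example_mock_steps() -> Tuple[DecisionSteps, TerminalSteps]:
    return create_mock_steps(
        num_agents=2,
        observation_shapes=[(8,)],
        action_spec=ActionSpec(0, (3, 2)),
    )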
|
|
def create_steps_from_behavior_spec(
    behavior_spec: BehaviorSpec, num_agents: int = 1
) -> Tuple[DecisionSteps, TerminalSteps]:
    return create_mock_steps(
        num_agents=num_agents,
        observation_shapes=behavior_spec.observation_shapes,
        action_spec=behavior_spec.action_spec,
    )
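

# Illustrative sketch: the same mock steps can also be derived from a
# BehaviorSpec, mirroring how trainer tests usually obtain them. The vector and
# visual observation shapes, the continuous ActionSpec(4, ()), and the helper
# name are example values introduced here.
def _example_steps_from_spec() -> Tuple[DecisionSteps, TerminalSteps]:
    spec = BehaviorSpec([(8,), (84, 84, 3)], ActionSpec(4, ()))
    return create_steps_from_behavior_spec(spec, num_agents=3)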
|
|
def make_fake_trajectory(
    length: int,
    observation_shapes: List[Tuple],
    action_spec: ActionSpec,
    max_step_complete: bool = False,
    memory_size: int = 10,
) -> Trajectory:
    """
    Makes a fake trajectory of length length. If max_step_complete,
    the last step is marked as interrupted by max_step rather than done.
    """
    steps_list = []
    action_size = action_spec.size
    action_probs = np.ones(np.sum(action_spec.total_size), dtype=np.float32)
    for _i in range(length - 1):
        obs = []
        for _shape in observation_shapes:
            obs.append(np.ones(_shape, dtype=np.float32))
        action_mask = (
            [
                [False for _ in range(branch)]
                for branch in action_spec.discrete_branches
            ]  # type: ignore
            if action_spec.is_discrete()
            else None
        )
        prev_action = np.ones(action_size, dtype=np.float32)
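        # The remaining step/trajectory packing below is a reconstruction sketch:
        # it assumes the AgentExperience and Trajectory field names from
        # mlagents.trainers.trajectory (obs, reward, done, action, action_probs,
        # action_pre, action_mask, prev_action, interrupted, memory) and uses
        # placeholder agent/behavior ids.
        experience = AgentExperience(
            obs=obs,
            reward=1.0,
            done=False,
            action=np.zeros(action_size, dtype=np.float32),
            action_probs=action_probs,
            action_pre=np.zeros(action_size, dtype=np.float32),
            action_mask=action_mask,
            prev_action=prev_action,
            interrupted=False,
            memory=np.ones(memory_size, dtype=np.float32),
        )
        steps_list.append(experience)
    # The final step is terminal unless the episode was cut off by max_step.
    steps_list.append(
        experience._replace(done=not max_step_complete, interrupted=max_step_complete)
    )
    return Trajectory(
        steps=steps_list, agent_id="test_agent", behavior_id="test_brain", next_obs=obs
    )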
|
|
def simulate_rollout(
    length: int,
    behavior_spec: BehaviorSpec,
    memory_size: int = 10,
    exclude_key_list: List[str] = None,
) -> AgentBuffer:
    trajectory = make_fake_trajectory(
        length,
        behavior_spec.observation_shapes,
        action_spec=behavior_spec.action_spec,
        memory_size=memory_size,
    )
    buffer = trajectory.to_agentbuffer()
    # If a key_list was given, remove those keys
|
|
|