from typing import List, Tuple

import numpy as np

from mlagents.trainers.buffer import AgentBuffer
from mlagents.trainers.torch.action_log_probs import LogProbsTuple
from mlagents.trainers.trajectory import Trajectory, AgentExperience
from mlagents_envs.base_env import (
    DecisionSteps,
    ActionSpec,
    ActionTuple,
)


def make_fake_trajectory(
    length: int,
    observation_shapes: List[Tuple],
    action_spec: ActionSpec,
) -> Trajectory:
    """Make a fake trajectory of the given length, for use in tests."""
    steps_list = []

    action_size = action_spec.discrete_size + action_spec.continuous_size
    # Fake log-probabilities: all ones, with one entry per discrete branch
    # plus one per continuous action dimension.
    prob_ones = np.ones(
        int(np.sum(action_spec.discrete_branches) + action_spec.continuous_size),
        dtype=np.float32,
    )
    # Build the first (length - 1) experiences with fake observations and
    # zeroed actions.
    for _i in range(length - 1):
        obs = []
        for _shape in observation_shapes:
            obs.append(np.ones(_shape, dtype=np.float32))
        if action_spec.is_continuous():
            action = ActionTuple(continuous=np.zeros(action_size, dtype=np.float32))
            action_probs = LogProbsTuple(continuous=prob_ones)
        else:
            action = ActionTuple(discrete=np.zeros(action_size, dtype=np.float32))
            action_probs = LogProbsTuple(discrete=prob_ones)
        action_pre = np.zeros(action_size, dtype=np.float32)
        action_mask = (
            [
|
|
|