|
|
|
|
|
|
action_shape = None |
|
|
|
for act_type, act_array in exp.action.items(): |
|
|
|
agent_buffer_trajectory[act_type].append(act_array) |
|
|
|
action_shape = act_array.shape # TODO Better way to make mask |
|
|
|
for log_type, log_array in exp.action_probs.items(): |
|
|
|
agent_buffer_trajectory[log_type].append(log_array) |
|
|
|
|
|
|
|
|
|
|
else: |
|
|
|
# This should never be needed unless the environment somehow doesn't supply the |
|
|
|
# action mask in a discrete space. |
|
|
|
|
|
|
|
if "discrete_action" in exp.action: |
|
|
|
action_shape = exp.action["discrete_action"].shape |
|
|
|
else: |
|
|
|
action_shape = exp.action["continuous_action"].shape |
|
|
|
agent_buffer_trajectory["action_mask"].append( |
|
|
|
np.ones(action_shape, dtype=np.float32), padding_value=1 |
|
|
|
) |
|
|
|