|
|
|
|
|
|
episode_rewards = 0 |
|
|
|
tracked_agent = -1 |
|
|
|
while not done: |
|
|
|
if group_spec.is_continuous(): |
|
|
|
action = np.random.randn(len(decision_steps), group_spec.size) |
|
|
|
if group_spec.action_spec.is_continuous(): |
|
|
|
action = np.random.randn( |
|
|
|
len(decision_steps), group_spec.action_spec.size |
|
|
|
) |
|
|
|
elif group_spec.is_discrete(): |
|
|
|
branch_size = group_spec.discrete_branches |
|
|
|
elif group_spec.action_spec.is_discrete(): |
|
|
|
branch_size = group_spec.action_spec.discrete_branches |
|
|
|
action = np.column_stack( |
|
|
|
[ |
|
|
|
np.random.randint( |
|
|
|