|
|
|
|
|
|
seq_len=policy.sequence_length, |
|
|
|
) |
|
|
|
assert log_probs.shape == (64, policy.behavior_spec.action_spec.size) |
|
|
|
assert entropy.shape == (64, policy.behavior_spec.action_spec.size) |
|
|
|
assert entropy.shape == (64,) |
|
|
|
for val in values.values(): |
|
|
|
assert val.shape == (64,) |
|
|
|
|
|
|
|
|
|
|
) |
|
|
|
else: |
|
|
|
assert log_probs.shape == (64, policy.behavior_spec.action_spec.continuous_size) |
|
|
|
assert entropies.shape == (64, policy.behavior_spec.action_spec.size) |
|
|
|
assert entropies.shape == (64,) |
|
|
|
|
|
|
|
if rnn: |
|
|
|
assert memories.shape == (1, 1, policy.m_size) |