|
|
|
|
|
|
# Add two dists to the list. |
|
|
|
act_size = 2 |
|
|
|
test_prob = torch.tensor( |
|
|
|
[1.0 - 0.1 * (act_size - 1)] + [0.1] * (act_size - 1) |
|
|
|
[[1.0 - 0.1 * (act_size - 1)] + [0.1] * (act_size - 1)] |
|
|
|
) # High prob for first action |
|
|
|
dist_list = [CategoricalDistInstance(test_prob), CategoricalDistInstance(test_prob)] |
|
|
|
action_list = [torch.tensor([0]), torch.tensor([1])] |
|
|
|
|
|
|
assert all_probs.shape == (len(dist_list * act_size),) |
|
|
|
assert entropies.shape == (len(dist_list),) |
|
|
|
assert all_probs.shape == (1, len(dist_list * act_size)) |
|
|
|
assert entropies.shape == (1, len(dist_list)) |
|
|
|
# Make sure the first action has higher probability than the others. |
|
|
|
assert log_probs.flatten()[0] > log_probs.flatten()[1] |