|
|
|
|
|
|
import abc |
|
|
|
import numpy as np |
|
|
|
import math |
|
|
|
from mlagents.trainers.torch.layers import linear_layer, Initialization |
|
|
|
DiscreteDistInstance, |
|
|
|
GaussianDistribution, |
|
|
|
MultiCategoricalDistribution, |
|
|
|
) |
|
|
|
|
|
|
|
|
|
|
# NOTE(review): this region is corrupted extraction residue, not valid Python.
# Every line carries a trailing `|` artifact and indentation has been lost.
# The lines appear to interleave TWO revisions of a `_get_dists` method with
# fragments of a separate `forward`-style method — see the line-level notes
# below. Recover this file from version control rather than editing in place;
# it resembles Unity ML-Agents trainers/torch code — TODO confirm upstream.
def _get_dists( |
|
|
|
self, inputs: torch.Tensor, masks: torch.Tensor |
|
|
|
# NOTE(review): conflicting return annotations — the next line and the one
# after it look like the OLD and NEW signatures of the same method; only one
# can be real. Which revision is current cannot be told from this view.
) -> Tuple[List[DistInstance], List[DiscreteDistInstance]]: |
|
|
|
) -> List[DistInstance]: |
|
|
|
distribution_instances: List[DistInstance] = [] |
|
|
|
for distribution in self._distributions: |
|
|
|
dist_instances = distribution(inputs, masks) |
|
|
|
|
|
|
# NOTE(review): orphan closing paren — the call it closes is missing here.
) |
|
|
|
# Use the sum of entropy across actions, not the mean |
|
|
|
# NOTE(review): `entropies`, `actions`, `log_probs` are never defined in the
# visible body — these lines presumably belong to a different (forward-like)
# method of the same class, not to `_get_dists`. Verify against upstream.
entropy_sum = torch.sum(entropies, dim=1) |
|
|
|
# NOTE(review): two conflicting return statements follow (old vs new
# revision: raw `entropies` vs the summed `entropy_sum`); only one is real.
return (actions, log_probs, entropies) |
|
|
|
return (actions, log_probs, entropy_sum) |