
removed abstract class

/develop/actionmodel-csharp
Andrew Cohen, 4 years ago
Current commit
e686a785
3 files changed, 67 insertions and 55 deletions
  1. ml-agents/mlagents/trainers/tests/simple_test_envs.py (1 change)
  2. ml-agents/mlagents/trainers/tests/torch/test_simple_rl.py (56 changes)
  3. ml-agents/mlagents/trainers/torch/action_models.py (65 changes)

ml-agents/mlagents/trainers/tests/simple_test_envs.py (1 change)


        self.discrete_env.goal = self.goal

    def set_actions(self, behavior_name: BehaviorName, action) -> None:
        # print(action, self.goal[behavior_name])
        continuous_action = action[:, :self.continuous_action_size]
        discrete_action = action[:, self.continuous_action_size:]
        self.continuous_env.set_actions(behavior_name, continuous_action)
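
The updated set_actions in the hybrid test environment takes one flat action array and slices it column-wise into its continuous and discrete parts before forwarding each piece to the wrapped environment. A minimal sketch of that slicing on its own, with made-up sizes and values rather than the actual test code:

import numpy as np

continuous_action_size = 2  # hypothetical sizes, mirroring the HybridEnvironment arguments
discrete_action_size = 2

# One row per agent: continuous values first, then discrete branch choices
action = np.array([[0.1, -0.3, 1.0, 0.0],
                   [0.5, 0.2, 0.0, 1.0]], dtype=np.float32)

continuous_action = action[:, :continuous_action_size]  # shape (2, 2): the float part
discrete_action = action[:, continuous_action_size:]    # shape (2, 2): the branch indices

print(continuous_action.shape, discrete_action.shape)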

ml-agents/mlagents/trainers/tests/torch/test_simple_rl.py (56 changes)


    assert all(not math.isnan(reward) for reward in processed_rewards)
    assert all(reward > success_threshold for reward in processed_rewards)
#@pytest.mark.parametrize("use_discrete", [True, False])
#def test_simple_ppo(use_discrete):
# env = SimpleEnvironment([BRAIN_NAME], use_discrete=use_discrete)

#def test_hybrid_ppo():
# env = HybridEnvironment([BRAIN_NAME], action_size=1, step_size=0.2)
# config = attr.evolve(PPO_CONFIG, max_steps=10000)
# _check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=5.0)
def test_2dhybrid_ppo():
    env = HybridEnvironment([BRAIN_NAME], continuous_action_size=1, discrete_action_size=2, step_size=0.8)
def test_hybrid_ppo():
    env = HybridEnvironment([BRAIN_NAME], continuous_action_size=2, discrete_action_size=2, step_size=0.8)
        PPO_CONFIG.hyperparameters, batch_size=128, buffer_size=1280
        PPO_CONFIG.hyperparameters, batch_size=32, buffer_size=1280
    config = attr.evolve(PPO_CONFIG, hyperparameters=new_hyperparams, max_steps=100000)
    _check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=5.0)
    config = attr.evolve(PPO_CONFIG, hyperparameters=new_hyperparams, max_steps=10000)
    _check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=1.0)
#def test_conthybrid_ppo():
# env = HybridEnvironment([BRAIN_NAME], continuous_action_size=1, discrete_action_size=0, step_size=0.8)
# new_hyperparams = attr.evolve(
# PPO_CONFIG.hyperparameters, batch_size=128, buffer_size=1280
# )
# config = attr.evolve(PPO_CONFIG, hyperparameters=new_hyperparams, max_steps=10000)
# _check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=1.0)
#def test_dischybrid_ppo():
# env = HybridEnvironment([BRAIN_NAME], continuous_action_size=0, discrete_action_size=1, step_size=0.8)
# new_hyperparams = attr.evolve(
# PPO_CONFIG.hyperparameters, batch_size=128, buffer_size=1280
# )
# config = attr.evolve(PPO_CONFIG, hyperparameters=new_hyperparams, max_steps=10000)
# _check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=1.0)
#jdef test_2dhybrid_ppo():
#j env = HybridEnvironment([BRAIN_NAME], continuous_action_size=2, discrete_action_size=2, step_size=0.8)
#j new_hyperparams = attr.evolve(
#j PPO_CONFIG.hyperparameters, batch_size=256, buffer_size=2560, beta=.05
#j )
#j config = attr.evolve(PPO_CONFIG, hyperparameters=new_hyperparams, max_steps=10000)
#j _check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=1.0)
#j
#jdef test_3chybrid_ppo():
#j env = HybridEnvironment([BRAIN_NAME], continuous_action_size=2, discrete_action_size=1, step_size=0.8)
#j new_hyperparams = attr.evolve(
#j PPO_CONFIG.hyperparameters, batch_size=128, buffer_size=1280, beta=.01
#j )
#j config = attr.evolve(PPO_CONFIG, hyperparameters=new_hyperparams, max_steps=10000)
#j _check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=1.0)
#def test_3ddhybrid_ppo():
# env = HybridEnvironment([BRAIN_NAME], continuous_action_size=1, discrete_action_size=2, step_size=0.8)
# new_hyperparams = attr.evolve(
# PPO_CONFIG.hyperparameters, batch_size=128, buffer_size=1280, beta=.05
# )
# config = attr.evolve(PPO_CONFIG, hyperparameters=new_hyperparams, max_steps=10000)
# _check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=1.0)
#@pytest.mark.parametrize("use_discrete", [True, False])
#def test_2d_ppo(use_discrete):
# env = SimpleEnvironment(
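
All of these tests, live or commented out, follow the same pattern: build a HybridEnvironment, derive a trainer config from PPO_CONFIG with attr.evolve, and assert that training reaches the success threshold. A minimal sketch of the attr.evolve copy-and-override pattern, using simplified stand-in settings classes rather than the real ML-Agents config types:

import attr

@attr.s(auto_attribs=True)
class Hyperparameters:
    batch_size: int = 1024
    buffer_size: int = 10240
    beta: float = 5e-3

@attr.s(auto_attribs=True)
class TrainerConfig:
    hyperparameters: Hyperparameters = attr.Factory(Hyperparameters)
    max_steps: int = 500000

BASE_CONFIG = TrainerConfig()

# attr.evolve returns a modified copy, so the shared base config is never mutated
new_hyperparams = attr.evolve(BASE_CONFIG.hyperparameters, batch_size=128, buffer_size=1280)
config = attr.evolve(BASE_CONFIG, hyperparameters=new_hyperparams, max_steps=10000)

assert BASE_CONFIG.hyperparameters.batch_size == 1024  # base left untouched
assert config.hyperparameters.batch_size == 128        # override applied to the copy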

ml-agents/mlagents/trainers/torch/action_models.py (65 changes)


EPSILON = 1e-7  # Small value to avoid divide by zero

class ActionModel(nn.Module, abc.ABC):
    def _sample_action(self, dists: List[DistInstance]) -> List[torch.Tensor]:
        """
        Samples actions from list of distribution instances
        """
        actions = []
        for action_dist in dists:
            action = action_dist.sample()
            actions.append(action)
        return actions

    @abc.abstractmethod
    def evaluate(self, inputs: torch.Tensor, masks: torch.Tensor, actions: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
        """
        Returns the log_probs and entropies of actions
        """
        pass

    @abc.abstractmethod
    def get_action_out(self, inputs: torch.Tensor, masks: torch.Tensor) -> torch.Tensor:
        """
        Returns the tensor to be exported to ONNX for the distribution
        """
        pass

    @abc.abstractmethod
    def forward(self, inputs: torch.Tensor, masks: torch.Tensor):
        """
        Returns the actions, log probs and entropies for given input
        """
        pass

class HybridActionModel(ActionModel):
class HybridActionModel(nn.Module):
    def __init__(
        self,
        hidden_size: int,

        )
        )
        self._split_list.append(continuous_act_size)

    def _sample_action(self, dists: List[DistInstance]) -> List[torch.Tensor]:
        """
        Samples actions from list of distribution instances
        """
        actions = []
        for action_dist in dists:
            action = action_dist.sample()
            actions.append(action)
        return actions

    def _get_dists(self, inputs: torch.Tensor, masks: torch.Tensor) -> Tuple[List[DistInstance], List[DiscreteDistInstance]]:
        distribution_instances: List[DistInstance] = []
        for distribution in self._distributions:
            dist_instances = distribution(inputs, masks)
            for dist_instance in dist_instances:
                distribution_instances.append(dist_instance)
        return distribution_instances

    def evaluate(self, inputs: torch.Tensor, masks: torch.Tensor, actions: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
        dists = self._get_dists(inputs, masks)
        split_actions = torch.split(actions, self._split_list, dim=1)

        dists = self._get_dists(inputs, masks)
        return torch.cat([dist.exported_model_output() for dist in dists], dim=1)

    def _get_dists(self, inputs: torch.Tensor, masks: torch.Tensor) -> Tuple[List[DistInstance], List[DiscreteDistInstance]]:
        distribution_instances: List[DistInstance] = []
        for distribution in self._distributions:
            dist_instances = distribution(inputs, masks)
            for dist_instance in dist_instances:
                distribution_instances.append(dist_instance)
        return distribution_instances

    def forward(self, inputs: torch.Tensor, masks: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
        dists = self._get_dists(inputs, masks)
        action_outs : List[torch.Tensor] = []
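
In evaluate, torch.split with self._split_list undoes the concatenation that produced the flat hybrid action tensor, handing each distribution back the slice of actions it is responsible for. A minimal sketch of that split with hypothetical sizes (one continuous block plus two discrete branches), separate from the actual model code:

import torch

split_list = [2, 1, 1]  # hypothetical: 2 continuous actions, then two discrete branches
actions = torch.tensor([[0.1, -0.3, 1.0, 0.0],
                        [0.5, 0.2, 0.0, 1.0]])

# torch.split with a list of sizes returns one chunk per entry, in order,
# so each chunk lines up with the distribution that produced it
continuous_part, branch_0, branch_1 = torch.split(actions, split_list, dim=1)

print(continuous_part.shape)  # torch.Size([2, 2])
print(branch_0.shape)         # torch.Size([2, 1])
print(branch_1.shape)         # torch.Size([2, 1])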
