浏览代码

[MLA-1172] Reduce calls to training_behaviors (#4259)

/MLA-1734-demo-provider
GitHub 5 年前
当前提交
ac36b31f
共有 5 个文件被更改,包括 49 次插入20 次删除
  1. 5
      com.unity.ml-agents/CHANGELOG.md
  2. 23
      ml-agents/mlagents/trainers/env_manager.py
  3. 2
      ml-agents/mlagents/trainers/tests/test_subprocess_env_manager.py
  4. 3
      ml-agents/mlagents/trainers/tests/test_trainer_controller.py
  5. 36
      ml-agents/mlagents/trainers/trainer_controller.py

5
com.unity.ml-agents/CHANGELOG.md


### Major Changes
#### com.unity.ml-agents (C#)
#### ml-agents / ml-agents-envs / gym-unity (Python)
The minimum supported python version for ml-agents-envs was changed to 3.6.1. (#4244)
- The minimum supported python version for ml-agents-envs was changed to 3.6.1. (#4244)
- The interaction between EnvManager and TrainerController was changed; EnvManager.advance() was split into two stages,
and TrainerController now uses the results from the first stage to handle new behavior names. This change speeds up
Python training by approximately 5-10%. (#4259)
### Minor Changes
#### com.unity.ml-agents (C#)

23
ml-agents/mlagents/trainers/env_manager.py


def __init__(self):
self.policies: Dict[BehaviorName, TFPolicy] = {}
self.agent_managers: Dict[BehaviorName, AgentManager] = {}
self.first_step_infos: List[EnvironmentStep] = None
self.first_step_infos: List[EnvironmentStep] = []
def set_policy(self, brain_name: BehaviorName, policy: TFPolicy) -> None:
self.policies[brain_name] = policy

def close(self):
pass
def advance(self):
def get_steps(self) -> List[EnvironmentStep]:
"""
Updates the policies, steps the environments, and returns the step information from the environments.
Calling code should pass the returned EnvironmentSteps to process_steps() after calling this.
:return: The list of EnvironmentSteps
"""
if self.first_step_infos is not None:
if self.first_step_infos:
self.first_step_infos = None
self.first_step_infos = []
for brain_name in self.training_behaviors:
for brain_name in self.agent_managers.keys():
_policy = None
try:
# We make sure to empty the policy queue before continuing to produce steps.

except AgentManagerQueue.Empty:
if _policy is not None:
self.set_policy(brain_name, _policy)
# Step the environment
# policy_queue contains Policy, but we need a TFPolicy here
self.set_policy(brain_name, _policy) # type: ignore
# Step the environments
return new_step_infos
def process_steps(self, new_step_infos: List[EnvironmentStep]) -> int:
"""
Hand a list of EnvironmentSteps to self._process_step_infos() and return its result.
:param new_step_infos: The EnvironmentSteps to process.
:return: The value returned by self._process_step_infos() for these steps.
"""
# Add to AgentProcessor
num_step_infos = self._process_step_infos(new_step_infos)
return num_step_infos

2
ml-agents/mlagents/trainers/tests/test_subprocess_env_manager.py


}
step_info = EnvironmentStep(step_info_dict, 0, action_info_dict, env_stats)
step_mock.return_value = [step_info]
env_manager.advance()
env_manager.process_steps(env_manager.get_steps())
# Test add_experiences
env_manager._step.assert_called_once()

3
ml-agents/mlagents/trainers/tests/test_trainer_controller.py


tc.advance(env_mock)
env_mock.reset.assert_not_called()
env_mock.advance.assert_called_once()
env_mock.get_steps.assert_called_once()
env_mock.process_steps.assert_called_once()
# May have been called many times due to thread
trainer_mock.advance.call_count > 0

36
ml-agents/mlagents/trainers/trainer_controller.py


from mlagents.tf_utils import tf
from mlagents_envs.logging_util import get_logger
from mlagents.trainers.env_manager import EnvManager
from mlagents.trainers.env_manager import EnvManager, EnvironmentStep
from mlagents_envs.exception import (
UnityEnvironmentException,
UnityCommunicationException,

self.train_model = train
self.param_manager = param_manager
self.ghost_controller = self.trainer_factory.ghost_controller
self.registered_behavior_ids: Set[str] = set()
self.trainer_threads: List[threading.Thread] = []
self.kill_trainers = False

)
@timed
def _reset_env(self, env: EnvManager) -> None:
def _reset_env(self, env_manager: EnvManager) -> None:
"""Resets the environment.
Returns:

new_config = self.param_manager.get_current_samplers()
env.reset(config=new_config)
env_manager.reset(config=new_config)
# Register any new behavior ids that were generated on the reset.
self._register_new_behaviors(env_manager, env_manager.first_step_infos)
def _not_done_training(self) -> bool:
return (

def start_learning(self, env_manager: EnvManager) -> None:
self._create_output_path(self.output_path)
tf.reset_default_graph()
last_brain_behavior_ids: Set[str] = set()
external_brain_behavior_ids = set(env_manager.training_behaviors.keys())
new_behavior_ids = external_brain_behavior_ids - last_brain_behavior_ids
self._create_trainers_and_managers(env_manager, new_behavior_ids)
last_brain_behavior_ids = external_brain_behavior_ids
n_steps = self.advance(env_manager)
for _ in range(n_steps):
self.reset_env_if_ready(env_manager)

env.set_env_parameters(self.param_manager.get_current_samplers())
@timed
def advance(self, env: EnvManager) -> int:
def advance(self, env_manager: EnvManager) -> int:
num_steps = env.advance()
new_step_infos = env_manager.get_steps()
self._register_new_behaviors(env_manager, new_step_infos)
num_steps = env_manager.process_steps(new_step_infos)
# Report current lesson for each environment parameter
for (

trainer.advance()
return num_steps
def _register_new_behaviors(
self, env_manager: EnvManager, step_infos: List[EnvironmentStep]
) -> None:
"""
Handle registration (adding trainers and managers) of new behavior ids.
:param env_manager: Passed through to _create_trainers_and_managers() along with the new ids.
:param step_infos: EnvironmentSteps whose name_behavior_ids are collected and compared
against self.registered_behavior_ids.
:return: None. self.registered_behavior_ids is updated in place.
"""
# Gather every behavior id that appears in any of the given steps.
step_behavior_ids: Set[str] = set()
for s in step_infos:
step_behavior_ids |= set(s.name_behavior_ids)
# Only ids that have not been registered before are treated as new.
new_behavior_ids = step_behavior_ids - self.registered_behavior_ids
self._create_trainers_and_managers(env_manager, new_behavior_ids)
# Remember everything seen so later calls do not register it again.
self.registered_behavior_ids |= step_behavior_ids
def join_threads(self, timeout_seconds: float = 1.0) -> None:
"""

正在加载...
取消
保存