
Merge pull request #1922 from Unity-Technologies/release-v08-slowflag

Fix '--slow' flag after environment updates
Branch: develop-generalizationTraining-TrainerController
Committed by GitHub, 5 years ago
Current commit: c613df3a
8 files changed, with 17 insertions and 23 deletions
  1. ml-agents-envs/mlagents/envs/environment.py (11 changes)
  2. ml-agents/mlagents/trainers/learn.py (11 changes)
  3. ml-agents/mlagents/trainers/tests/test_learn.py (3 changes)
  4. ml-agents/mlagents/trainers/tests/test_trainer_controller.py (5 changes)
  5. ml-agents/mlagents/trainers/trainer_controller.py (10 changes)
  6. ml-agents/mlagents/trainers/tests/test_trainer_metrics.py (renamed, 0 changes)
  7. ml-agents-envs/mlagents/envs/tests/test_subprocess_unity_environment.py (renamed, 0 changes)
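Read together, the hunks below move the simulation-speed decision out of UnityEnvironment (where learn.py previously passed train_mode=(not fast_simulation) at construction time and reset() fell back to a cached self._train_mode) and onto TrainerController, which now passes train_mode explicitly on every reset. As a toy model of the pre-PR failure mode (FakeEnv is a hypothetical stand-in, not the real UnityEnvironment API):

    # Toy model of the pre-PR behaviour; FakeEnv is hypothetical.
    class FakeEnv:
        def __init__(self, train_mode=True):
            self._train_mode = train_mode      # cached once, at construction

        def reset(self, train_mode=None):
            # reset(None) fell back to the cached constructor value
            if train_mode is None:
                train_mode = self._train_mode
            return train_mode

    env = FakeEnv(train_mode=False)            # user launched with '--slow'
    assert env.reset() is False                # honoured at first...
    relaunched = FakeEnv()                     # env recreated with defaults
    assert relaunched.reset() is True          # ...'--slow' silently lost

Because the cached value lived on the environment object, recreating the environment reverted to the default, which is one plausible reading of the bug this PR's title describes as losing the '--slow' flag "after environment updates".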

ml-agents-envs/mlagents/envs/environment.py (11 changes)


                  seed: int = 0,
                  docker_training: bool = False,
                  no_graphics: bool = False,
-                 timeout_wait: int = 30,
-                 train_mode: bool = True):
+                 timeout_wait: int = 30):
         """
         Starts a new unity environment and establishes a connection with the environment.
         Notice: Currently communication between Unity and Python takes place over an open socket without authentication.

         self._loaded = False  # If true, this means the environment was successfully loaded
         self.proc1 = None  # The process that is started. If None, no process was started
         self.communicator = self.get_communicator(worker_id, base_port, timeout_wait)
-        self._train_mode = train_mode
         # If the environment name is None, a new environment will not be launched
         # and the communicator will directly try to connect to an existing unity environment.

                               for k in self._resetParameters])) + '\n' + \
                '\n'.join([str(self._brains[b]) for b in self._brains])

-    def reset(self, config=None, train_mode=None, custom_reset_parameters=None) -> AllBrainInfo:
+    def reset(self, config=None, train_mode=True, custom_reset_parameters=None) -> AllBrainInfo:
         """
         Sends a signal to reset the unity environment.
         :return: AllBrainInfo : A data structure corresponding to the initial reset state of the environment.

                 else:
                     raise UnityEnvironmentException(
                         "The parameter '{0}' is not a valid parameter.".format(k))
-        if train_mode is None:
-            train_mode = self._train_mode
-        else:
-            self._train_mode = train_mode
         if self._loaded:
             outputs = self.communicator.exchange(
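The post-PR counterpart of the toy model above: train_mode is no longer constructor state, only a per-reset() argument that defaults to True. FakeEnv is again a hypothetical stand-in, not the real UnityEnvironment:

    # Hypothetical stand-in mirroring the new reset() contract.
    class FakeEnv:
        def __init__(self, timeout_wait=30):
            self._loaded = True                # no cached _train_mode anymore

        def reset(self, config=None, train_mode=True, custom_reset_parameters=None):
            # the caller decides the speed mode on every reset
            return train_mode

    env = FakeEnv()
    assert env.reset() is True                   # default: fast simulation
    assert env.reset(train_mode=False) is False  # '--slow' behaviour, per call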

ml-agents/mlagents/trainers/learn.py (11 changes)


         docker_target_name,
         no_graphics,
         run_seed,
-        base_port + (sub_id * num_envs),
-        fast_simulation
+        base_port + (sub_id * num_envs)
     )
     env = SubprocessUnityEnvironment(env_factory, num_envs)
     maybe_meta_curriculum = try_create_meta_curriculum(curriculum_folder, env)

         save_freq, maybe_meta_curriculum,
         load_model, train_model,
         keep_checkpoints, lesson, env.external_brains,
-        run_seed)
+        run_seed, fast_simulation)
     # Signal that environment has been launched.
     process_queue.put(True)

         docker_target_name: str,
         no_graphics: bool,
         seed: Optional[int],
-        start_port: int,
-        fast_simulation: bool
+        start_port: int
 ) -> Callable[[int], BaseUnityEnvironment]:
     if env_path is not None:
         # Strip out executable extensions if passed

             seed=env_seed,
             docker_training=docker_training,
             no_graphics=no_graphics,
-            base_port=start_port,
-            train_mode=(not fast_simulation)
+            base_port=start_port
         )
     return create_unity_environment
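On the factory side, create_unity_environment no longer closes over fast_simulation; the flag instead travels with TrainerController. A simplified, hedged sketch of the new factory shape, with a plain dict standing in for the UnityEnvironment construction and most parameters omitted:

    from typing import Callable

    # Simplified sketch; the real factory builds a UnityEnvironment
    # with many more parameters.
    def create_environment_factory(seed: int, start_port: int) -> Callable[[int], dict]:
        def create_unity_environment(worker_id: int) -> dict:
            # note: no train_mode here; it is applied at reset() time instead
            return {"seed": seed, "base_port": start_port, "worker_id": worker_id}
        return create_unity_environment

    factory = create_environment_factory(seed=0, start_port=5005)
    print(factory(1))   # {'seed': 0, 'base_port': 5005, 'worker_id': 1}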

ml-agents/mlagents/trainers/tests/test_learn.py (3 changes)


         5,
         0,
         subproc_env_mock.return_value.external_brains,
-        0
+        0,
+        True
     )

ml-agents/mlagents/trainers/tests/test_trainer_controller.py (5 changes)


         keep_checkpoints=False,
         lesson=None,
         external_brains={'testbrain': brain_info},
-        training_seed=99
+        training_seed=99,
+        fast_simulation=True
     )
 @patch('numpy.random.seed')

-    TrainerController('', '', '1', 1, None, True, False, False, None, {}, seed)
+    TrainerController('', '', '1', 1, None, True, False, False, None, {}, seed, True)
     numpy_random_seed.assert_called_with(seed)
     tensorflow_set_seed.assert_called_with(seed)
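Both test updates (here and in test_learn.py above) follow mechanically from the new signature: fast_simulation becomes the final positional argument of TrainerController. A hypothetical mock-style check in the same spirit as these tests:

    from unittest.mock import MagicMock

    # Hypothetical check: the last positional argument is fast_simulation.
    TrainerControllerMock = MagicMock()
    TrainerControllerMock('', '', '1', 1, None, True, False, False, None, {}, 99, True)
    args, _ = TrainerControllerMock.call_args
    assert args[-1] is True   # fast_simulation passed through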

ml-agents/mlagents/trainers/trainer_controller.py (10 changes)


 from mlagents.envs import AllBrainInfo, BrainParameters
 from mlagents.envs.base_unity_environment import BaseUnityEnvironment
 from mlagents.envs.exception import UnityEnvironmentException
-from mlagents.trainers import Trainer, Policy
+from mlagents.trainers import Trainer
 from mlagents.trainers.ppo.trainer import PPOTrainer
 from mlagents.trainers.bc.offline_trainer import OfflineBCTrainer
 from mlagents.trainers.bc.online_trainer import OnlineBCTrainer

                  keep_checkpoints: int,
                  lesson: Optional[int],
                  external_brains: Dict[str, BrainParameters],
-                 training_seed: int):
+                 training_seed: int,
+                 fast_simulation: bool):
         """
         :param model_path: Path to save the model.
         :param summaries_dir: Folder to save training summaries.

         self.meta_curriculum = meta_curriculum
         self.seed = training_seed
         self.training_start_time = time()
+        self.fast_simulation = fast_simulation
         np.random.seed(self.seed)
         tf.set_random_seed(self.seed)

         environment.
         """
         if self.meta_curriculum is not None:
-            return env.reset(config=self.meta_curriculum.get_config())
-        return env.reset()
+            return env.reset(train_mode=self.fast_simulation, config=self.meta_curriculum.get_config())
+        return env.reset(train_mode=self.fast_simulation)

     def start_learning(self, env: BaseUnityEnvironment, trainer_config):
         # TODO: Should be able to start learning at different lesson numbers
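To make the new control flow in the reset path concrete, a runnable sketch with the environment and meta-curriculum stubbed out; StubEnv and StubController are hypothetical stand-ins, not the real classes:

    # Hypothetical stubs reproducing the new reset dispatch.
    class StubEnv:
        def reset(self, train_mode=True, config=None):
            return {"train_mode": train_mode, "config": config}

    class StubController:
        def __init__(self, fast_simulation, meta_curriculum=None):
            self.fast_simulation = fast_simulation
            self.meta_curriculum = meta_curriculum

        def _reset_env(self, env):
            # train_mode now rides along on every reset, so a relaunched
            # environment can no longer fall back to a stale cached value
            if self.meta_curriculum is not None:
                return env.reset(train_mode=self.fast_simulation,
                                 config=self.meta_curriculum.get_config())
            return env.reset(train_mode=self.fast_simulation)

    slow = StubController(fast_simulation=False)      # '--slow' on the CLI
    assert slow._reset_env(StubEnv())["train_mode"] is False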

Renamed without further changes:

/ml-agents/tests/trainers/test_trainer_metrics.py → /ml-agents/mlagents/trainers/tests/test_trainer_metrics.py

/ml-agents/tests/envs/test_subprocess_unity_environment.py → /ml-agents-envs/mlagents/envs/tests/test_subprocess_unity_environment.py
