
[Bug fix] Hard reset when team changes (#3870) (#3899)

/release_1_branch
GitHub, 4 years ago
Current commit
d8b93f8f
7 files changed, with 633 additions and 426 deletions
  1. Project/Assets/ML-Agents/Examples/Tennis/Prefabs/TennisArea.prefab (26)
  2. Project/Assets/ML-Agents/Examples/Tennis/TFModels/Tennis.nn (1001)
  3. com.unity.ml-agents/CHANGELOG.md (3)
  4. ml-agents/mlagents/trainers/ghost/controller.py (13)
  5. ml-agents/mlagents/trainers/tests/simple_test_envs.py (3)
  6. ml-agents/mlagents/trainers/tests/test_trainer_controller.py (9)
  7. ml-agents/mlagents/trainers/trainer_controller.py (4)

Project/Assets/ML-Agents/Examples/Tennis/Prefabs/TennisArea.prefab (26)


m_Name:
m_EditorClassIdentifier:
m_BrainParameters:
vectorObservationSize: 9
numStackedVectorObservations: 3
vectorActionSize: 03000000
vectorActionDescriptions: []
vectorActionSpaceType: 1
VectorObservationSize: 9
NumStackedVectorObservations: 3
VectorActionSize: 03000000
VectorActionDescriptions: []
VectorActionSpaceType: 1
m_Model: {fileID: 11400000, guid: d6c5e749e4ceb4cf79640a5955706d3d, type: 3}
m_InferenceDevice: 0
m_BehaviorType: 0

agentParameters:
maxStep: 0
hasUpgradedFromAgentParameters: 1
maxStep: 0
MaxStep: 0
ball: {fileID: 1273406647218856}
invertX: 0
score: 0

m_EditorClassIdentifier:
DecisionPeriod: 5
TakeActionsBetweenDecisions: 1
offsetStep: 0
--- !u!114 &6467897465973556822
MonoBehaviour:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
m_BrainParameters:
vectorObservationSize: 9
numStackedVectorObservations: 3
vectorActionSize: 03000000
vectorActionDescriptions: []
vectorActionSpaceType: 1
VectorObservationSize: 9
NumStackedVectorObservations: 3
VectorActionSize: 03000000
VectorActionDescriptions: []
VectorActionSpaceType: 1
m_Model: {fileID: 11400000, guid: d6c5e749e4ceb4cf79640a5955706d3d, type: 3}
m_InferenceDevice: 0
m_BehaviorType: 0

agentParameters:
maxStep: 0
hasUpgradedFromAgentParameters: 1
maxStep: 0
MaxStep: 0
ball: {fileID: 1273406647218856}
invertX: 1
score: 0

m_EditorClassIdentifier:
DecisionPeriod: 5
TakeActionsBetweenDecisions: 1
offsetStep: 0
--- !u!114 &1420140102966759323
MonoBehaviour:
m_ObjectHideFlags: 0

Project/Assets/ML-Agents/Examples/Tennis/TFModels/Tennis.nn (1001)
The file diff is too large to display.

com.unity.ml-agents/CHANGELOG.md (3)


#### ml-agents / ml-agents-envs / gym-unity (Python)
- Fixed an issue where exceptions from environments provided a returncode of 0.
(#3680)
- Self-Play team changes will now trigger a full environment reset. This prevents trajectories
in progress during a team change from getting into the buffer. (#3870)
## [0.15.1-preview] - 2020-03-30
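
The fix described by the changelog entry above works by latching a flag when the learning team swaps and having the training loop read that flag once per update. The sketch below is a minimal, hypothetical illustration of that pattern, not the ml-agents implementation; the names SimpleTeamController and run_training_step are invented for this example.

# Minimal sketch of "hard reset on team change" (hypothetical names throughout).

class SimpleTeamController:
    """Latches a flag when the learning team changes; reading it clears it."""

    def __init__(self) -> None:
        self._learning_team = -1
        self._changed_training_team = False

    def change_training_team(self, team_id: int) -> None:
        if team_id != self._learning_team:
            self._learning_team = team_id
            self._changed_training_team = True

    def should_reset(self) -> bool:
        changed = self._changed_training_team
        self._changed_training_team = False
        return changed


def run_training_step(env, controller: SimpleTeamController) -> None:
    # A full reset discards trajectories that were in flight during the team
    # change, so they never end up in the update buffer.
    if controller.should_reset():
        env.reset()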

ml-agents/mlagents/trainers/ghost/controller.py (13)


        self._learning_team: int = -1
        # Dict from team id to GhostTrainer for ELO calculation
        self._ghost_trainers: Dict[int, GhostTrainer] = {}
        # Signals to the trainer control to perform a hard change_training_team
        self._changed_training_team = False

    @property
    def get_learning_team(self) -> int:
        """
        Returns the current learning team.
        :return: The learning team id
        """
        return self._learning_team

    def should_reset(self) -> bool:
        """
        Whether or not a team change occurred. Causes a full reset in trainer_controller.
        :return: The truth value of the team changing
        """
        changed_team = self._changed_training_team
        if self._changed_training_team:
            self._changed_training_team = False
        return changed_team

    def subscribe_team_id(self, team_id: int, trainer: GhostTrainer) -> None:
        """

        logger.debug(
            "Learning team {} swapped on step {}".format(self._learning_team, step)
        )
        self._changed_training_team = True

# Adapted from https://github.com/Unity-Technologies/ml-agents/pull/1975 and
# https://metinmediamath.wordpress.com/2013/11/27/how-to-calculate-the-elo-rating-including-example/
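
Note that should_reset() above is read-and-clear: the first call after a swap returns True and lowers the flag, so later calls return False until the next team change. A small, hypothetical demonstration follows; it sets the private flag directly only to stand in for a real swap, which change_training_team() would normally signal.

# Hypothetical demonstration of should_reset()'s read-and-clear behavior.
from mlagents.trainers.ghost.controller import GhostController

controller = GhostController()
assert controller.should_reset() is False  # no team change has happened yet

controller._changed_training_team = True   # stand-in for a real team swap
assert controller.should_reset() is True   # first read reports the change...
assert controller.should_reset() is False  # ...and clears the flag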

ml-agents/mlagents/trainers/tests/simple_test_envs.py (3)


        self.goal[name] = self.random.choice([-1, 1])
        self.positions[name] = [0.0 for _ in range(self.action_size)]
        self.step_count[name] = 0
        self.final_rewards[name].append(self.rewards[name])
        self.rewards[name] = 0
        self.agent_id[name] = self.agent_id[name] + 1

        decision_step = DecisionSteps(m_vector_obs, m_reward, m_agent_id, action_mask)
        terminal_step = TerminalSteps.empty(self.behavior_spec)
        if done:
            self.final_rewards[name].append(self.rewards[name])
            self._reset_agent(name)
            new_vector_obs = self._make_obs(self.goal[name])
            (

        decision_step = DecisionSteps(m_vector_obs, m_reward, m_agent_id, action_mask)
        terminal_step = TerminalSteps.empty(self.behavior_spec)
        if done:
            self.final_rewards[name].append(self.rewards[name])
            self._reset_agent(name)
            recurrent_obs_val = (
                self.goal[name] if self.step_count[name] <= self.num_show_steps else 0
ml-agents/mlagents/trainers/tests/test_trainer_controller.py (9)


from mlagents.tf_utils import tf
from mlagents.trainers.trainer_controller import TrainerController
from mlagents.trainers.ghost.controller import GhostController

    trainer_factory_mock = MagicMock()
    trainer_factory_mock.ghost_controller = GhostController()
        trainer_factory=None,
        trainer_factory=trainer_factory_mock,
        model_path="test_model_path",
        summaries_dir="test_summaries_dir",
        run_id="test_run_id",

@patch.object(tf, "set_random_seed")
def test_initialization_seed(numpy_random_seed, tensorflow_set_seed):
    seed = 27
    trainer_factory_mock = MagicMock()
    trainer_factory_mock.ghost_controller = GhostController()
        trainer_factory=None,
        trainer_factory=trainer_factory_mock,
        model_path="",
        summaries_dir="",
        run_id="1",

ml-agents/mlagents/trainers/trainer_controller.py (4)


        self.meta_curriculum = meta_curriculum
        self.sampler_manager = sampler_manager
        self.resampling_interval = resampling_interval
        self.ghost_controller = self.trainer_factory.ghost_controller
        self.trainer_threads: List[threading.Thread] = []
        self.kill_trainers = False

            and (self.resampling_interval)
            and (steps % self.resampling_interval == 0)
        )
        if meta_curriculum_reset or generalization_reset:
        ghost_controller_reset = self.ghost_controller.should_reset()
        if meta_curriculum_reset or generalization_reset or ghost_controller_reset:
            self.end_trainer_episodes(env, lessons_incremented)

    @timed
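
The last hunk above adds the ghost controller's flag to the existing reset condition: previously only curriculum and resampling events forced a reset, now a self-play team swap does too. A simplified, hypothetical sketch of that decision (the real reset_env_if_ready derives the first two flags from curriculum and sampler state not shown here):

# Simplified, hypothetical sketch of the reset decision after this change.

class ResetDecisionSketch:
    def __init__(self, ghost_controller) -> None:
        self.ghost_controller = ghost_controller

    def maybe_reset(self, env, meta_curriculum_reset: bool, generalization_reset: bool) -> bool:
        # New in this commit: a self-play team swap also forces a hard reset,
        # so trajectories collected across the swap never reach the buffer.
        ghost_controller_reset = self.ghost_controller.should_reset()
        if meta_curriculum_reset or generalization_reset or ghost_controller_reset:
            env.reset()
            return True
        return False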
