浏览代码

passes all pytest and C# tests

/develop/magic-string
Andrew Cohen 5 年前
当前提交
de902fbb
共有 5 个文件被更改,包括 28 次插入、21 次删除
  1. 2
      UnitySDK/Assets/ML-Agents/Editor/BehaviorParametersEditor.cs
  2. 2
      ml-agents/mlagents/trainers/ppo/trainer.py
  3. 39
      ml-agents/mlagents/trainers/sac/trainer.py
  4. 4
      ml-agents/mlagents/trainers/tests/test_trainer_controller.py
  5. 2
      ml-agents/mlagents/trainers/trainer_controller.py

2
UnitySDK/Assets/ML-Agents/Editor/BehaviorParametersEditor.cs


EditorGUI.indentLevel--;
EditorGUILayout.PropertyField(so.FindProperty("m_BehaviorType"));
EditorGUILayout.PropertyField(so.FindProperty("m_TeamID"));
// EditorGUILayout.PropertyField(serializedObject.FindProperty("m_Heuristic"), true);
EditorGUILayout.PropertyField(so.FindProperty("m_TeamID"));
EditorGUI.indentLevel--;
if (EditorGUI.EndChangeCheck())
{

2
ml-agents/mlagents/trainers/ppo/trainer.py


import numpy as np
from mlagents.envs.brain import BrainParameters, BrainInfo
from mlagents.trainers.brain import BrainParameters, BrainInfo
from mlagents.trainers.tf_policy import TFPolicy
from mlagents.trainers.ppo.policy import PPOPolicy
from mlagents.trainers.ppo.multi_gpu_policy import MultiGpuPPOPolicy, get_devices

39
ml-agents/mlagents/trainers/sac/trainer.py


import numpy as np
from mlagents.envs.brain import BrainParameters, BrainInfo
from mlagents.envs.action_info import ActionInfoOutputs
from mlagents.trainers.brain import BrainParameters, BrainInfo
from mlagents.trainers.action_info import ActionInfoOutputs
from mlagents.envs.timers import timed
from mlagents.trainers.tf_policy import TFPolicy
from mlagents.trainers.sac.policy import SACPolicy

else False
)
# Load the replay buffer if load
if load and self.checkpoint_replay_buffer:
try:
self.load_replay_buffer()
except (AttributeError, FileNotFoundError):
LOGGER.warning(
"Replay buffer was unable to load, starting from scratch."
)
LOGGER.debug(
"Loaded update buffer with {} sequences".format(
self.update_buffer.num_experiences
)
)
self.episode_steps = {}
def save_model(self) -> None:

"""
Save the training buffer's update buffer to a pickle file.
"""
filename = os.path.join(self.policy.model_path, "last_replay_buffer.hdf5")
filename = os.path.join(
self.trainer_parameters["model_path"], "last_replay_buffer.hdf5"
)
LOGGER.info("Saving Experience Replay Buffer to {}".format(filename))
with open(filename, "wb") as file_object:
self.update_buffer.save_to_file(file_object)

Loads the last saved replay buffer from a file.
"""
filename = os.path.join(self.policy.model_path, "last_replay_buffer.hdf5")
filename = os.path.join(
self.trainer_parameters["model_path"], "last_replay_buffer.hdf5"
)
LOGGER.info("Loading Experience Replay Buffer from {}".format(filename))
with open(filename, "rb+") as file_object:
self.update_buffer.load_from_file(file_object)

for _reward_signal in policy.reward_signals.keys():
self.collected_rewards[_reward_signal] = {}
# Load the replay buffer if load
if self.load and self.checkpoint_replay_buffer:
try:
self.load_replay_buffer()
except (AttributeError, FileNotFoundError):
LOGGER.warning(
"Replay buffer was unable to load, starting from scratch."
)
LOGGER.debug(
"Loaded update buffer with {} sequences".format(
self.update_buffer.num_experiences
)
)
return policy
def update_sac_policy(self) -> None:

4
ml-agents/mlagents/trainers/tests/test_trainer_controller.py


env_mock.step.return_value = [new_step_info]
env_mock.reset.return_value = [old_step_info]
tc.brain_name_to_identifier[brain_name].add(brain_name)
tc.advance(env_mock)
env_mock.reset.assert_not_called()
env_mock.step.assert_called_once()

env_mock = MagicMock()
env_mock.step.return_value = [new_step_info]
env_mock.reset.return_value = [old_step_info]
tc.brain_name_to_identifier[brain_name].add(brain_name)
tc.advance(env_mock)
env_mock.reset.assert_not_called()

2
ml-agents/mlagents/trainers/trainer_controller.py


if brain_name in self.trainer_metrics:
self.trainer_metrics[brain_name].add_delta_step(delta_time_step)
print(brain_name)
print(self.brain_name_to_identifier[brain_name])
for name_behavior_id in self.brain_name_to_identifier[brain_name]:
if step_info.has_actions_for_brain(name_behavior_id):
trainer.add_experiences(

正在加载...
取消
保存