
Commit message: update

Branch: /develop/add-fire/ckpt-2
Ruo-Ping Dong, 4 years ago
Current commit: 523248be
9 files changed: 67 insertions(+), 44 deletions(-)
  1. .yamato/com.unity.ml-agents-performance.yml (2 changes)
  2. ml-agents/mlagents/trainers/ghost/trainer.py (5 changes)
  3. ml-agents/mlagents/trainers/ppo/trainer.py (12 changes)
  4. ml-agents/mlagents/trainers/sac/trainer.py (12 changes)
  5. ml-agents/mlagents/trainers/saver/saver.py (29 changes)
  6. ml-agents/mlagents/trainers/saver/tf_saver.py (29 changes)
  7. ml-agents/mlagents/trainers/tests/test_ppo.py (11 changes)
  8. ml-agents/mlagents/trainers/tests/test_sac.py (6 changes)
  9. ml-agents/mlagents/trainers/trainer/trainer.py (5 changes)

.yamato/com.unity.ml-agents-performance.yml (2 changes)

   variables:
     UNITY_VERSION: {{ editor.version }}
   commands:
-    - python -m pip install unity-downloader-cli --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
+    - python -m pip install unity-downloader-cli --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple --upgrade
     - unity-downloader-cli -u {{ editor.version }} -c editor --wait --fast
     - curl -s https://artifactory.internal.unity3d.com/core-automation/tools/utr-standalone/utr --output utr
     - chmod +x ./utr
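
Note: without --upgrade, pip treats an already-installed unity-downloader-cli as satisfying the requirement and leaves whatever version the CI image ships; the added flag makes each run pull the latest release from the index.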

ml-agents/mlagents/trainers/ghost/trainer.py (5 changes)

         return policy

     def add_policy(
-        self, parsed_behavior_id: BehaviorIdentifiers, policy: Policy
+        self,
+        parsed_behavior_id: BehaviorIdentifiers,
+        policy: Policy,
+        register_saver: bool = True,
     ) -> None:
         """
         Adds policy to GhostTrainer.
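
The flag is threaded through GhostTrainer as well, keeping its override signature-compatible with the abstract Trainer.add_policy at the end of this commit; the saver docstring changed in this same commit notes that the optional-policy path exists mainly for the ghost trainer's fixed policy.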

ml-agents/mlagents/trainers/ppo/trainer.py (12 changes)

         return policy

     def add_policy(
-        self, parsed_behavior_id: BehaviorIdentifiers, policy: Policy
+        self,
+        parsed_behavior_id: BehaviorIdentifiers,
+        policy: Policy,
+        register_saver: bool = True,
     ) -> None:
         """
         Adds policy to trainer.

         for _reward_signal in self.optimizer.reward_signals.keys():
             self.collected_rewards[_reward_signal] = defaultdict(lambda: 0)
-        self.saver.register(self.policy)
-        self.saver.register(self.optimizer)
-        self.saver.initialize_or_load(self.policy)
+        if register_saver:
+            self.saver.register(self.policy)
+            self.saver.register(self.optimizer)
+            self.saver.initialize_or_load(self.policy)
         # Needed to resume loads properly
         self.step = policy.get_current_step()
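
The pattern here: side-effectful saver registration is now gated behind a default-on keyword flag, so unit tests can pass register_saver=False and drive add_policy with a bare mock policy. A minimal, self-contained sketch of that pattern (all names below are illustrative, not the actual trainer):

    from unittest import mock

    class TrainerSketch:
        def __init__(self, saver):
            self.saver = saver

        def add_policy(self, policy, register_saver: bool = True):
            self.policy = policy
            if register_saver:
                # Production path: capture policy state in checkpoints,
                # then restore or freshly initialize the weights.
                self.saver.register(policy)
                self.saver.initialize_or_load(policy)
            # Needed to resume loads properly, saver or no saver.
            self.step = policy.get_current_step()

    trainer = TrainerSketch(saver=mock.Mock())
    trainer.add_policy(mock.Mock(**{"get_current_step.return_value": 0}),
                       register_saver=False)
    assert trainer.step == 0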

ml-agents/mlagents/trainers/sac/trainer.py (12 changes)

             self._stats_reporter.add_stat(stat, np.mean(stat_list))

     def add_policy(
-        self, parsed_behavior_id: BehaviorIdentifiers, policy: Policy
+        self,
+        parsed_behavior_id: BehaviorIdentifiers,
+        policy: Policy,
+        register_saver: bool = True,
     ) -> None:
         """
         Adds policy to trainer.

         for _reward_signal in self.optimizer.reward_signals.keys():
             self.collected_rewards[_reward_signal] = defaultdict(lambda: 0)
-        self.saver.register(self.policy)
-        self.saver.register(self.optimizer)
-        self.saver.initialize_or_load(self.policy)
+        if register_saver:
+            self.saver.register(self.policy)
+            self.saver.register(self.optimizer)
+            self.saver.initialize_or_load(self.policy)
         # Needed to resume loads properly
         self.step = policy.get_current_step()

ml-agents/mlagents/trainers/saver/saver.py (29 changes)

     def __init__(self):
         pass

     @abc.abstractmethod
     def register(self, module):
         """
         Register the modules to the Saver.
         The Saver will store the module and include it in the saved files
         when saving checkpoint/exporting graph.
         :param module: the module to be registered
         """
         pass

+    def _register_policy(self, policy):
+        """
+        Helper function for registering policy to the Saver.
+        :param policy: the policy to be registered
+        """
+        pass
+
+    def _register_optimizer(self, optimizer):
+        """
+        Helper function for registering optimizer to the Saver.
+        :param optimizer: the optimizer to be registered
+        """
+        pass

     @abc.abstractmethod

     @abc.abstractmethod
     def initialize_or_load(self, policy):
         """
-        If there is an initialize path, load from that. Else, load from the set model path.
-        If load is set to True, don't reset steps to 0. Else, do. This allows a user to,
-        e.g., resume from an initialize path.
+        Initialize/Load registered modules by default.
+        If given input argument policy, do with the input policy instead.
+        This argument is mainly for the initialization of the ghost trainer's fixed policy.
+        :param policy (optional): if given, perform the initializing/loading on this input policy.
+        Otherwise, do with the registered policy
         """
         pass
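
In outline, the base class now splits one abstract entry point from two concrete-by-default hooks, so a concrete saver only overrides the registration it actually needs. A sketch of that shape, with illustrative names rather than the real class:

    import abc

    class SaverSketch(abc.ABC):
        # register() stays abstract: every saver must route a module
        # to the matching helper below.
        @abc.abstractmethod
        def register(self, module):
            ...

        # The helpers are deliberately concrete no-ops, so subclasses
        # override only the ones relevant to their backend.
        def _register_policy(self, policy):
            pass

        def _register_optimizer(self, optimizer):
            pass

        @abc.abstractmethod
        def initialize_or_load(self, policy=None):
            ...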

ml-agents/mlagents/trainers/saver/tf_saver.py (29 changes)

     def register(self, module: Union[TFPolicy, TFOptimizer]) -> None:
         if isinstance(module, TFPolicy):
-            if self.policy is None:
-                self.policy = module
-                self.graph = self.policy.graph
-                self.sess = self.policy.sess
-                with self.policy.graph.as_default():
-                    self.tf_saver = tf.train.Saver(max_to_keep=self._keep_checkpoints)
+            self._register_policy(module)
         elif isinstance(module, TFOptimizer):
             self._register_optimizer(module)
         else:
             raise UnityPolicyException(
                 "Registering Object of unsupported type {} to Saver ".format(
                     type(module)
                 )
             )

+    def _register_policy(self, policy: TFPolicy) -> None:
+        if self.policy is None:
+            self.policy = policy
+            self.graph = self.policy.graph
+            self.sess = self.policy.sess
+            with self.policy.graph.as_default():
+                self.tf_saver = tf.train.Saver(max_to_keep=self._keep_checkpoints)

     def save_checkpoint(self, brain_name: str, step: int) -> str:
         checkpoint_path = os.path.join(self.model_path, f"{brain_name}-{step}")

             )

     def initialize_or_load(self, policy: Optional[TFPolicy] = None) -> None:
+        # Initialize/Load registered self.policy by default.
+        # If given input argument policy, use the input policy instead.
+        # This argument is mainly for initialization of the ghost trainer's fixed policy.
         # If there is an initialize path, load from that. Else, load from the set model path.
         # If load is set to True, don't reset steps to 0. Else, do. This allows a user to,
         # e.g., resume from an initialize path.
+        if policy is None:
+            policy = self.policy
+        policy = cast(TFPolicy, policy)
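
Taken together, the registration protocol a trainer drives against the TF saver looks roughly like this. The snippet uses mock stand-ins so it runs on its own; the comments describe what the real TFSaver does, per the diff above:

    from unittest import mock

    # Stand-ins for a real TFPolicy/TFOptimizer pair; only the call order matters.
    policy = mock.Mock(name="TFPolicy")
    optimizer = mock.Mock(name="TFOptimizer")
    saver = mock.Mock(name="TFSaver")  # constructed elsewhere with settings and a model path

    saver.register(policy)      # first TFPolicy registered becomes saver.policy;
                                # a tf.train.Saver is built on that policy's graph
    saver.register(optimizer)   # a TFOptimizer is routed to _register_optimizer
                                # (anything else raises UnityPolicyException)
    saver.initialize_or_load()  # restore or initialize the registered policy;
                                # pass a policy explicitly for the ghost trainer case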

ml-agents/mlagents/trainers/tests/test_ppo.py (11 changes)

 import attr

 from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers
+from mlagents.trainers.trainer.rl_trainer import RLTrainer
 from mlagents.trainers.ppo.trainer import PPOTrainer, discount_rewards
 from mlagents.trainers.ppo.optimizer_tf import PPOOptimizer
 from mlagents.trainers.policy.tf_policy import TFPolicy

 )
-def test_trainer_increment_step(ppo_optimizer, dummy_config):
+@mock.patch.object(RLTrainer, "create_saver")
+def test_trainer_increment_step(ppo_optimizer, mock_create_saver):
     trainer_params = PPO_CONFIG
     mock_optimizer = mock.Mock()
     mock_optimizer.reward_signals = {}

     )
     policy_mock.increment_step = mock.Mock(return_value=step_count)
     behavior_id = BehaviorIdentifiers.from_name_behavior_id(trainer.brain_name)
-    trainer.add_policy(behavior_id, policy_mock)
+    trainer.add_policy(behavior_id, policy_mock, register_saver=False)
     trainer._increment_step(5, trainer.brain_name)
     policy_mock.increment_step.assert_called_with(5)

     assert trainer.stats_reporter.get_stats_summaries("Policy/Extrinsic Reward").num > 0

-def test_add_get_policy(ppo_optimizer, dummy_config):
+@mock.patch.object(RLTrainer, "create_saver")
+def test_add_get_policy(ppo_optimizer, mock_create_saver, dummy_config):
     mock_optimizer = mock.Mock()
     mock_optimizer.reward_signals = {}
     ppo_optimizer.return_value = mock_optimizer

     policy.get_current_step.return_value = 2000
     behavior_id = BehaviorIdentifiers.from_name_behavior_id(trainer.brain_name)
-    trainer.add_policy(behavior_id, policy)
+    trainer.add_policy(behavior_id, policy, register_saver=False)
     assert trainer.get_policy("test_policy") == policy
     # Make sure the summary steps were loaded properly
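
Patching create_saver at the class level means the trainer constructor never builds a real tf.train.Saver, and register_saver=False keeps add_policy from touching one either; the SAC test below applies the same recipe. A self-contained sketch of the class-level patch, using a toy class rather than the real RLTrainer:

    from unittest import mock

    class TrainerToy:
        def __init__(self):
            # Would normally construct a real, heavyweight saver.
            self.saver = self.create_saver()

        def create_saver(self):
            raise RuntimeError("needs a real TF graph")

    # Patching on the class replaces the method before __init__ runs,
    # so construction succeeds and self.saver is a Mock.
    with mock.patch.object(TrainerToy, "create_saver") as mock_create_saver:
        trainer = TrainerToy()
        mock_create_saver.assert_called_once()
        assert trainer.saver is mock_create_saver.return_value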

ml-agents/mlagents/trainers/tests/test_sac.py (6 changes)

 from mlagents.tf_utils import tf

 from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers
+from mlagents.trainers.trainer.rl_trainer import RLTrainer
 from mlagents.trainers.sac.trainer import SACTrainer
 from mlagents.trainers.sac.optimizer import SACOptimizer
 from mlagents.trainers.policy.tf_policy import TFPolicy

     assert trainer2.update_buffer.num_experiences == buffer_len

-def test_add_get_policy(sac_optimizer, dummy_config):
+@mock.patch.object(RLTrainer, "create_saver")
+def test_add_get_policy(sac_optimizer, mock_create_saver, dummy_config):
     mock_optimizer = mock.Mock()
     mock_optimizer.reward_signals = {}
     sac_optimizer.return_value = mock_optimizer

     policy.get_current_step.return_value = 2000
     behavior_id = BehaviorIdentifiers.from_name_behavior_id(trainer.brain_name)
-    trainer.add_policy(behavior_id, policy)
+    trainer.add_policy(behavior_id, policy, register_saver=False)
     assert trainer.get_policy(behavior_id.behavior_id) == policy
     # Make sure the summary steps were loaded properly

ml-agents/mlagents/trainers/trainer/trainer.py (5 changes)

     @abc.abstractmethod
     def add_policy(
-        self, parsed_behavior_id: BehaviorIdentifiers, policy: Policy
+        self,
+        parsed_behavior_id: BehaviorIdentifiers,
+        policy: Policy,
+        register_saver: bool = True,
     ) -> None:
         """
         Adds policy to trainer.
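
Since this is the abstract base signature, every concrete trainer (PPO, SAC, and the ghost wrapper above) has to accept the same register_saver keyword, which is exactly what the earlier hunks in this commit do.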
