Unity Machine Learning Agents Toolkit (ML-Agents) is an open-source project that enables games and simulations to serve as environments for training intelligent agents.

from typing import Tuple
from distutils.version import LooseVersion
from mlagents_envs.exception import UnityException
from mlagents_envs.logging_util import get_logger
from mlagents.tf_utils import tf
from mlagents.trainers.saver.saver import Saver
from mlagents.trainers.tf.model_serialization import export_policy_model
from mlagents_envs.base_env import BehaviorSpec
from mlagents.trainers.settings import TrainerSettings
from mlagents.trainers.policy.tf_policy import TFPolicy
from mlagents.trainers import __version__

logger = get_logger(__name__)


class TFSaver(Saver):
    """
    Saver class for TensorFlow
    """

    def __init__(
        self,
        policy: TFPolicy,
        trainer_settings: TrainerSettings,
        model_path: str,
        load: bool = False,
    ):
        super().__init__()
        self.policy = policy
        self.model_path = model_path
        self.initialize_path = trainer_settings.init_path
        self._keep_checkpoints = trainer_settings.keep_checkpoints
        self.load = load

        self.graph = self.policy.graph
        self.sess = self.policy.sess
        with self.graph.as_default():
            self.saver = tf.train.Saver(max_to_keep=self._keep_checkpoints)

    def register(self, module_dict):
        # Nothing to register for the TF saver; the policy graph and session
        # captured in __init__ are all that is needed.
        pass

    def save_checkpoint(self, checkpoint_path: str, brain_name: str) -> None:
        """
        Checkpoints the policy on disk.
        :param checkpoint_path: filepath to write the checkpoint
        :param brain_name: Brain name of brain to be trained
        """
        print('save checkpoint_path:', checkpoint_path)
        # Save the TF checkpoint and graph definition
        with self.graph.as_default():
            if self.saver:
                self.saver.save(self.sess, f"{checkpoint_path}.ckpt")
            tf.train.write_graph(
                self.graph, self.model_path, "raw_graph_def.pb", as_text=False
            )
        # also save the policy so we have optimized model files for each checkpoint
        self.export(checkpoint_path, brain_name)

    def export(self, output_filepath: str, brain_name: str) -> None:
        """
        Saves the serialized model, given a path and brain name.
        This method will save the policy graph to the given filepath. The path
        should be provided without an extension as multiple serialized model formats
        may be generated as a result.
        :param output_filepath: path (without suffix) for the model file(s)
        :param brain_name: Brain name of brain to be trained.
        """
        print('export output_filepath:', output_filepath)
        export_policy_model(output_filepath, brain_name, self.graph, self.sess)

    def maybe_load(self):
        # If there is an initialize path, load from that. Else, load from the set model path.
        # If load is set to True, don't reset steps to 0. Else, do. This allows a user to,
        # e.g., resume from an initialize path.
        reset_steps = not self.load
        if self.initialize_path is not None:
            self._load_graph(self.initialize_path, reset_global_steps=reset_steps)
        elif self.load:
            self._load_graph(self.model_path, reset_global_steps=reset_steps)
        else:
            self.policy._initialize_graph()

    def _load_graph(self, model_path: str, reset_global_steps: bool = False) -> None:
        print('load model_path:', model_path)
        with self.graph.as_default():
            logger.info(f"Loading model from {model_path}.")
            ckpt = tf.train.get_checkpoint_state(model_path)
            if ckpt is None:
                raise UnityException(
                    "The model {} could not be loaded. Make "
                    "sure you specified the right "
                    "--run-id and that the previous run you are loading from had the same "
                    "behavior names.".format(model_path)
                )
            try:
                self.saver.restore(self.sess, ckpt.model_checkpoint_path)
            except tf.errors.NotFoundError:
                raise UnityException(
                    "The model {} was found but could not be loaded. Make "
                    "sure the model is from the same version of ML-Agents, has the same behavior parameters, "
                    "and is using the same trainer configuration as the current run.".format(
                        model_path
                    )
                )
        self._check_model_version(__version__)
        if reset_global_steps:
            self.policy._set_step(0)
            logger.info(
                "Starting training from step 0 and saving to {}.".format(
                    self.model_path
                )
            )
        else:
            logger.info(
                f"Resuming training from step {self.policy.get_current_step()}."
            )

    def _check_model_version(self, version: str) -> None:
        """
        Checks whether the model being loaded was created with the same version of
        ML-Agents, and logs a warning if not.
        """
        if self.policy.version_tensors is not None:
            loaded_ver = tuple(
                num.eval(session=self.sess) for num in self.policy.version_tensors
            )
            if loaded_ver != TFPolicy._convert_version_string(version):
                logger.warning(
                    f"The model checkpoint you are loading from was saved with ML-Agents version "
                    f"{loaded_ver[0]}.{loaded_ver[1]}.{loaded_ver[2]} but your current ML-Agents "
                    f"version is {version}. Model may not behave properly."
                )