Move check for creation into nn_policy

5 年前 · 328476d8
--- a/ml-agents/mlagents/trainers/common/nn_policy.py
+++ b/ml-agents/mlagents/trainers/common/nn_policy.py
        Builds the tensorflow graph needed for this policy.
        """
        with self.graph.as_default():
+            _vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
+            if len(_vars) > 0:
+                # We assume the first thing created in the graph is the Policy. If
+                # already populated, don't create more tensors.
+                return
+
            self.create_input_placeholders()
            if self.use_continuous_act:
                self.create_cc_actor(
--- a/ml-agents/mlagents/trainers/optimizer.py
+++ b/ml-agents/mlagents/trainers/optimizer.py
    def create_tf_optimizer(self, learning_rate, name="Adam"):
        """
        Builds the TensorFlow Adam optimizer used by this optimizer class.
        :param learning_rate: Value forwarded to AdamOptimizer as its learning_rate argument.
        :param name: Value forwarded to AdamOptimizer as its name argument. Defaults to "Adam".
        :return: The newly constructed tf.train.AdamOptimizer.
        """
        adam_optimizer = tf.train.AdamOptimizer(
            learning_rate=learning_rate, name=name
        )
        return adam_optimizer

-    def _create_policy_tf_graph_if_needed(self, policy):
-        """
-        Creates the policy TF graph. If already created, don't do anything.
-        """
-        with policy.graph.as_default():
-            _vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
-            if len(_vars) == 0:
-                policy.create_tf_graph()
-
    def _execute_model(self, feed_dict, out_dict):
        """
        Executes model.
--- a/ml-agents/mlagents/trainers/ppo/optimizer.py
+++ b/ml-agents/mlagents/trainers/ppo/optimizer.py
        :param trainer_params: Trainer parameters dictionary that specifies the properties of the trainer.
        """
        # Create the graph here to give more granular control of the TF graph to the Optimizer.
-        self._create_policy_tf_graph_if_needed(policy)
+        policy.create_tf_graph()

        with policy.graph.as_default():
            with tf.variable_scope("optimizer/"):
--- a/ml-agents/mlagents/trainers/sac/optimizer.py
+++ b/ml-agents/mlagents/trainers/sac/optimizer.py
        :param m_size: Size of brain memory.
        """
        # Create the graph here to give more granular control of the TF graph to the Optimizer.
-        self._create_policy_tf_graph_if_needed(policy)
+        policy.create_tf_graph()

        with policy.graph.as_default():
            with tf.variable_scope(""):