|
|
|
|
|
|
from typing import Any, Dict, List

import numpy as np

from mlagents.tf_utils import tf

from mlagents.trainers.sac.network import SACPolicyNetwork, SACTargetNetwork
from mlagents.trainers.models import LearningRateSchedule, EncoderType, ModelUtils
from mlagents.trainers.common.tf_optimizer import TFOptimizer
from mlagents.trainers.tf_policy import TFPolicy
from mlagents.trainers.buffer import AgentBuffer
        )
        # The optimizer's m_size is 3 times the policy's (Q1, Q2, and Value).
        self.m_size = 3 * self.policy.m_size
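        # Illustrative sketch, not code from this file: because Q1, Q2, and the value
        # network each keep their own recurrent state, a concatenated optimizer memory
        # of size 3 * policy.m_size can be split into three equal chunks. The tensor
        # name `memory_in` below is hypothetical.
        #
        #   q1_mem, q2_mem, value_mem = tf.split(memory_in, num_or_size_splits=3, axis=1)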
|
|
|
        self._create_inputs_and_outputs()
        self.learning_rate = ModelUtils.create_learning_rate(
            lr_schedule, lr, self.policy.global_step, int(max_step)
        )
        self._create_losses(
            self.policy_network.q1_heads,
            self.policy_network.q2_heads,
            lr,
            int(max_step),
            stream_names,
            discrete=not self.policy.use_continuous_act,
        )
        self._create_sac_optimizer_ops()
        self.selected_actions = (
            self.policy.selected_actions
        )
"learning_rate": self.learning_rate, |
|
|
|
} |
|
|
|
|
|
|
|
    def _create_inputs_and_outputs(self) -> None:
        """
        Assign the higher-level SACModel's inputs and outputs to those of its policy or
        target network.
        """
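        # A minimal sketch of the kind of aliasing this method performs; the attribute
        # names below are assumptions for illustration, not read from this file:
        #
        #   self.vector_in = self.policy.vector_in
        #   self.visual_in = self.policy.visual_in
        #   self.next_vector_in = self.target_network.vector_in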
|
|
|
|
|
|
|
|
|
|
    def _create_losses(
        self, q1_streams, q2_streams, lr, max_step, stream_names, discrete=False
    ) -> None:
        for name in stream_names:
            if discrete:
                # Expected Q under the current policy, computed branch by branch.
                _branched_mpq1 = self._apply_as_branches(
                    self.policy_network.q1_pheads[name] * discrete_action_probs
                )
                branched_mpq1 = tf.stack(
                    [
                        tf.reduce_sum(_br, axis=1, keep_dims=True)
                        for _br in _branched_mpq1
                    ]
                )
                _q1_p_mean = tf.reduce_mean(branched_mpq1, axis=0)

                _branched_mpq2 = self._apply_as_branches(
                    self.policy_network.q2_pheads[name] * discrete_action_probs
                )
                branched_mpq2 = tf.stack(
                    [
                        tf.reduce_sum(_br, axis=1, keep_dims=True)
                        for _br in _branched_mpq2
                    ]
                )
            if discrete:
                # We need to break up the Q functions by branch, and update them individually.
                branched_q1_stream = self._apply_as_branches(
                    self.policy.action_oh * q1_streams[name]
                )
                branched_q2_stream = self._apply_as_branches(
                    self.policy.action_oh * q2_streams[name]
                )
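                # Worked example (illustrative, not from this file): with act_size == [3, 2]
                # the concatenated one-hot action has width 5, e.g. [0, 1, 0, 1, 0].
                # Splitting it into [0, 1, 0] and [1, 0], multiplying each piece by its
                # branch's Q outputs, and summing within the branch picks out the Q-value
                # of the chosen action separately for every branch.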
|
|
|
|
|
|
|
|
|
|
        self.ent_coef = tf.exp(self.log_ent_coef)
        if discrete:
            # We also have to do a different entropy and target_entropy per branch.
            branched_per_action_ent = self._apply_as_branches(per_action_entropy)
            branched_ent_sums = tf.stack(
                [
                    tf.reduce_sum(_lp, axis=1, keep_dims=True) + _te
                    for _lp, _te in zip(branched_per_action_ent, self.target_entropy)
                ],
                axis=1,
            )
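            # Illustrative sketch, not from this file: per-branch target entropies are
            # commonly set to a fraction of each branch's maximum entropy, e.g.
            #
            #   target_entropy = [0.2 * np.log(branch_size) for branch_size in act_size]
            #
            # The 0.2 scale here is an assumption for the sketch, not a value read from
            # this file.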
|
|
|
|
|
|
            # Same with policy loss, we have to do the loss per branch and average them,
            # so that larger branches don't get more weight.
            # The equivalent KL divergence from Eq 10 of Haarnoja et al. is also pi*log(pi) - Q.
            branched_q_term = self._apply_as_branches(
                discrete_action_probs * self.policy_network.q1_p
            )
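            # For reference, the per-branch objective the comment above refers to is
            #
            #   L_pi(branch b) = E_s[ sum_a pi_b(a|s) * (ent_coef_b * log pi_b(a|s) - Q1_b(s, a)) ]
            #
            # i.e. an exact expectation over each discrete branch, averaged across branches
            # so that branches with more actions do not dominate the loss.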
|
|
|
|
|
|
|
|
|
|
|
|
|
|
        self.entropy = self.policy_network.entropy
    def _apply_as_branches(self, concat_logits: tf.Tensor) -> List[tf.Tensor]:
        """
        Takes in a concatenated set of logits and breaks it up into a list of
        non-concatenated logits, one per action branch.
        """
        # self.act_size holds the number of actions in each discrete branch.
        action_idx = [0] + list(np.cumsum(self.act_size))
        branches_logits = [
            concat_logits[:, action_idx[i] : action_idx[i + 1]]
            for i in range(len(self.act_size))
        ]
        return branches_logits
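    # Example: with self.act_size == [3, 2], a [batch, 5] tensor of concatenated logits
    # is returned as the list [logits[:, 0:3], logits[:, 3:5]].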
|
|
|
|
|
|
|
    def _create_sac_optimizer_ops(self) -> None:
        """
        Creates the Adam optimizers and update ops for SAC, including
        the policy, value, and entropy updates, as well as the target network update.
        """
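        # A minimal sketch of the pattern this method follows (TF1-style graph building);
        # the variable lists and op names below are illustrative assumptions:
        #
        #   policy_optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
        #   value_optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
        #   entropy_optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
        #   self.update_batch_policy = policy_optimizer.minimize(self.policy_loss, var_list=policy_vars)
        #   self.update_batch_value = value_optimizer.minimize(self.total_value_loss, var_list=critic_vars)
        #   self.update_batch_entropy = entropy_optimizer.minimize(self.entropy_loss, var_list=[self.log_ent_coef])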
|
|
|
|
|
|
    def update(self, batch: AgentBuffer, num_sequences: int) -> Dict[str, float]:
        """
        Updates the model using the supplied mini-batch.
        :param batch: Experience mini-batch.
        :param num_sequences: Number of LSTM sequences in the batch.
        :param reward_signal_batches: Minibatches to use for updating the reward signals,
            indexed by name. If none, don't update the reward signals.
        :return: Output from update process.
        """
        feed_dict = self._construct_feed_dict(self.policy, batch, num_sequences)
        stats_needed = self.stats_name_to_update_name
        update_stats: Dict[str, float] = {}
        update_vals = self._execute_model(feed_dict, self.update_dict)
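        # The rest of an update typically maps the fetched values back to reporting
        # names; a sketch, assuming stats_needed maps stat name -> update name:
        #
        #   for stat_name, update_name in stats_needed.items():
        #       update_stats[stat_name] = update_vals[update_name]
        #   return update_stats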
|
|
|
|
|
|
            update_dict.update(self.reward_signals[name].update_dict)
            stats_needed.update(self.reward_signals[name].stats_name_to_update_name)
    def _construct_feed_dict(
        self, policy: TFPolicy, batch: AgentBuffer, num_sequences: int
    ) -> Dict[tf.Tensor, Any]:
        """