import logging
from typing import Dict, List, Optional

import numpy as np
import tensorflow as tf

from mlagents.trainers.models import LearningModel, LearningRateSchedule, EncoderType

# Swish activation, f(x) = x * sigmoid(x); used for the hidden layers built below.
self.activ_fn = self.swish
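
# Recurrent memory tensors and Q-network outputs, filled in when the graphs are
# built. SAC keeps two Q-functions (q1/q2) and uses their minimum to curb
# overestimation; the *_p tensors appear to be the same Q-heads evaluated at the
# current policy's sampled action.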
self.policy_memory_in: Optional[tf.Tensor] = None
self.policy_memory_out: Optional[tf.Tensor] = None
self.value_memory_out: Optional[tf.Tensor] = None
self.q1: Optional[tf.Tensor] = None
self.q2: Optional[tf.Tensor] = None
self.q1_p: Optional[tf.Tensor] = None
self.q2_p: Optional[tf.Tensor] = None
self.q1_memory_out: Optional[tf.Tensor] = None
self.q2_memory_out: Optional[tf.Tensor] = None
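
# Action placeholders and policy-distribution outputs. For continuous actions the
# policy follows the usual SAC tanh-squashed Gaussian parameterization (output_pre
# is the pre-squash sample, log_sigma_sq the log-variances); the discrete branch
# instead works with masked, normalized log-probabilities and a one-hot output.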
self.action_holder: Optional[tf.Tensor] = None
self.prev_action: Optional[tf.Tensor] = None
self.action_masks: Optional[tf.Tensor] = None
self.external_action_in: Optional[tf.Tensor] = None
self.log_sigma_sq: Optional[tf.Tensor] = None
self.entropy: Optional[tf.Tensor] = None
self.deterministic_output: Optional[tf.Tensor] = None
self.all_log_probs: Optional[tf.Tensor] = None
self.normalized_logprobs: Optional[tf.Tensor] = None
self.action_probs: Optional[tf.Tensor] = None
self.selected_actions: Optional[tf.Tensor] = None
self.output: Optional[tf.Tensor] = None
self.output_oh: Optional[tf.Tensor] = None
self.output_pre: Optional[tf.Tensor] = None
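
# Trainable-variable collections, resolved later via get_vars() so that the policy,
# value, and Q updates can each be restricted to their own variable scope.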
self.value_vars = None
self.q_vars = None
self.critic_vars = None
self.policy_vars = None
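
# One Q-head per reward-signal stream; the *_pheads variants appear to hold the
# Q-values evaluated at the current policy's action.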
self.q1_heads: Optional[Dict[str, tf.Tensor]] = None
self.q2_heads: Optional[Dict[str, tf.Tensor]] = None
self.q1_pheads: Optional[Dict[str, tf.Tensor]] = None
self.q2_pheads: Optional[Dict[str, tf.Tensor]] = None

def get_vars(self, scope):
    """Return the trainable variables defined under the given TF variable scope."""
    return tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope)
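
# Illustrative usage only; the scope names below are hypothetical, the real scopes
# are defined where the policy and critic graphs are built:
#   self.policy_vars = self.get_vars("policy")
#   self.critic_vars = self.get_vars("q_network") + self.get_vars("value_network")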

:param h_size: Size of hidden layers for the value network.
:param scope: TF scope for the value network.
"""
self.value_heads = {}
# Guard against an invalid layer count before it is used to build the encoder below.
if num_layers < 1:
    num_layers = 1
with tf.variable_scope(scope):
    value_hidden = self.create_vector_observation_encoder(
        hidden_input, h_size, self.activ_fn, num_layers, "encoder", False
    )
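
# Target-network bookkeeping. In SAC the target value network is initialized as a
# copy of the online network and then soft-updated (Polyak-averaged) each step.
# A minimal sketch, assuming hypothetical target_vars/source_vars lists and a
# soft-update coefficient tau:
#   self.target_update_op = [
#       tf.assign(t, tau * s + (1.0 - tau) * t)
#       for t, s in zip(target_vars, source_vars)
#   ]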
self.target_init_op: List[tf.Tensor] = []
self.target_update_op: List[tf.Tensor] = []
self.update_batch_policy: Optional[tf.Operation] = None
self.update_batch_value: Optional[tf.Operation] = None
self.update_batch_entropy: Optional[tf.Operation] = None
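
# Online (trainable) policy/value network. A target network with the same
# architecture is typically built alongside it to provide the SAC bootstrap targets.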
self.policy_network = SACPolicyNetwork(
    brain=brain,