
fix whitespace and line breaks

/develop-generalizationTraining-TrainerController
Chris Elion, 5 years ago
Commit dfdf7b83
4 files changed, 7 insertions(+), 6 deletions(-)
1. ml-agents/mlagents/trainers/components/bc/module.py (3 changes)
2. ml-agents/mlagents/trainers/components/reward_signals/curiosity/signal.py (2 changes)
3. ml-agents/mlagents/trainers/components/reward_signals/gail/signal.py (6 changes)
4. ml-agents/mlagents/trainers/ppo/policy.py (2 changes)

ml-agents/mlagents/trainers/components/bc/module.py (3 changes)


"""
A BC trainer that can be used inline with RL, especially for pretraining.
:param policy: The policy of the learning model
-:param policy_learning_rate: The initial Learning Rate of the policy. Used to set an appropriate learning rate for the pretrainer.
+:param policy_learning_rate: The initial Learning Rate of the policy. Used to set an appropriate learning rate
+for the pretrainer.
:param default_batch_size: The default batch size to use if batch_size isn't provided.
:param default_num_epoch: The default num_epoch to use if num_epoch isn't provided.
:param strength: The proportion of learning rate used to update through BC.
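The docstring above says `strength` is the proportion of the policy's learning rate used for the BC update. A minimal sketch of that relationship, assuming a simple scaling (the function name here is illustrative, not the actual ML-Agents API):

```python
def bc_learning_rate(policy_learning_rate: float, strength: float) -> float:
    # Hypothetical helper: the BC module's effective learning rate is the
    # policy's initial learning rate scaled by `strength`.
    return policy_learning_rate * strength

# e.g. a policy learning rate of 3e-4 with strength 0.5
rate = bc_learning_rate(3e-4, 0.5)
```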

ml-agents/mlagents/trainers/components/reward_signals/curiosity/signal.py (2 changes)


:param gamma: The time discounting factor used for this reward.
:param encoding_size: The size of the hidden encoding layer for the ICM
:param learning_rate: The learning rate for the ICM.
-:param num_epoch: The number of epochs to train over the training buffer for the ICM.
+:param num_epoch: The number of epochs to train over the training buffer for the ICM.
"""
super().__init__(policy, strength, gamma)
self.model = CuriosityModel(

ml-agents/mlagents/trainers/components/reward_signals/gail/signal.py (6 changes)


:param learning_rate: The Learning Rate used during GAIL updates.
:param samples_per_update: The maximum number of samples to update during GAIL updates.
:param use_actions: Whether or not to use the actions for the discriminator.
-:param use_vail: Whether or not to use a variational bottleneck for the discriminator.
+:param use_vail: Whether or not to use a variational bottleneck for the discriminator.
See https://arxiv.org/abs/1810.00821.
"""
super().__init__(policy, strength, gamma)

cls, config_dict: Dict[str, Any], param_keys: List[str] = None
) -> None:
"""
-Checks the config and throw an exception if a hyperparameter is missing. GAIL requires strength and gamma
-at minimum.
+Checks the config and throw an exception if a hyperparameter is missing. GAIL requires strength and gamma
+at minimum.
"""
param_keys = ["strength", "gamma", "demo_path"]
super().check_config(config_dict, param_keys)
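The `check_config` call above validates that the required GAIL hyperparameters are present. A self-contained sketch of the check this performs, assuming a simplified stand-in for the base-class method (the real one lives in the ML-Agents reward-signal base class):

```python
from typing import Any, Dict, List

def check_config(config_dict: Dict[str, Any], param_keys: List[str]) -> None:
    # Simplified stand-in: raise if any required hyperparameter is missing
    # from the reward-signal configuration.
    for key in param_keys:
        if key not in config_dict:
            raise ValueError(f"Hyperparameter {key} is missing from the config.")

# GAIL requires strength, gamma, and demo_path at minimum.
check_config(
    {"strength": 1.0, "gamma": 0.99, "demo_path": "demo.demo"},
    ["strength", "gamma", "demo_path"],
)  # passes silently
```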

ml-agents/mlagents/trainers/ppo/policy.py (2 changes)


Generates value estimates for bootstrapping.
:param brain_info: BrainInfo to be used for bootstrapping.
:param idx: Index in BrainInfo of agent.
-:param done: Whether or not this is the last element of the episode, in which case we want the value estimate to be 0.
+:param done: Whether or not this is the last element of the episode, in which case the value estimate will be 0.
:return: The value estimate dictionary with key being the name of the reward signal and the value the
corresponding value estimate.
"""
