
more pylint fixes (#2842)

/develop-newnormalization
GitHub, 5 years ago
Current commit 4da157fe
14 files changed, 101 insertions and 57 deletions
  1. .pylintrc (14 lines changed)
  2. ml-agents-envs/mlagents/envs/base_unity_environment.py (2 lines changed)
  3. ml-agents-envs/mlagents/envs/env_manager.py (7 lines changed)
  4. ml-agents/mlagents/trainers/bc/trainer.py (12 lines changed)
  5. ml-agents/mlagents/trainers/components/reward_signals/gail/model.py (6 lines changed)
  6. ml-agents/mlagents/trainers/components/reward_signals/gail/signal.py (22 lines changed)
  7. ml-agents/mlagents/trainers/models.py (9 lines changed)
  8. ml-agents/mlagents/trainers/ppo/models.py (9 lines changed)
  9. ml-agents/mlagents/trainers/ppo/trainer.py (8 lines changed)
  10. ml-agents/mlagents/trainers/rl_trainer.py (4 lines changed)
  11. ml-agents/mlagents/trainers/sac/models.py (41 lines changed)
  12. ml-agents/mlagents/trainers/sac/policy.py (6 lines changed)
  13. ml-agents/mlagents/trainers/sac/trainer.py (14 lines changed)
  14. ml-agents/mlagents/trainers/trainer.py (4 lines changed)

.pylintrc (14 lines changed)

 # disabled because black handles this
 C0301,C0330,
 # C0114: Missing module docstring
-C0115,C0114,
+# C0116: Missing function or method docstring
+C0114,C0115,C0116,
 # All convention and refactor for now
 C,R,
...
 # W0107: Unnecessary pass statement
 W0107,
-# W0511 TODO
+# W0511 "TODO"
 # W0201: Attribute '...' defined outside __init__
 W0201,
 # We should fix these up ASAP
 # W0221: Parameters differ from overridden
 W0221,
 # E0401: Unable to import...
 # E0611: No name '...' in module '...'
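
Note: the codes above map to concrete patterns in the codebase. As a rough, hypothetical illustration (not taken from this commit), the module below would trigger three of the disabled warnings; the import line shows pylint's inline disable syntax.

    # pylint_sketch.py -- hypothetical example module, not part of ml-agents
    import os  # pylint: disable=unused-import

    class Base:
        def run(self, steps):
            """Run for the given number of steps."""
            pass  # W0107: "pass" is unnecessary, the docstring already forms a body

    class Child(Base):
        def run(self, steps, extra):  # W0221: parameters differ from Base.run
            self.result = steps + extra  # W0201: attribute defined outside __init__
            return self.result

    print(Child().run(2, 3))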

ml-agents-envs/mlagents/envs/base_unity_environment.py (2 lines changed)

    def step(
        self,
        vector_action: Optional[Dict] = None,
        memory: Optional[Dict] = None,
        custom_action: Dict[str, Any] = None,
    ) -> AllBrainInfo:
        pass
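
Note: a minimal, self-contained sketch of the pattern this abstract step() follows, using hypothetical class names and only the standard library (it is not the ml-agents API). Keeping the optional keyword parameters in the base signature lets concrete environments override step() without triggering W0221.

    from abc import ABC, abstractmethod
    from typing import Any, Dict, Optional

    class BaseEnvSketch(ABC):
        @abstractmethod
        def step(
            self,
            vector_action: Optional[Dict] = None,
            memory: Optional[Dict] = None,
            custom_action: Optional[Dict[str, Any]] = None,
        ) -> Dict[str, Any]:
            """Advance the simulation and return per-brain info."""

    class EchoEnv(BaseEnvSketch):
        def step(
            self,
            vector_action: Optional[Dict] = None,
            memory: Optional[Dict] = None,
            custom_action: Optional[Dict[str, Any]] = None,
        ) -> Dict[str, Any]:
            # Matching the base signature exactly keeps W0221 quiet.
            return {"echo": vector_action or {}}

    print(EchoEnv().step(vector_action={"agent-0": [0.0, 1.0]}))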

ml-agents-envs/mlagents/envs/env_manager.py (7 lines changed)

 from abc import ABC, abstractmethod
-from typing import List, Dict, NamedTuple, Optional
+from typing import Any, List, Dict, NamedTuple, Optional
 from mlagents.envs.brain import AllBrainInfo, BrainParameters
 from mlagents.envs.policy import Policy
 from mlagents.envs.action_info import ActionInfo
...
     @abstractmethod
     def reset(
-        self, config: Dict = None, train_mode: bool = True
+        self,
+        config: Dict = None,
+        train_mode: bool = True,
+        custom_reset_parameters: Any = None,
     ) -> List[EnvironmentStep]:
         pass
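
Note: a hedged sketch of the same idea at the reset() boundary (hypothetical function, not ml-agents code): "config: Dict = None" is an implicit Optional, and the new custom_reset_parameters keyword has a default, so existing call sites keep working unchanged.

    from typing import Any, Dict, List, Optional

    def reset(
        config: Optional[Dict] = None,
        train_mode: bool = True,
        custom_reset_parameters: Any = None,
    ) -> List[Dict]:
        """Return one toy 'step' record per managed environment."""
        return [{"config": config or {}, "train_mode": train_mode,
                 "custom": custom_reset_parameters}]

    print(reset(train_mode=False))  # old-style call, still valid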

ml-agents/mlagents/trainers/bc/trainer.py (12 lines changed)

     def add_experiences(
         self,
-        curr_info: AllBrainInfo,
-        next_info: AllBrainInfo,
+        curr_all_info: AllBrainInfo,
+        next_all_info: AllBrainInfo,
...
-        :param curr_info: Current AllBrainInfo (Dictionary of all current brains and corresponding BrainInfo).
-        :param next_info: Next AllBrainInfo (Dictionary of all current brains and corresponding BrainInfo).
+        :param curr_all_info: Current AllBrainInfo (Dictionary of all current brains and corresponding BrainInfo).
+        :param next_all_info: Next AllBrainInfo (Dictionary of all current brains and corresponding BrainInfo).
...
-        info_student = curr_info[self.brain_name]
-        next_info_student = next_info[self.brain_name]
+        info_student = curr_all_info[self.brain_name]
+        next_info_student = next_all_info[self.brain_name]
         for agent_id in info_student.agents:
             self.evaluation_buffer[agent_id].last_brain_info = info_student
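
Note: the rename lines the override up with the base class. Pylint compares an override's parameter names against the method it overrides (reported under W0221 in the pylint releases of that era; newer releases split this out as W0237, arguments-renamed). A hypothetical, standard-library-only sketch:

    from typing import Any, Dict

    class TrainerSketch:
        def add_experiences(
            self, curr_all_info: Dict[str, Any], next_all_info: Dict[str, Any]
        ) -> None:
            raise NotImplementedError

    class BCTrainerSketch(TrainerSketch):
        def add_experiences(
            self, curr_all_info: Dict[str, Any], next_all_info: Dict[str, Any]
        ) -> None:
            # Parameter names now match the base class, so the warning clears.
            print(len(curr_all_info), len(next_all_info))

    BCTrainerSketch().add_experiences({"StudentBrain": 1}, {"StudentBrain": 2})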

ml-agents/mlagents/trainers/components/reward_signals/gail/model.py (6 lines changed)

-from typing import Tuple, List
+from typing import List, Optional, Tuple
 import tensorflow as tf
 from mlagents.trainers.models import LearningModel
...
         self.gradient_penalty_weight = gradient_penalty_weight
         self.use_vail = use_vail
         self.use_actions = use_actions  # True # Not using actions
+        self.noise: Optional[tf.Tensor] = None
+        self.z: Optional[tf.Tensor] = None
         self.make_inputs()
         self.create_network()
         self.create_loss(learning_rate)
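
Note: declaring noise and z up front is the usual W0201 fix: every attribute is created in __init__ (typed as Optional) and only assigned its real value later by a builder method. A minimal sketch of that pattern, with a hypothetical class and plain floats standing in for tf.Tensor:

    from typing import Optional

    class DiscriminatorSketch:
        def __init__(self, use_vail: bool) -> None:
            self.use_vail = use_vail
            # Declared here so pylint (W0201) and mypy both know these exist.
            self.noise: Optional[float] = None
            self.z: Optional[float] = None
            self.create_network()

        def create_network(self) -> None:
            if self.use_vail:
                self.noise = 0.1
                self.z = 0.0

    d = DiscriminatorSketch(use_vail=True)
    print(d.noise, d.z)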

ml-agents/mlagents/trainers/components/reward_signals/gail/signal.py (22 lines changed)

     def prepare_update(
         self,
         policy_model: LearningModel,
-        mini_batch_policy: Dict[str, np.ndarray],
+        mini_batch: Dict[str, np.ndarray],
         num_sequences: int,
     ) -> Dict[tf.Tensor, Any]:
         """
...
         :return: Feed_dict for update process.
         """
         max_num_experiences = min(
-            len(mini_batch_policy["actions"]),
+            len(mini_batch["actions"]),
...
-        for key, element in mini_batch_policy.items():
-            mini_batch_policy[key] = element[:max_num_experiences]
+        for key, element in mini_batch.items():
+            mini_batch[key] = element[:max_num_experiences]
...
-            len(mini_batch_policy["actions"]), 1
+            len(mini_batch["actions"]), 1
...
-            self.model.done_policy_holder: mini_batch_policy["done"],
+            self.model.done_policy_holder: mini_batch["done"],
         }
         if self.model.use_vail:
...
         if self.policy.use_continuous_act:
-            feed_dict[policy_model.selected_actions] = mini_batch_policy["actions"]
+            feed_dict[policy_model.selected_actions] = mini_batch["actions"]
...
-            feed_dict[policy_model.action_holder] = mini_batch_policy["actions"]
+            feed_dict[policy_model.action_holder] = mini_batch["actions"]
...
-                feed_dict[policy_model.visual_in[i]] = mini_batch_policy[
-                    "visual_obs%d" % i
-                ]
+                feed_dict[policy_model.visual_in[i]] = mini_batch["visual_obs%d" % i]
...
-            feed_dict[policy_model.vector_in] = mini_batch_policy["vector_obs"]
+            feed_dict[policy_model.vector_in] = mini_batch["vector_obs"]
         feed_dict[self.model.obs_in_expert] = mini_batch_demo["vector_obs"]
         self.has_updated = True
         return feed_dict
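
Note: the renamed mini_batch is truncated together with the demonstration batch so that both stay the same length before being fed into one update. A small stand-in sketch of that truncation logic (plain lists instead of np.ndarray, hypothetical function name):

    from typing import Dict, List

    def truncate_to_common_length(
        mini_batch: Dict[str, List], mini_batch_demo: Dict[str, List]
    ) -> None:
        """Cut every entry of both batches to the shorter 'actions' length."""
        max_num_experiences = min(
            len(mini_batch["actions"]), len(mini_batch_demo["actions"])
        )
        for batch in (mini_batch, mini_batch_demo):
            for key, element in batch.items():
                batch[key] = element[:max_num_experiences]

    policy_batch = {"actions": [1, 2, 3, 4], "done": [0, 0, 0, 1]}
    demo_batch = {"actions": [5, 6], "done": [0, 1]}
    truncate_to_common_length(policy_batch, demo_batch)
    print(policy_batch, demo_batch)  # every list now has length 2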

ml-agents/mlagents/trainers/models.py (9 lines changed)

 import logging
 from enum import Enum
-from typing import Callable, List
+from typing import Callable, Dict, List, Optional
 import numpy as np
 import tensorflow as tf
...
             trainable=False,
             dtype=tf.int32,
         )
+        self.value_heads: Dict[str, tf.Tensor] = {}
+        self.normalization_steps: Optional[tf.Variable] = None
+        self.running_mean: Optional[tf.Variable] = None
+        self.running_variance: Optional[tf.Variable] = None
+        self.update_normalization: Optional[tf.Operation] = None
+        self.value: Optional[tf.Tensor] = None

     @staticmethod
     def create_global_steps():
...
         :param hidden_input: The last layer of the Critic. The heads will consist of one dense hidden layer on top
         of the hidden input.
         """
         self.value_heads = {}
         for name in stream_names:
             value = tf.layers.dense(hidden_input, 1, name="{}_value".format(name))
             self.value_heads[name] = value
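
Note: create_value_heads builds one small output head per reward-signal stream and keys them by stream name. A toy, TensorFlow-free stand-in that only mirrors the shape of that idea (the stream names and the 3-element hidden vector are arbitrary):

    import random
    from typing import Callable, Dict, List

    def create_value_heads(stream_names: List[str]) -> Dict[str, Callable[[List[float]], float]]:
        heads: Dict[str, Callable[[List[float]], float]] = {}
        for name in stream_names:
            weights = [random.uniform(-0.1, 0.1) for _ in range(3)]
            # One tiny "dense layer" (a dot product) per stream.
            heads[name] = lambda hidden, w=weights: sum(h * wi for h, wi in zip(hidden, w))
        return heads

    value_heads = create_value_heads(["extrinsic", "gail"])
    print({name: head([0.5, -0.2, 0.1]) for name, head in value_heads.items()})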

ml-agents/mlagents/trainers/ppo/models.py (9 lines changed)

 import logging
+from typing import Optional
 import numpy as np
 from mlagents.trainers.models import LearningModel, EncoderType, LearningRateSchedule
 logger = logging.getLogger("mlagents.trainers")
...
         LearningModel.__init__(
             self, m_size, normalize, use_recurrent, brain, seed, stream_names
         )
+        self.optimizer: Optional[tf.train.AdamOptimizer] = None
+        self.grads = None
+        self.update_batch: Optional[tf.Operation] = None
         if num_layers < 1:
             num_layers = 1
         if brain.vector_action_space_type == "continuous":

ml-agents/mlagents/trainers/ppo/trainer.py (8 lines changed)

             self.collected_rewards[_reward_signal] = {}

     def process_experiences(
-        self, current_info: AllBrainInfo, new_info: AllBrainInfo
+        self, current_info: AllBrainInfo, next_info: AllBrainInfo
...
-        :param new_info: Dictionary of all next brains and corresponding BrainInfo.
+        :param next_info: Dictionary of all next brains and corresponding BrainInfo.
...
-        info = new_info[self.brain_name]
+        info = next_info[self.brain_name]
         if self.is_training:
             self.policy.update_normalization(info.vector_observations)
         for l in range(len(info.agents)):
...
             number_experiences=buffer_length,
             mean_return=float(np.mean(self.cumulative_returns_since_policy_update)),
         )
-        self.cumulative_returns_since_policy_update = []
+        self.cumulative_returns_since_policy_update.clear()
         # Make sure batch_size is a multiple of sequence length. During training, we
         # will need to reshape the data into a batch_size x sequence_length tensor.
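
Note: switching from reassignment to .clear() empties the list that was created (and annotated) once in the trainer's constructor instead of rebinding the attribute to a new object after every policy update. A small sketch of the difference, with a hypothetical stats holder:

    from typing import List

    class TrainerStatsSketch:
        def __init__(self) -> None:
            self.cumulative_returns_since_policy_update: List[float] = []

        def record(self, ret: float) -> None:
            self.cumulative_returns_since_policy_update.append(ret)

        def after_update(self) -> None:
            # Same object, emptied in place; any other reference sees the reset too.
            self.cumulative_returns_since_policy_update.clear()

    stats = TrainerStatsSketch()
    stats.record(1.5)
    stats.after_update()
    print(stats.cumulative_returns_since_policy_update)  # []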

ml-agents/mlagents/trainers/rl_trainer.py (4 lines changed)

         :param agent_idx: the index of the Agent agent_id
         """
         raise UnityTrainerException(
-            "The process_experiences method was not implemented."
+            "The add_policy_outputs method was not implemented."
         )

     def add_rewards_outputs(
...
         :param agent_next_idx: the index of the Agent agent_id in the next brain info
         """
         raise UnityTrainerException(
-            "The process_experiences method was not implemented."
+            "The add_rewards_outputs method was not implemented."
         )
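
Note: the fix gives each stub its own message, so a missing override is reported by the right name. A runnable sketch with a hypothetical exception class:

    class UnityTrainerExceptionSketch(Exception):
        """Stand-in for mlagents' UnityTrainerException."""

    class RLTrainerSketch:
        def add_policy_outputs(self, *args) -> None:
            raise UnityTrainerExceptionSketch(
                "The add_policy_outputs method was not implemented."
            )

        def add_rewards_outputs(self, *args) -> None:
            raise UnityTrainerExceptionSketch(
                "The add_rewards_outputs method was not implemented."
            )

    try:
        RLTrainerSketch().add_rewards_outputs()
    except UnityTrainerExceptionSketch as exc:
        print(exc)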

ml-agents/mlagents/trainers/sac/models.py (41 lines changed)

 import logging
 import numpy as np
-from typing import Optional
+from typing import Dict, List, Optional
 import tensorflow as tf
 from mlagents.trainers.models import LearningModel, LearningRateSchedule, EncoderType
...
         self.activ_fn = self.swish
+        self.policy_memory_in: Optional[tf.Tensor] = None
+        self.policy_memory_out: Optional[tf.Tensor] = None
+        self.value_memory_out: Optional[tf.Tensor] = None
+        self.q1: Optional[tf.Tensor] = None
+        self.q2: Optional[tf.Tensor] = None
+        self.q1_p: Optional[tf.Tensor] = None
+        self.q2_p: Optional[tf.Tensor] = None
+        self.q1_memory_out: Optional[tf.Tensor] = None
+        self.q2_memory_out: Optional[tf.Tensor] = None
+        self.action_holder: Optional[tf.Tensor] = None
+        self.prev_action: Optional[tf.Tensor] = None
+        self.action_masks: Optional[tf.Tensor] = None
+        self.external_action_in: Optional[tf.Tensor] = None
+        self.log_sigma_sq: Optional[tf.Tensor] = None
+        self.entropy: Optional[tf.Tensor] = None
+        self.deterministic_output: Optional[tf.Tensor] = None
+        self.all_log_probs: Optional[tf.Tensor] = None
+        self.normalized_logprobs: Optional[tf.Tensor] = None
+        self.action_probs: Optional[tf.Tensor] = None
+        self.selected_actions: Optional[tf.Tensor] = None
+        self.output: Optional[tf.Tensor] = None
+        self.output_oh: Optional[tf.Tensor] = None
+        self.output_pre: Optional[tf.Tensor] = None
         self.value_vars = None
         self.q_vars = None
         self.critic_vars = None
         self.policy_vars = None
+        self.q1_heads: Optional[Dict[str, tf.Tensor]] = None
+        self.q2_heads: Optional[Dict[str, tf.Tensor]] = None
+        self.q1_pheads: Optional[Dict[str, tf.Tensor]] = None
+        self.q2_pheads: Optional[Dict[str, tf.Tensor]] = None

     def get_vars(self, scope):
         return tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope)
...
         :param h_size: size of hidden layers for value network
         :param scope: TF scope for value network.
         """
         self.value_heads = {}
         with tf.variable_scope(scope):
             value_hidden = self.create_vector_observation_encoder(
                 hidden_input, h_size, self.activ_fn, num_layers, "encoder", False
             )
...
         if num_layers < 1:
             num_layers = 1
+        self.target_init_op: List[tf.Tensor] = []
+        self.target_update_op: List[tf.Tensor] = []
+        self.update_batch_policy: Optional[tf.Operation] = None
+        self.update_batch_value: Optional[tf.Operation] = None
+        self.update_batch_entropy: Optional[tf.Operation] = None
         self.policy_network = SACPolicyNetwork(
             brain=brain,
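
Note: when this many attributes start life as Optional[...] = None and are filled in by later build steps, a small guard before first use documents the required ordering and keeps type checkers satisfied. A hedged, TensorFlow-free sketch of that pattern (hypothetical class and values):

    from typing import Dict, List, Optional

    class SACNetworkSketch:
        def __init__(self) -> None:
            self.q1_heads: Optional[Dict[str, float]] = None
            self.q2_heads: Optional[Dict[str, float]] = None
            self.target_update_op: List[str] = []

        def create_q_heads(self, stream_names: List[str]) -> None:
            self.q1_heads = {name: 0.0 for name in stream_names}
            self.q2_heads = {name: 0.0 for name in stream_names}

        def total_q1(self) -> float:
            if self.q1_heads is None:
                raise RuntimeError("create_q_heads() must be called before total_q1()")
            return sum(self.q1_heads.values())

    net = SACNetworkSketch()
    net.create_q_heads(["extrinsic"])
    print(net.total_q1())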

ml-agents/mlagents/trainers/sac/policy.py (6 lines changed)

     @timed
     def update(
-        self, mini_batch: Dict[str, Any], num_sequences: int, update_target: bool = True
+        self, mini_batch: Dict[str, Any], num_sequences: int
     ) -> Dict[str, float]:
         """
         Updates model using buffer.
...
         update_vals = self._execute_model(feed_dict, self.update_dict)
         for stat_name, update_name in stats_needed.items():
             update_stats[stat_name] = update_vals[update_name]
-        if update_target:
-            self.sess.run(self.model.target_update_op)
+        # Update target network. By default, target update happens at every policy update.
+        self.sess.run(self.model.target_update_op)
         return update_stats

     def update_reward_signals(
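
Note: with update_target gone, the target network is refreshed on every update() call, and callers (see the sac/trainer.py hunk below) simply drop the keyword argument. A toy sketch of the simplified contract, with a hypothetical policy class and made-up numbers:

    from typing import Dict, List

    class SACPolicySketch:
        def __init__(self) -> None:
            self.online = {"w": 1.0}
            self.target = {"w": 0.0}

        def update(self, mini_batch: Dict[str, List], num_sequences: int) -> Dict[str, float]:
            self.online["w"] += 0.01 * num_sequences * len(mini_batch.get("actions", []))
            # Update target network. By default, this happens at every policy update.
            self.target["w"] = 0.9 * self.target["w"] + 0.1 * self.online["w"]
            return {"Losses/Value Loss": abs(self.online["w"] - self.target["w"])}

    policy = SACPolicySketch()
    # Call site after the change: no update_target=True keyword any more.
    print(policy.update({"actions": [0, 1]}, num_sequences=1))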

ml-agents/mlagents/trainers/sac/trainer.py (14 lines changed)

 import logging
 from collections import defaultdict
-from typing import List, Dict
+from typing import Dict
 import os
 import numpy as np
...
         )

     def process_experiences(
-        self, current_info: AllBrainInfo, new_info: AllBrainInfo
+        self, current_info: AllBrainInfo, next_info: AllBrainInfo
...
-        :param new_info: Dictionary of all next brains and corresponding BrainInfo.
+        :param next_info: Dictionary of all next brains and corresponding BrainInfo.
...
-        info = new_info[self.brain_name]
+        info = next_info[self.brain_name]
         if self.is_training:
             self.policy.update_normalization(info.vector_observations)
         for l in range(len(info.agents)):
...
         is greater than 1 and the reward signals are not updated in parallel.
         """
-        self.cumulative_returns_since_policy_update: List[float] = []
+        self.cumulative_returns_since_policy_update.clear()
         n_sequences = max(
             int(self.trainer_parameters["batch_size"] / self.policy.sequence_length), 1
         )
...
                 "{}_rewards".format(name)
             ] = signal.evaluate_batch(sampled_minibatch).scaled_reward
-        update_stats = self.policy.update(
-            sampled_minibatch, n_sequences, update_target=True
-        )
+        update_stats = self.policy.update(sampled_minibatch, n_sequences)
         for stat_name, value in update_stats.items():
             batch_update_stats[stat_name].append(value)

ml-agents/mlagents/trainers/trainer.py (4 lines changed)

         :param next_all_info: Dictionary of all current brains and corresponding BrainInfo.
         :param take_action_outputs: The outputs of the Policy's get_action method.
         """
-        raise UnityTrainerException(
-            "The process_experiences method was not implemented."
-        )
+        raise UnityTrainerException("The add_experiences method was not implemented.")

     def process_experiences(
         self, current_info: AllBrainInfo, next_info: AllBrainInfo
