浏览代码

Merge pull request #2236 from Unity-Technologies/enable-flake8

Enable flake8
/develop-generalizationTraining-TrainerController
GitHub 5 年前
当前提交
f8041534
共有 19 个文件被更改,包括 76 次插入67 次删除
  1. 12
      .pre-commit-config.yaml
  2. 3
      gym-unity/gym_unity/envs/unity_env.py
  3. 12
      ml-agents-envs/mlagents/envs/exception.py
  4. 2
      ml-agents-envs/mlagents/envs/mock_communicator.py
  5. 2
      ml-agents-envs/mlagents/envs/rpc_communicator.py
  6. 6
      ml-agents-envs/mlagents/envs/socket_communicator.py
  7. 9
      ml-agents/mlagents/trainers/barracuda.py
  8. 10
      ml-agents/mlagents/trainers/buffer.py
  9. 19
      ml-agents/mlagents/trainers/components/bc/model.py
  10. 3
      ml-agents/mlagents/trainers/components/bc/module.py
  11. 2
      ml-agents/mlagents/trainers/components/reward_signals/curiosity/signal.py
  12. 6
      ml-agents/mlagents/trainers/components/reward_signals/gail/signal.py
  13. 2
      ml-agents/mlagents/trainers/learn.py
  14. 4
      ml-agents/mlagents/trainers/ppo/models.py
  15. 2
      ml-agents/mlagents/trainers/ppo/policy.py
  16. 2
      ml-agents/mlagents/trainers/ppo/trainer.py
  17. 28
      ml-agents/mlagents/trainers/tensorflow_to_barracuda.py
  18. 4
      ml-agents/mlagents/trainers/trainer.py
  19. 15
      setup.cfg

12
.pre-commit-config.yaml


repos:
- repo: https://github.com/python/black
rev: '19.3b0'
rev: 19.3b0
rev: 'v0.701'
rev: v0.720
hooks:
- id: mypy
name: mypy-ml-agents

.*cs.meta|
.*.css
)$
args: [--fix=lf]
args: [--fix=lf]
- id: flake8
exclude: >
(?x)^(
.*_pb2.py|
.*_pb2_grpc.py
)$

3
gym-unity/gym_unity/envs/unity_env.py


:param use_visual: Whether to use visual observation or vector observation.
:param uint8_visual: Return visual observations as uint8 (0-255) matrices instead of float (0.0-1.0).
:param multiagent: Whether to run in multi-agent mode (lists of obs, reward, done).
:param flatten_branched: If True, turn branched discrete action spaces into a Discrete space rather than MultiDiscrete.
:param flatten_branched: If True, turn branched discrete action spaces into a Discrete space rather than
MultiDiscrete.
:param no_graphics: Whether to run the Unity simulator in no-graphics mode
:param allow_multiple_visual_obs: If True, return a list of visual observations instead of only one.
"""

12
ml-agents-envs/mlagents/envs/exception.py


with open(log_file_path, "r") as f:
printing = False
unity_error = "\n"
for l in f:
l = l.strip()
if (l == "Exception") or (l == "Error"):
for line in f:
line = line.strip()
if (line == "Exception") or (line == "Error"):
if l == "":
if line == "":
unity_error += l + "\n"
unity_error += line + "\n"
logger.info(unity_error)
logger.error(
"An error might have occured in the environment. "

)
except:
except Exception:
logger.error(
"An error might have occured in the environment. "
"No UnitySDK.log file could be found."

2
ml-agents-envs/mlagents/envs/mock_communicator.py


try:
fake_brain = inputs.rl_input.agent_actions["RealFakeBrain"]
global_done = fake_brain.value[0].vector_actions[0] == -1
except:
except Exception:
pass
result = UnityRLOutput(global_done=global_done, agentInfos=dict_agent_info)
return UnityOutput(rl_output=result)

2
ml-agents-envs/mlagents/envs/rpc_communicator.py


self.server.add_insecure_port("[::]:" + str(self.port))
self.server.start()
self.is_open = True
except:
except Exception:
raise UnityWorkerInUseException(self.worker_id)
def check_port(self, port):

6
ml-agents-envs/mlagents/envs/socket_communicator.py


self._socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
self._socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
self._socket.bind(("localhost", self.port))
except:
except Exception:
raise UnityTimeOutException(
"Couldn't start socket communication because worker number {} is still in use. "
"You may need to manually close a previously opened environment "

self._socket.listen(1)
self._conn, _ = self._socket.accept()
self._conn.settimeout(30)
except:
except Exception:
raise UnityTimeOutException(
"The Unity environment took too long to respond. Make sure that :\n"
"\t The environment does not need user interaction to launch\n"

s = s[4:]
while len(s) != message_length:
s += self._conn.recv(self._buffer_size)
except socket.timeout as e:
except socket.timeout:
raise UnityTimeOutException("The environment took too long to respond.")
return s

9
ml-agents/mlagents/trainers/barracuda.py


BARRACUDA_VERSION = 16
# Definition of Barracuda model
class Model:
def __init__(self):

# Recur for all the vertices adjacent to this vertex
for i in self.graph[v]:
if visited[i] == False:
if not visited[i]:
self.topologicalSortUtil(i, visited, stack)
# Push current vertex to stack which stores result

# Call the recursive helper function to store Topological
# Sort starting from all vertices one by one
for i in range(self.V):
if visited[i] == False:
if not visited[i]:
self.topologicalSortUtil(i, visited, stack)
# print(stack)

def write_shape(self, s):
self.write_int32(len(s))
for el in s:
self.write_int32(el if el != None else -1)
self.write_int32(el if el is not None else -1)
def close(self):
self.f.close()

w.write_int32(len(model.layers))
for l in model.layers:
assert not l.name in l.inputs
assert l.name not in l.inputs
w.write_str(l.name)
w.write_int32(l.type)

10
ml-agents/mlagents/trainers/buffer.py


def append(self, element, padding_value=0):
"""
Adds an element to this list. Also lets you change the padding
Adds an element to this list. Also lets you change the padding
be padded with 1.)
be padded with 1.)
:param element: The element to append to the list.
:param padding_value: The value used to pad when get_batch is called.
"""

"""
if len(key_list) < 2:
return True
l = None
length = None
if (l is not None) and (l != len(self[key])):
if (length is not None) and (length != len(self[key])):
l = len(self[key])
length = len(self[key])
return True
def shuffle(self, key_list=None):

19
ml-agents/mlagents/trainers/components/bc/model.py


:param anneal_steps: Number of steps over which to anneal the learning_rate
"""
selected_action = self.policy_model.output
action_size = self.policy_model.act_size
if self.policy_model.brain.vector_action_space_type == "continuous":
self.loss = tf.reduce_mean(
tf.squared_difference(selected_action, self.expert_action)

action_idx = [0] + list(np.cumsum(action_size))
entropy = tf.reduce_sum(
(
tf.stack(
[
tf.nn.softmax_cross_entropy_with_logits_v2(
labels=tf.nn.softmax(
log_probs[:, action_idx[i] : action_idx[i + 1]]
),
logits=log_probs[:, action_idx[i] : action_idx[i + 1]],
)
for i in range(len(action_size))
],
axis=1,
)
),
axis=1,
)
self.loss = tf.reduce_mean(
-tf.log(tf.nn.softmax(log_probs) + 1e-7) * self.expert_action
)

3
ml-agents/mlagents/trainers/components/bc/module.py


"""
A BC trainer that can be used inline with RL, especially for pretraining.
:param policy: The policy of the learning model
:param policy_learning_rate: The initial Learning Rate of the policy. Used to set an appropriate learning rate for the pretrainer.
:param policy_learning_rate: The initial Learning Rate of the policy. Used to set an appropriate learning rate
for the pretrainer.
:param default_batch_size: The default batch size to use if batch_size isn't provided.
:param default_num_epoch: The default num_epoch to use if num_epoch isn't provided.
:param strength: The proportion of learning rate used to update through BC.

2
ml-agents/mlagents/trainers/components/reward_signals/curiosity/signal.py


:param gamma: The time discounting factor used for this reward.
:param encoding_size: The size of the hidden encoding layer for the ICM
:param learning_rate: The learning rate for the ICM.
:param num_epoch: The number of epochs to train over the training buffer for the ICM.
:param num_epoch: The number of epochs to train over the training buffer for the ICM.
"""
super().__init__(policy, strength, gamma)
self.model = CuriosityModel(

6
ml-agents/mlagents/trainers/components/reward_signals/gail/signal.py


:param learning_rate: The Learning Rate used during GAIL updates.
:param samples_per_update: The maximum number of samples to update during GAIL updates.
:param use_actions: Whether or not to use the actions for the discriminator.
:param use_vail: Whether or not to use a variational bottleneck for the discriminator.
:param use_vail: Whether or not to use a variational bottleneck for the discriminator.
See https://arxiv.org/abs/1810.00821.
"""
super().__init__(policy, strength, gamma)

cls, config_dict: Dict[str, Any], param_keys: List[str] = None
) -> None:
"""
Checks the config and throw an exception if a hyperparameter is missing. GAIL requires strength and gamma
at minimum.
Checks the config and throw an exception if a hyperparameter is missing. GAIL requires strength and gamma
at minimum.
"""
param_keys = ["strength", "gamma", "demo_path"]
super().check_config(config_dict, param_keys)

2
ml-agents/mlagents/trainers/learn.py


"""
)
except:
except Exception:
print("\n\n\tUnity Technologies\n")
_USAGE = """

4
ml-agents/mlagents/trainers/ppo/models.py


:param use_recurrent: Whether to use an LSTM layer in the network.
:param num_layers: Number of hidden layers between encoded input and policy & value layers
:param m_size: Size of brain memory.
:param seed: Seed to use for initialization of model.
:param stream_names: List of names of value streams. Usually, a list of the Reward Signals being used.
:param seed: Seed to use for initialization of model.
:param stream_names: List of names of value streams. Usually, a list of the Reward Signals being used.
:return: a sub-class of PPOAgent tailored to the environment.
"""
LearningModel.__init__(

2
ml-agents/mlagents/trainers/ppo/policy.py


Generates value estimates for bootstrapping.
:param brain_info: BrainInfo to be used for bootstrapping.
:param idx: Index in BrainInfo of agent.
:param done: Whether or not this is the last element of the episode, in which case we want the value estimate to be 0.
:param done: Whether or not this is the last element of the episode, in which case the value estimate will be 0.
:return: The value estimate dictionary with key being the name of the reward signal and the value the
corresponding value estimate.
"""

2
ml-agents/mlagents/trainers/ppo/trainer.py


n_sequences = max(
int(self.trainer_parameters["batch_size"] / self.policy.sequence_length), 1
)
value_total, policy_total, forward_total, inverse_total = [], [], [], []
value_total, policy_total = [], []
advantages = self.training_buffer.update_buffer["advantages"].get_batch()
self.training_buffer.update_buffer["advantages"].set(
(advantages - advantages.mean()) / (advantages.std() + 1e-10)

28
ml-agents/mlagents/trainers/tensorflow_to_barracuda.py


op="Flatten",
input=[
inputs[-1]
], # take only the last input, assume all other arguments are trivial (like sequence_length==1 always in ML-agents LSTM nets)
], # take only the last input, assume all other arguments are trivial (like sequence_length==1
# always in ML-agents LSTM nets)
),
"Reshape": lambda nodes, inputs, tensors, context: Struct(
op="Reshape",

input=[i for i in inputs]
+ [t.name for t in tensors][1:][
-2:
], # [1:] - skips the 0th tensor, since Conv2DBackpropInput 0th tensor is 'input_sizes' (which differs from other Conv layers)
], # [1:] - skips the 0th tensor, since Conv2DBackpropInput 0th tensor is 'input_sizes'
# (which differs from other Conv layers)
# [-2:] - take only last 2 tensors, this allows to process large patterns with the same code
padding=get_attr(by_op(nodes, "Conv2DBackpropInput"), "padding"),
strides=get_attr(by_op(nodes, "Conv2DBackpropInput"), "strides"),

# TODO:'Round'
# TODO:'Rsqrt'
}
# Debug
def debug(s):

end = end.astype(np.int32).tolist()
strides = strides.astype(np.int32).tolist()
# StridedSlice range and mask descriptions: https://www.tensorflow.org/api_docs/cc/class/tensorflow/ops/strided-slice
# StridedSlice range and mask descriptions:
# https://www.tensorflow.org/api_docs/cc/class/tensorflow/ops/strided-slice
# TODO: I don't think ellipsis and newaxis would work well together with the current implementation
assert len(begin) == len(end)

else:
activation = "Linear"
if not class_name in known_classes:
if class_name not in known_classes:
if class_name in requires_runtime_flag:
print("SKIP:", class_name, "layer is used only for training")
else:

auto_pad = get_attr(layer, "padding") # layer.attr['padding'].s.decode("utf-8")
pads = get_attr(layer, "pads")
strides = get_attr(layer, "strides") # layer.attr['strides'].list.i
dilations = get_attr(layer, "dilations") # layer.attr['dilations'].list.i
pool_size = get_attr(layer, "ksize") # layer.attr['ksize'].list.i
shape = get_attr(layer, "shape")
starts = get_attr(layer, "starts")

alpha = get_attr(layer, "alpha", default=1)
beta = get_attr(layer, "beta")
if activation and not activation in known_activations:
if activation and activation not in known_activations:
if auto_pad and not auto_pad in known_paddings:
if auto_pad and auto_pad not in known_paddings:
if data_frmt and not data_frmt in supported_data_formats:
if data_frmt and data_frmt not in supported_data_formats:
print("UNSUPPORTED: data format", data_frmt)
o_l.activation = known_activations.get(activation) or 0

-1 not in input_ranks
) # for rank() lambda all input ranks have to be known (not -1)
rank = rank(input_ranks)
if rank == None:
if rank is None:
def all_elements_equal(arr):  # http://stackoverflow.com/q/3844948/
    """
    Checks whether every element of a sequence compares equal to the first one.
    :param arr: A sequence supporting len(), indexing and count() (e.g. a list or tuple).
    :return: True if all elements are equal or the sequence is empty, False otherwise.
    """
    # An empty sequence has no differing elements; guard it explicitly so that
    # arr[0] below cannot raise IndexError.
    if len(arr) == 0:
        return True
    return arr.count(arr[0]) == len(arr)

# filter only inputs that are coming from nodes that are outside this pattern
# preserve the order
pattern_nodes = [n.name for n in pattern_nodes] + tensor_names
# inputs_from_outside_pattern = remove_duplicates_from_list([i for i in inputs_to_op_nodes if nodes_by_name[i] not in pattern_nodes])
# inputs_from_outside_pattern = remove_duplicates_from_list([i for i in inputs_to_op_nodes if
# nodes_by_name[i] not in pattern_nodes])
inputs_from_outside_pattern = remove_duplicates_from_list(
[i for i in inputs_to_op_nodes if i not in pattern_nodes]
)

Converts a TensorFlow model into a Barracuda model.
:param source_file: The TensorFlow Model
:param target_file: The name of the file the converted model will be saved to
:param trim_unused_by_output: The regexp to match output nodes to remain in the model. All other uconnected nodes will be removed.
:param trim_unused_by_output: The regexp to match output nodes to remain in the model.
All other unconnected nodes will be removed.
:param verbose: If True, will display debug messages
:param compress_f16: If true, the float values will be converted to f16
:return:

o_model.layers = cleanup_layers(o_model.layers)
all_inputs = {i for l in o_model.layers for i in l.inputs}
embedded_tensors = {t.name for l in o_model.layers for t in l.tensors}
# Trim
if trim_unused_by_output:

4
ml-agents/mlagents/trainers/trainer.py


"""
Takes a parameter dictionary and converts it to a human-readable string.
Recurses if there are multiple levels of dict. Used to print out hyperparameters.
param: param_dict: A Dictionary of key, value parameters.
param: param_dict: A Dictionary of key, value parameters.
return: A string version of this dictionary.
"""
if not isinstance(param_dict, dict):

)
s = sess.run(s_op)
self.summary_writer.add_summary(s, self.get_step)
except:
except Exception:
LOGGER.info(
"Cannot write text summary for Tensorboard. Tensorflow version must be r1.2 or above."
)

15
setup.cfg


# Run "pytest --cov=mlagents" to see the current coverage percentage.
# Run "pytest --cov=mlagents --cov-report html" to get a nice visualization of what is/isn't covered in html format.
fail_under = 60
[flake8]
# black will apply a line length of 88 to code but not docstrings/comments
# This seems like a decent compromise between readability and redoing all the docstrings.
max-line-length=120
ignore =
# Black tends to introduce things flake8 doesn't like, such as "line break before binary operator"
# or whitespace before ':'. Rather than fight with black, just ignore these for now.
W503, E203,
# "may be undefined, or defined from star imports" and related warnings
# We should stop doing these, but for now, leave them in.
F405, F403, F401,
正在加载...
取消
保存