
[MLA-427] make pyupgrade convert f-strings too (#4244)

* make pyupgrade convert f-strings too
Branch: /MLA-1734-demo-provider
GitHub · 4 years ago
Current commit: 129f9ddc
24 files changed, 50 insertions(+), 63 deletions(-)
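This change enables pyupgrade's f-string rewriting across the Python packages listed below: with `--py36-plus`, pyupgrade converts `str.format()` calls with simple arguments into equivalent f-strings (a Python 3.6+ feature). A minimal runnable sketch of the rewrite, mirroring the base_env.py hunk in this diff (the agent_id value here is made up):

    # Sketch of the rewrite pyupgrade --py36-plus applies; the example string
    # mirrors the base_env.py hunk below, the literal agent_id is made up.
    agent_id = 7

    # Before: old-style str.format() call.
    before = "agent_id {} is not present in the DecisionSteps".format(agent_id)

    # After: equivalent f-string, the form pyupgrade rewrites to.
    after = f"agent_id {agent_id} is not present in the DecisionSteps"

    assert before == after  # the rewrite is behavior-preserving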
  1. .pre-commit-config.yaml (2)
  2. com.unity.ml-agents/CHANGELOG.md (1)
  3. gym-unity/setup.py (2)
  4. ml-agents-envs/mlagents_envs/base_env.py (8)
  5. ml-agents-envs/mlagents_envs/env_utils.py (4)
  6. ml-agents-envs/setup.py (2)
  7. ml-agents/mlagents/trainers/buffer.py (2)
  8. ml-agents/mlagents/trainers/components/reward_signals/curiosity/model.py (4)
  9. ml-agents/mlagents/trainers/components/reward_signals/gail/model.py (4)
  10. ml-agents/mlagents/trainers/components/reward_signals/reward_signal_factory.py (2)
  11. ml-agents/mlagents/trainers/ghost/controller.py (4)
  12. ml-agents/mlagents/trainers/models.py (6)
  13. ml-agents/mlagents/trainers/policy/tf_policy.py (4)
  14. ml-agents/mlagents/trainers/ppo/optimizer.py (12)
  15. ml-agents/mlagents/trainers/ppo/trainer.py (14)
  16. ml-agents/mlagents/trainers/sac/network.py (6)
  17. ml-agents/mlagents/trainers/sac/optimizer.py (4)
  18. ml-agents/mlagents/trainers/sac/trainer.py (14)
  19. ml-agents/mlagents/trainers/stats.py (6)
  20. ml-agents/mlagents/trainers/tests/test_simple_rl.py (2)
  21. ml-agents/setup.py (2)
  22. ml-agents/tests/yamato/check_coverage_percent.py (2)
  23. ml-agents/tests/yamato/scripts/run_gym.py (4)
  24. ml-agents/tests/yamato/scripts/run_llapi.py (2)

.pre-commit-config.yaml (2)

  rev: v2.7.0
  hooks:
  - id: pyupgrade
- args: [--py3-plus]
+ args: [--py3-plus, --py36-plus]
  exclude: .*barracuda.py
  - repo: https://github.com/pre-commit/pre-commit-hooks
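Because f-strings require Python 3.6, the rewrite is only enabled by the added `--py36-plus` flag (hence the `python_requires` bump further down). Format specifiers carry over unchanged into f-strings; a small runnable sketch mirroring the `:0.3f` case in the stats.py hunk below (the ELO value here is made up):

    # Format specs are preserved when .format() is rewritten to an f-string,
    # e.g. the "{:0.3f}" ELO logging in stats.py. The value below is made up.
    elo = 1234.56789

    before = "ELO: {:0.3f}. ".format(elo)
    after = f"ELO: {elo:0.3f}. "

    assert before == after == "ELO: 1234.568. "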

com.unity.ml-agents/CHANGELOG.md (1)

  ### Major Changes
  #### com.unity.ml-agents (C#)
  #### ml-agents / ml-agents-envs / gym-unity (Python)
+ The minimum supported python version for ml-agents-envs was changed to 3.6.1. (#4244)
  ### Minor Changes
  #### com.unity.ml-agents (C#)

gym-unity/setup.py (2)

  author_email="ML-Agents@unity3d.com",
  url="https://github.com/Unity-Technologies/ml-agents",
  packages=find_packages(),
- install_requires=["gym", "mlagents_envs=={}".format(VERSION)],
+ install_requires=["gym", f"mlagents_envs=={VERSION}"],
  cmdclass={"verify": VerifyVersionCommand},
  )

ml-agents-envs/mlagents_envs/base_env.py (8)

  :returns: The DecisionStep
  """
  if agent_id not in self.agent_id_to_index:
- raise KeyError(
-     "agent_id {} is not present in the DecisionSteps".format(agent_id)
- )
+ raise KeyError(f"agent_id {agent_id} is not present in the DecisionSteps")
  agent_index = self._agent_id_to_index[agent_id] # type: ignore
  agent_obs = []
  for batched_obs in self.obs:

  specific agent
  """
  if agent_id not in self.agent_id_to_index:
- raise KeyError(
-     "agent_id {} is not present in the TerminalSteps".format(agent_id)
- )
+ raise KeyError(f"agent_id {agent_id} is not present in the TerminalSteps")
  agent_index = self._agent_id_to_index[agent_id] # type: ignore
  agent_obs = []
  for batched_obs in self.obs:

ml-agents-envs/mlagents_envs/env_utils.py (4)

  .replace(".x86", "")
  )
  true_filename = os.path.basename(os.path.normpath(env_path))
- get_logger(__name__).debug("The true file name is {}".format(true_filename))
+ get_logger(__name__).debug(f"The true file name is {true_filename}")
  if not (glob.glob(env_path) or glob.glob(env_path + ".*")):
  return None

  f"Couldn't launch the {file_name} environment. Provided filename does not match any environments."
  )
  else:
- get_logger(__name__).debug("This is the launch string {}".format(launch_string))
+ get_logger(__name__).debug(f"This is the launch string {launch_string}")
  # Launch Unity environment
  subprocess_args = [launch_string] + args
  try:

ml-agents-envs/setup.py (2)

  "protobuf>=3.6",
  "pyyaml>=3.1.0",
  ],
- python_requires=">=3.5",
+ python_requires=">=3.6.1",
  cmdclass={"verify": VerifyVersionCommand},
  )

ml-agents/mlagents/trainers/buffer.py (2)

  key_list = list(self.keys())
  if not self.check_length(key_list):
  raise BufferException(
-     "The length of the fields {} were not of same length".format(key_list)
+     f"The length of the fields {key_list} were not of same length"
  )
  for field_key in key_list:
  target_buffer[field_key].extend(

ml-agents/mlagents/trainers/components/reward_signals/curiosity/model.py (4)

  self.encoding_size,
  ModelUtils.swish,
  1,
- "curiosity_stream_{}_visual_obs_encoder".format(i),
+ f"curiosity_stream_{i}_visual_obs_encoder",
  False,
  )

  ModelUtils.swish,
  1,
- "curiosity_stream_{}_visual_obs_encoder".format(i),
+ f"curiosity_stream_{i}_visual_obs_encoder",
  True,
  )
  visual_encoders.append(encoded_visual)

ml-agents/mlagents/trainers/components/reward_signals/gail/model.py (4)

  self.encoding_size,
  ModelUtils.swish,
  1,
- "gail_stream_{}_visual_obs_encoder".format(i),
+ f"gail_stream_{i}_visual_obs_encoder",
  False,
  )

  ModelUtils.swish,
  1,
- "gail_stream_{}_visual_obs_encoder".format(i),
+ f"gail_stream_{i}_visual_obs_encoder",
  True,
  )
  visual_policy_encoders.append(encoded_policy_visual)

ml-agents/mlagents/trainers/components/reward_signals/reward_signal_factory.py (2)

  """
  rcls = NAME_TO_CLASS.get(name)
  if not rcls:
- raise UnityTrainerException("Unknown reward signal type {}".format(name))
+ raise UnityTrainerException(f"Unknown reward signal type {name}")
  class_inst = rcls(policy, settings)
  return class_inst

ml-agents/mlagents/trainers/ghost/controller.py (4)

  """
  self._queue.append(self._learning_team)
  self._learning_team = self._queue.popleft()
- logger.debug(
-     "Learning team {} swapped on step {}".format(self._learning_team, step)
- )
+ logger.debug(f"Learning team {self._learning_team} swapped on step {step}")
  self._changed_training_team = True
  # Adapted from https://github.com/Unity-Technologies/ml-agents/pull/1975 and

ml-agents/mlagents/trainers/models.py (6)

  parameter, global_step, max_step, min_value, power=1.0
  )
  else:
- raise UnityTrainerException("The schedule {} is invalid.".format(schedule))
+ raise UnityTrainerException(f"The schedule {schedule} is invalid.")
  return parameter_rate
  @staticmethod

  h_size,
  activation=activation,
  reuse=reuse,
- name="hidden_{}".format(i),
+ name=f"hidden_{i}",
  kernel_initializer=tf.initializers.variance_scaling(1.0),
  )
  return hidden

  """
  value_heads = {}
  for name in stream_names:
- value = tf.layers.dense(hidden_input, 1, name="{}_value".format(name))
+ value = tf.layers.dense(hidden_input, 1, name=f"{name}_value")
  value_heads[name] = value
  value = tf.reduce_mean(list(value_heads.values()), 0)
  return value_heads, value

ml-agents/mlagents/trainers/policy/tf_policy.py (4)

  )
  )
  else:
- logger.info(
-     "Resuming training from step {}.".format(self.get_current_step())
- )
+ logger.info(f"Resuming training from step {self.get_current_step()}.")
  def initialize_or_load(self):
  # If there is an initialize path, load from that. Else, load from the set model path.

ml-agents/mlagents/trainers/ppo/optimizer.py (12)

  self.old_values = {}
  for name in value_heads.keys():
  returns_holder = tf.placeholder(
-     shape=[None], dtype=tf.float32, name="{}_returns".format(name)
+     shape=[None], dtype=tf.float32, name=f"{name}_returns"
-     shape=[None], dtype=tf.float32, name="{}_value_estimate".format(name)
+     shape=[None], dtype=tf.float32, name=f"{name}_value_estimate"
  )
  self.returns_holders[name] = returns_holder
  self.old_values[name] = old_value

  self.all_old_log_probs: mini_batch["action_probs"],
  }
  for name in self.reward_signals:
- feed_dict[self.returns_holders[name]] = mini_batch[
-     "{}_returns".format(name)
- ]
- feed_dict[self.old_values[name]] = mini_batch[
-     "{}_value_estimates".format(name)
- ]
+ feed_dict[self.returns_holders[name]] = mini_batch[f"{name}_returns"]
+ feed_dict[self.old_values[name]] = mini_batch[f"{name}_value_estimates"]
  if self.policy.output_pre is not None and "actions_pre" in mini_batch:
  feed_dict[self.policy.output_pre] = mini_batch["actions_pre"]

ml-agents/mlagents/trainers/ppo/trainer.py (14)

  trajectory.done_reached and not trajectory.interrupted,
  )
  for name, v in value_estimates.items():
- agent_buffer_trajectory["{}_value_estimates".format(name)].extend(v)
+ agent_buffer_trajectory[f"{name}_value_estimates"].extend(v)
  self._stats_reporter.add_stat(
  self.optimizer.reward_signals[name].value_name, np.mean(v)
  )

  evaluate_result = reward_signal.evaluate_batch(
  agent_buffer_trajectory
  ).scaled_reward
- agent_buffer_trajectory["{}_rewards".format(name)].extend(evaluate_result)
+ agent_buffer_trajectory[f"{name}_rewards"].extend(evaluate_result)
  # Report the reward signals
  self.collected_rewards[name][agent_id] += np.sum(evaluate_result)

  for name in self.optimizer.reward_signals:
  bootstrap_value = value_next[name]
- local_rewards = agent_buffer_trajectory[
-     "{}_rewards".format(name)
- ].get_batch()
+ local_rewards = agent_buffer_trajectory[f"{name}_rewards"].get_batch()
-     "{}_value_estimates".format(name)
+     f"{name}_value_estimates"
  ].get_batch()
  local_advantage = get_gae(
  rewards=local_rewards,

  )
  local_return = local_advantage + local_value_estimates
  # This is later use as target for the different value estimates
- agent_buffer_trajectory["{}_returns".format(name)].set(local_return)
- agent_buffer_trajectory["{}_advantage".format(name)].set(local_advantage)
+ agent_buffer_trajectory[f"{name}_returns"].set(local_return)
+ agent_buffer_trajectory[f"{name}_advantage"].set(local_advantage)
  tmp_advantages.append(local_advantage)
  tmp_returns.append(local_return)

ml-agents/mlagents/trainers/sac/network.py (6)

  """
  self.value_heads = {}
  for name in stream_names:
- value = tf.layers.dense(hidden_input, 1, name="{}_value".format(name))
+ value = tf.layers.dense(hidden_input, 1, name=f"{name}_value")
  self.value_heads[name] = value
  self.value = tf.reduce_mean(list(self.value_heads.values()), 0)

  q1_heads = {}
  for name in stream_names:
- _q1 = tf.layers.dense(q1_hidden, num_outputs, name="{}_q1".format(name))
+ _q1 = tf.layers.dense(q1_hidden, num_outputs, name=f"{name}_q1")
  q1_heads[name] = _q1
  q1 = tf.reduce_mean(list(q1_heads.values()), axis=0)

  q2_heads = {}
  for name in stream_names:
- _q2 = tf.layers.dense(q2_hidden, num_outputs, name="{}_q2".format(name))
+ _q2 = tf.layers.dense(q2_hidden, num_outputs, name=f"{name}_q2")
  q2_heads[name] = _q2
  q2 = tf.reduce_mean(list(q2_heads.values()), axis=0)

ml-agents/mlagents/trainers/sac/optimizer.py (4)

  )
  rewards_holder = tf.placeholder(
-     shape=[None], dtype=tf.float32, name="{}_rewards".format(name)
+     shape=[None], dtype=tf.float32, name=f"{name}_rewards"
  )
  self.rewards_holders[name] = rewards_holder

  self.policy.mask_input: batch["masks"] * burn_in_mask,
  }
  for name in self.reward_signals:
- feed_dict[self.rewards_holders[name]] = batch["{}_rewards".format(name)]
+ feed_dict[self.rewards_holders[name]] = batch[f"{name}_rewards"]
  if self.policy.use_continuous_act:
  feed_dict[self.policy_network.external_action_in] = batch["actions"]

ml-agents/mlagents/trainers/sac/trainer.py (14)

  Save the training buffer's update buffer to a pickle file.
  """
  filename = os.path.join(self.artifact_path, "last_replay_buffer.hdf5")
- logger.info("Saving Experience Replay Buffer to {}".format(filename))
+ logger.info(f"Saving Experience Replay Buffer to {filename}")
  with open(filename, "wb") as file_object:
  self.update_buffer.save_to_file(file_object)

  """
  filename = os.path.join(self.artifact_path, "last_replay_buffer.hdf5")
- logger.info("Loading Experience Replay Buffer from {}".format(filename))
+ logger.info(f"Loading Experience Replay Buffer from {filename}")
  with open(filename, "rb+") as file_object:
  self.update_buffer.load_from_file(file_object)
  logger.info(

  while (
  self.step - self.hyperparameters.buffer_init_steps
  ) / self.update_steps > self.steps_per_update:
- logger.debug("Updating SAC policy at step {}".format(self.step))
+ logger.debug(f"Updating SAC policy at step {self.step}")
  buffer = self.update_buffer
  if self.update_buffer.num_experiences >= self.hyperparameters.batch_size:
  sampled_minibatch = buffer.sample_mini_batch(

  # Get rewards for each reward
  for name, signal in self.optimizer.reward_signals.items():
- sampled_minibatch[
-     "{}_rewards".format(name)
- ] = signal.evaluate_batch(sampled_minibatch).scaled_reward
+ sampled_minibatch[f"{name}_rewards"] = signal.evaluate_batch(
+     sampled_minibatch
+ ).scaled_reward
  update_stats = self.optimizer.update(sampled_minibatch, n_sequences)
  for stat_name, value in update_stats.items():

  # Get minibatches for reward signal update if needed
  reward_signal_minibatches = {}
  for name, signal in self.optimizer.reward_signals.items():
- logger.debug("Updating {} at step {}".format(name, self.step))
+ logger.debug(f"Updating {name} at step {self.step}")
  # Some signals don't need a minibatch to be sampled - so we don't!
  if signal.update_dict:
  reward_signal_minibatches[name] = buffer.sample_mini_batch(

ml-agents/mlagents/trainers/stats.py (6)

  )
  if self.self_play and "Self-play/ELO" in values:
  elo_stats = values["Self-play/ELO"]
- logger.info("{} ELO: {:0.3f}. ".format(category, elo_stats.mean))
+ logger.info(f"{category} ELO: {elo_stats.mean:0.3f}. ")
  else:
  logger.info(
  "{}: Step: {}. No episode was completed since last summary. {}".format(

  self._maybe_create_summary_writer(category)
  for key, value in values.items():
  summary = tf.Summary()
- summary.value.add(tag="{}".format(key), simple_value=value.mean)
+ summary.value.add(tag=f"{key}", simple_value=value.mean)
  self.summary_writers[category].add_summary(summary, step)
  self.summary_writers[category].flush()

  for file_name in os.listdir(directory_name):
  if file_name.startswith("events.out"):
  logger.warning(
-     "{} was left over from a previous run. Deleting.".format(file_name)
+     f"{file_name} was left over from a previous run. Deleting."
  )
  full_fname = os.path.join(directory_name, file_name)
  try:

ml-agents/mlagents/trainers/tests/test_simple_rl.py (2)

  def default_reward_processor(rewards, last_n_rewards=5):
  rewards_to_use = rewards[-last_n_rewards:]
  # For debugging tests
- print("Last {} rewards:".format(last_n_rewards), rewards_to_use)
+ print(f"Last {last_n_rewards} rewards:", rewards_to_use)
  return np.array(rewards[-last_n_rewards:], dtype=np.float32).mean()

ml-agents/setup.py (2)

  # Test-only dependencies should go in test_requirements.txt, not here.
  "grpcio>=1.11.0",
  "h5py>=2.9.0",
- "mlagents_envs=={}".format(VERSION),
+ f"mlagents_envs=={VERSION}",
  "numpy>=1.13.3,<2.0",
  "Pillow>=4.2.1",
  "protobuf>=3.6",

ml-agents/tests/yamato/check_coverage_percent.py (2)

  summary_xml = os.path.join(dirpath, SUMMARY_XML_FILENAME)
  break
  if not summary_xml:
- print("Couldn't find {} in root directory".format(SUMMARY_XML_FILENAME))
+ print(f"Couldn't find {SUMMARY_XML_FILENAME} in root directory")
  sys.exit(1)
  with open(summary_xml) as f:

ml-agents/tests/yamato/scripts/run_gym.py (4)

  if len(env.observation_space.shape) == 1:
  # Examine the initial vector observation
- print("Agent observations look like: \n{}".format(initial_observations))
+ print(f"Agent observations look like: \n{initial_observations}")
  for _episode in range(10):
  env.reset()

  actions = env.action_space.sample()
  obs, reward, done, _ = env.step(actions)
  episode_rewards += reward
- print("Total reward this episode: {}".format(episode_rewards))
+ print(f"Total reward this episode: {episode_rewards}")
  finally:
  env.close()

ml-agents/tests/yamato/scripts/run_llapi.py (2)

  if tracked_agent in terminal_steps:
  episode_rewards += terminal_steps[tracked_agent].reward
  done = True
- print("Total reward this episode: {}".format(episode_rewards))
+ print(f"Total reward this episode: {episode_rewards}")
  finally:
  env.close()
