
Add --namespace-packages to mypy for mlagents (#3075)

Branch: develop
GitHub, 5 years ago
Current commit 90db165f
10 changed files with 34 additions and 21 deletions
  1. .pre-commit-config.yaml (6 changes)
  2. ml-agents/mlagents/trainers/action_info.py (4 changes)
  3. ml-agents/mlagents/trainers/demo_loader.py (4 changes)
  4. ml-agents/mlagents/trainers/env_manager.py (9 changes)
  5. ml-agents/mlagents/trainers/ppo/trainer.py (7 changes)
  6. ml-agents/mlagents/trainers/sac/trainer.py (12 changes)
  7. ml-agents/mlagents/trainers/simple_env_manager.py (4 changes)
  8. ml-agents/mlagents/trainers/subprocess_env_manager.py (4 changes)
  9. ml-agents/mlagents/trainers/tests/test_policy.py (2 changes)
  10. ml-agents/mlagents/trainers/tf_policy.py (3 changes)

.pre-commit-config.yaml (6 changes)

   - repo: https://github.com/pre-commit/mirrors-mypy
     rev: v0.750
     # Currently mypy may assert after logging one message. To get all the messages at once, change repo and rev to
     # repo: https://github.com/chriselion/mypy
     # rev: 3d0b6164a9487a6c5cf9d144110b86600fd85e25
     # This is a fork with the assert disabled, although precommit has trouble installing it sometimes.
-    args: [--ignore-missing-imports, --disallow-incomplete-defs]
+    args: [--ignore-missing-imports, --disallow-incomplete-defs, --namespace-packages]
   - id: mypy
     name: mypy-ml-agents-envs
     files: "ml-agents-envs/.*"

ml-agents/mlagents/trainers/action_info.py (4 changes)

-from typing import NamedTuple, Any, Dict, Optional
+from typing import NamedTuple, Any, Dict

-ActionInfoOutputs = Optional[Dict[str, Any]]
+ActionInfoOutputs = Dict[str, Any]

 class ActionInfo(NamedTuple):
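
The practical effect, sketched below (the ActionInfo field names are an assumption based on the three-element constructions elsewhere in this diff): an "empty" result is now the empty dict rather than None, so callers can use dict operations directly without an Optional check.

    from typing import Any, Dict, NamedTuple

    # After this change, "no outputs" is represented by an empty dict, never None.
    ActionInfoOutputs = Dict[str, Any]


    class ActionInfo(NamedTuple):
        # Field names are assumed for illustration; the diff only shows
        # three-element constructions such as ActionInfo([], [], {}).
        action: Any
        value: Any
        outputs: ActionInfoOutputs


    empty = ActionInfo([], [], {})
    assert empty.outputs == {}   # dict operations work without a None check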

ml-agents/mlagents/trainers/demo_loader.py (4 changes)

                 break
         pos += next_pos
         obs_decoded += 1
+    if not brain_params:
+        raise RuntimeError(
+            f"No BrainParameters found in demonstration file at {file_path}."
+        )
     return brain_params, info_action_pairs, total_expected
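
The loader change is a fail-fast guard; a self-contained sketch of the same pattern follows (load_demo below is a stand-in, not the real demo_loader API): raising as soon as no BrainParameters were decoded keeps the declared non-Optional return type truthful, so downstream code never has to handle a None brain.

    from typing import Any, List, Optional, Tuple

    BrainParameters = Any  # stand-in for mlagents.trainers.brain.BrainParameters


    def load_demo(file_path: str, decoded: List[Any]) -> Tuple[BrainParameters, List[Any], int]:
        """Stand-in for the demonstration loading routine; not the real demo_loader API."""
        brain_params: Optional[BrainParameters] = None
        info_action_pairs: List[Any] = []
        total_expected = 0
        for item in decoded:
            # ... in the real loader this decodes protobuf messages from the .demo file ...
            brain_params = item
        if not brain_params:
            # Fail fast with a clear error instead of returning None and letting
            # callers hit an AttributeError later.
            raise RuntimeError(f"No BrainParameters found in demonstration file at {file_path}.")
        return brain_params, info_action_pairs, total_expected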

ml-agents/mlagents/trainers/env_manager.py (9 changes)

 from abc import ABC, abstractmethod
-from typing import List, Dict, NamedTuple, Optional
+from typing import List, Dict, NamedTuple
 from mlagents.trainers.brain import AllBrainInfo, BrainParameters
 from mlagents.trainers.policy import Policy
 from mlagents.trainers.action_info import ActionInfo

-    previous_all_brain_info: Optional[AllBrainInfo]
+    previous_all_brain_info: AllBrainInfo
-    brain_name_to_action_info: Optional[Dict[str, ActionInfo]]
+    brain_name_to_action_info: Dict[str, ActionInfo]

-            self.brain_name_to_action_info is not None
-            and brain_name in self.brain_name_to_action_info
+            brain_name in self.brain_name_to_action_info
             and self.brain_name_to_action_info[brain_name].outputs is not None
         )
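
Taken together, the EnvironmentStep changes amount to the pattern sketched below (the middle field name, current_all_brain_info, and the simplified types are assumptions; only the first and third fields appear in this diff): empty steps are built from empty dicts, so no field is Optional and a plain membership test is enough.

    from typing import Any, Dict, NamedTuple

    AllBrainInfo = Dict[str, Any]  # stand-in for mlagents.trainers.brain.AllBrainInfo
    ActionInfo = Any               # stand-in for mlagents.trainers.action_info.ActionInfo


    class EnvironmentStep(NamedTuple):
        previous_all_brain_info: AllBrainInfo             # no longer Optional
        current_all_brain_info: AllBrainInfo              # field name assumed for illustration
        brain_name_to_action_info: Dict[str, ActionInfo]  # no longer Optional


    # "Empty" steps use empty dicts instead of None, as in simple_env_manager.py
    # and subprocess_env_manager.py below.
    empty_step = EnvironmentStep({}, {}, {})

    # A plain membership test replaces the old "is not None and ..." guard.
    print("SomeBrain" in empty_step.brain_name_to_action_info)  # False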

ml-agents/mlagents/trainers/ppo/trainer.py (7 changes)

         self.check_param_keys()
         if multi_gpu and len(get_devices()) > 1:
-            self.policy = MultiGpuPPOPolicy(
+            self.ppo_policy = MultiGpuPPOPolicy(
-            self.policy = PPOPolicy(
+            self.ppo_policy = PPOPolicy(
+        self.policy = self.ppo_policy
         for _reward_signal in self.policy.reward_signals.keys():
             self.collected_rewards[_reward_signal] = {}

             else:
                 bootstrapping_info = next_info
                 idx = l
-            value_next = self.policy.get_value_estimates(
+            value_next = self.ppo_policy.get_value_estimates(
                 bootstrapping_info,
                 idx,
                 next_info.local_done[l] and not next_info.max_reached[l],
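
Both trainer changes follow the same pattern, sketched below with placeholder classes (BasePolicy, PPOLikePolicy, and ExampleTrainer are illustrations, not the real ml-agents types): the trainer keeps self.policy under the base type for code shared with other trainers, and a second, concretely typed attribute (ppo_policy here, sac_policy in the SAC trainer) for calls such as get_value_estimates that only exist on the subclass, which is what mypy needs once it actually checks these modules.

    from typing import List


    class BasePolicy:
        def get_action(self) -> List[float]:
            return []


    class PPOLikePolicy(BasePolicy):
        def get_value_estimates(self) -> float:
            # Subclass-only method, analogous to PPOPolicy.get_value_estimates.
            return 0.0


    class ExampleTrainer:
        def __init__(self) -> None:
            # Concretely typed attribute for subclass-only calls ...
            self.ppo_policy = PPOLikePolicy()
            # ... and the same object under the base type for shared code paths.
            self.policy: BasePolicy = self.ppo_policy

        def update(self) -> float:
            # mypy accepts this because self.ppo_policy is statically a PPOLikePolicy;
            # going through self.policy (typed as BasePolicy) would be rejected.
            return self.ppo_policy.get_value_estimates()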

ml-agents/mlagents/trainers/sac/trainer.py (12 changes)

             if "save_replay_buffer" in trainer_parameters
             else False
         )
-        self.policy = SACPolicy(seed, brain, trainer_parameters, self.is_training, load)
+        self.sac_policy = SACPolicy(
+            seed, brain, trainer_parameters, self.is_training, load
+        )
+        self.policy = self.sac_policy
         # Load the replay buffer if load
         if load and self.checkpoint_replay_buffer:

             for stat, stat_list in batch_update_stats.items():
                 self.stats[stat].append(np.mean(stat_list))
-        if self.policy.bc_module:
-            update_stats = self.policy.bc_module.update()
+        bc_module = self.sac_policy.bc_module
+        if bc_module:
+            update_stats = bc_module.update()
             for stat, val in update_stats.items():
                 self.stats[stat].append(val)

                 self.trainer_parameters["batch_size"],
                 sequence_length=self.policy.sequence_length,
             )
-            update_stats = self.policy.update_reward_signals(
+            update_stats = self.sac_policy.update_reward_signals(
                 reward_signal_minibatches, n_sequences
             )
             for stat_name, value in update_stats.items():
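
The bc_module hunk adds one more small idiom on top of the typed-alias pattern, sketched below (BCModule and ExamplePolicy are placeholders, not the real ml-agents classes): the Optional attribute is read once into a local variable, the truthiness check is made on that local, and everything inside the branch works with the value in one place rather than re-reading the attribute.

    from typing import Dict, List, Optional


    class BCModule:
        """Placeholder for the behavioral-cloning module used by the SAC trainer."""

        def update(self) -> Dict[str, float]:
            return {"Losses/Cloning Loss": 0.0}


    class ExamplePolicy:
        def __init__(self, bc_module: Optional[BCModule] = None) -> None:
            self.bc_module = bc_module


    def update_bc(policy: ExamplePolicy, stats: Dict[str, List[float]]) -> None:
        # Read the Optional attribute once, then check and use the local.
        bc_module = policy.bc_module
        if bc_module:
            update_stats = bc_module.update()
            for stat, val in update_stats.items():
                stats.setdefault(stat, []).append(val)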

ml-agents/mlagents/trainers/simple_env_manager.py (4 changes)

         super().__init__()
         self.shared_float_properties = float_prop_channel
         self.env = env
-        self.previous_step: EnvironmentStep = EnvironmentStep(None, {}, None)
+        self.previous_step: EnvironmentStep = EnvironmentStep({}, {}, {})
         self.previous_all_action_info: Dict[str, ActionInfo] = {}

     def step(self) -> List[EnvironmentStep]:

                 self.shared_float_properties.set_property(k, v)
         self.env.reset()
         all_brain_info = self._generate_all_brain_info()
-        self.previous_step = EnvironmentStep(None, all_brain_info, None)
+        self.previous_step = EnvironmentStep({}, all_brain_info, {})
         return [self.previous_step]

     @property

ml-agents/mlagents/trainers/subprocess_env_manager.py (4 changes)

         self.process = process
         self.worker_id = worker_id
         self.conn = conn
-        self.previous_step: EnvironmentStep = EnvironmentStep(None, {}, None)
+        self.previous_step: EnvironmentStep = EnvironmentStep({}, {}, {})
         self.previous_all_action_info: Dict[str, ActionInfo] = {}
         self.waiting = False

             ew.send("reset", config)
         # Next (synchronously) collect the reset observations from each worker in sequence
         for ew in self.env_workers:
-            ew.previous_step = EnvironmentStep(None, ew.recv().payload, None)
+            ew.previous_step = EnvironmentStep({}, ew.recv().payload, {})
         return list(map(lambda ew: ew.previous_step, self.env_workers))

     @property

ml-agents/mlagents/trainers/tests/test_policy.py (2 changes)

     policy = TFPolicy(test_seed, basic_mock_brain(), basic_params())
     no_agent_brain_info = BrainInfo([], [], [], agents=[])
     result = policy.get_action(no_agent_brain_info)
-    assert result == ActionInfo([], [], None)
+    assert result == ActionInfo([], [], {})

 def test_take_action_returns_nones_on_missing_values():

ml-agents/mlagents/trainers/tf_policy.py (3 changes)

         self.brain = brain
         self.use_recurrent = trainer_parameters["use_recurrent"]
         self.memory_dict: Dict[str, np.ndarray] = {}
+        self.reward_signals: Dict[str, "RewardSignal"] = {}
         self.num_branches = len(self.brain.vector_action_space_size)
         self.previous_action_dict: Dict[str, np.array] = {}
         self.normalize = trainer_parameters.get("normalize", False)

         to be passed to add experiences
         """
         if len(brain_info.agents) == 0:
-            return ActionInfo([], [], None)
+            return ActionInfo([], [], {})
         agents_done = [
             agent
