浏览代码

[change] Organize trainer files a bit better (#3538)

/bug-failed-api-check
GitHub 5 年前
当前提交
e4177de0
共有 42 个文件被更改,包括 39 次插入和 36 次删除
  1. 2
      ml-agents/mlagents/trainers/agent_processor.py
  2. 2
      ml-agents/mlagents/trainers/components/bc/model.py
  3. 2
      ml-agents/mlagents/trainers/components/bc/module.py
  4. 2
      ml-agents/mlagents/trainers/components/reward_signals/__init__.py
  5. 2
      ml-agents/mlagents/trainers/components/reward_signals/curiosity/model.py
  6. 2
      ml-agents/mlagents/trainers/components/reward_signals/curiosity/signal.py
  7. 2
      ml-agents/mlagents/trainers/components/reward_signals/gail/model.py
  8. 2
      ml-agents/mlagents/trainers/components/reward_signals/gail/signal.py
  9. 2
      ml-agents/mlagents/trainers/components/reward_signals/reward_signal_factory.py
  10. 2
      ml-agents/mlagents/trainers/env_manager.py
  11. 2
      ml-agents/mlagents/trainers/ghost/trainer.py
  12. 4
      ml-agents/mlagents/trainers/ppo/optimizer.py
  13. 6
      ml-agents/mlagents/trainers/ppo/trainer.py
  14. 4
      ml-agents/mlagents/trainers/sac/optimizer.py
  15. 6
      ml-agents/mlagents/trainers/sac/trainer.py
  16. 2
      ml-agents/mlagents/trainers/tests/test_bcmodule.py
  17. 2
      ml-agents/mlagents/trainers/tests/test_distributions.py
  18. 2
      ml-agents/mlagents/trainers/tests/test_nn_policy.py
  19. 2
      ml-agents/mlagents/trainers/tests/test_policy.py
  20. 2
      ml-agents/mlagents/trainers/tests/test_ppo.py
  21. 2
      ml-agents/mlagents/trainers/tests/test_reward_signals.py
  22. 4
      ml-agents/mlagents/trainers/tests/test_rl_trainer.py
  23. 2
      ml-agents/mlagents/trainers/tests/test_sac.py
  24. 2
      ml-agents/mlagents/trainers/trainer/rl_trainer.py
  25. 2
      ml-agents/mlagents/trainers/trainer/trainer.py
  26. 4
      ml-agents/mlagents/trainers/optimizer/tf_optimizer.py
  27. 4
      ml-agents/mlagents/trainers/policy/nn_policy.py
  28. 1
      ml-agents/mlagents/trainers/optimizer/__init__.py
  29. 1
      ml-agents/mlagents/trainers/policy/__init__.py
  30. 1
      ml-agents/mlagents/trainers/trainer/__init__.py
  31. 0
      /ml-agents/mlagents/trainers/policy/policy.py
  32. 0
      /ml-agents/mlagents/trainers/trainer/rl_trainer.py
  33. 0
      /ml-agents/mlagents/trainers/policy/tf_policy.py
  34. 0
      /ml-agents/mlagents/trainers/trainer/trainer.py
  35. 0
      /ml-agents/mlagents/trainers/optimizer/optimizer.py
  36. 0
      /ml-agents/mlagents/trainers/optimizer/tf_optimizer.py
  37. 0
      /ml-agents/mlagents/trainers/distributions.py
  38. 0
      /ml-agents/mlagents/trainers/policy/nn_policy.py

2
ml-agents/mlagents/trainers/agent_processor.py


from mlagents_envs.base_env import BatchedStepResult, StepResult
from mlagents.trainers.trajectory import Trajectory, AgentExperience
from mlagents.trainers.tf_policy import TFPolicy
from mlagents.trainers.policy.tf_policy import TFPolicy
from mlagents.trainers.policy import Policy
from mlagents.trainers.action_info import ActionInfo, ActionInfoOutputs
from mlagents.trainers.stats import StatsReporter

2
ml-agents/mlagents/trainers/components/bc/model.py


from mlagents.tf_utils import tf
from mlagents.trainers.tf_policy import TFPolicy
from mlagents.trainers.policy.tf_policy import TFPolicy
class BCModel(object):

2
ml-agents/mlagents/trainers/components/bc/module.py


from typing import Dict, Any
import numpy as np
from mlagents.trainers.tf_policy import TFPolicy
from mlagents.trainers.policy.tf_policy import TFPolicy
from .model import BCModel
from mlagents.trainers.demo_loader import demo_to_buffer
from mlagents.trainers.exception import UnityTrainerException

2
ml-agents/mlagents/trainers/components/reward_signals/__init__.py


from mlagents.tf_utils import tf
from mlagents.trainers.exception import UnityTrainerException
from mlagents.trainers.tf_policy import TFPolicy
from mlagents.trainers.policy.tf_policy import TFPolicy
logger = logging.getLogger("mlagents.trainers")

2
ml-agents/mlagents/trainers/components/reward_signals/curiosity/model.py


from mlagents.tf_utils import tf
from mlagents.trainers.models import ModelUtils
from mlagents.trainers.tf_policy import TFPolicy
from mlagents.trainers.policy.tf_policy import TFPolicy
class CuriosityModel(object):

2
ml-agents/mlagents/trainers/components/reward_signals/curiosity/signal.py


from mlagents.trainers.components.reward_signals import RewardSignal, RewardSignalResult
from mlagents.trainers.components.reward_signals.curiosity.model import CuriosityModel
from mlagents.trainers.tf_policy import TFPolicy
from mlagents.trainers.policy.tf_policy import TFPolicy
class CuriosityRewardSignal(RewardSignal):

2
ml-agents/mlagents/trainers/components/reward_signals/gail/model.py


from mlagents.tf_utils import tf
from mlagents.trainers.tf_policy import TFPolicy
from mlagents.trainers.policy.tf_policy import TFPolicy
from mlagents.trainers.models import ModelUtils
EPSILON = 1e-7

2
ml-agents/mlagents/trainers/components/reward_signals/gail/signal.py


from mlagents.tf_utils import tf
from mlagents.trainers.components.reward_signals import RewardSignal, RewardSignalResult
from mlagents.trainers.tf_policy import TFPolicy
from mlagents.trainers.policy.tf_policy import TFPolicy
from .model import GAILModel
from mlagents.trainers.demo_loader import demo_to_buffer

2
ml-agents/mlagents/trainers/components/reward_signals/reward_signal_factory.py


from mlagents.trainers.components.reward_signals.curiosity.signal import (
CuriosityRewardSignal,
)
from mlagents.trainers.tf_policy import TFPolicy
from mlagents.trainers.policy.tf_policy import TFPolicy
logger = logging.getLogger("mlagents.trainers")

2
ml-agents/mlagents/trainers/env_manager.py


from typing import List, Dict, NamedTuple, Iterable
from mlagents_envs.base_env import BatchedStepResult, AgentGroupSpec, AgentGroup
from mlagents.trainers.brain import BrainParameters
from mlagents.trainers.tf_policy import TFPolicy
from mlagents.trainers.policy.tf_policy import TFPolicy
from mlagents.trainers.agent_processor import AgentManager, AgentManagerQueue
from mlagents.trainers.action_info import ActionInfo

2
ml-agents/mlagents/trainers/ghost/trainer.py


from mlagents.trainers.brain import BrainParameters
from mlagents.trainers.policy import Policy
from mlagents.trainers.tf_policy import TFPolicy
from mlagents.trainers.policy.tf_policy import TFPolicy
from mlagents.trainers.trainer import Trainer
from mlagents.trainers.trajectory import Trajectory

4
ml-agents/mlagents/trainers/ppo/optimizer.py


from mlagents.tf_utils import tf
from mlagents_envs.timers import timed
from mlagents.trainers.models import ModelUtils, EncoderType, LearningRateSchedule
from mlagents.trainers.tf_policy import TFPolicy
from mlagents.trainers.common.tf_optimizer import TFOptimizer
from mlagents.trainers.policy.tf_policy import TFPolicy
from mlagents.trainers.optimizer.tf_optimizer import TFOptimizer
from mlagents.trainers.buffer import AgentBuffer

6
ml-agents/mlagents/trainers/ppo/trainer.py


import numpy as np
from mlagents.trainers.common.nn_policy import NNPolicy
from mlagents.trainers.rl_trainer import RLTrainer
from mlagents.trainers.policy.nn_policy import NNPolicy
from mlagents.trainers.trainer.rl_trainer import RLTrainer
from mlagents.trainers.tf_policy import TFPolicy
from mlagents.trainers.policy.tf_policy import TFPolicy
from mlagents.trainers.ppo.optimizer import PPOOptimizer
from mlagents.trainers.trajectory import Trajectory

4
ml-agents/mlagents/trainers/sac/optimizer.py


from mlagents.trainers.sac.network import SACPolicyNetwork, SACTargetNetwork
from mlagents.trainers.models import LearningRateSchedule, EncoderType, ModelUtils
from mlagents.trainers.common.tf_optimizer import TFOptimizer
from mlagents.trainers.tf_policy import TFPolicy
from mlagents.trainers.optimizer.tf_optimizer import TFOptimizer
from mlagents.trainers.policy.tf_policy import TFPolicy
from mlagents.trainers.buffer import AgentBuffer
from mlagents_envs.timers import timed

6
ml-agents/mlagents/trainers/sac/trainer.py


from mlagents_envs.timers import timed
from mlagents.trainers.tf_policy import TFPolicy
from mlagents.trainers.common.nn_policy import NNPolicy
from mlagents.trainers.policy.tf_policy import TFPolicy
from mlagents.trainers.policy.nn_policy import NNPolicy
from mlagents.trainers.rl_trainer import RLTrainer
from mlagents.trainers.trainer.rl_trainer import RLTrainer
from mlagents.trainers.trajectory import Trajectory, SplitObservations
from mlagents.trainers.brain import BrainParameters

2
ml-agents/mlagents/trainers/tests/test_bcmodule.py


import yaml
import os
from mlagents.trainers.common.nn_policy import NNPolicy
from mlagents.trainers.policy.nn_policy import NNPolicy
from mlagents.trainers.components.bc.module import BCModule

2
ml-agents/mlagents/trainers/tests/test_distributions.py


import yaml
from mlagents.trainers.common.distributions import (
from mlagents.trainers.distributions import (
GaussianDistribution,
MultiCategoricalDistribution,
)

2
ml-agents/mlagents/trainers/tests/test_nn_policy.py


import yaml
from mlagents.trainers.common.nn_policy import NNPolicy
from mlagents.trainers.policy.nn_policy import NNPolicy
from mlagents.trainers.models import EncoderType, ModelUtils
from mlagents.trainers.exception import UnityTrainerException
from mlagents.trainers.brain import BrainParameters, CameraResolution

2
ml-agents/mlagents/trainers/tests/test_policy.py


from mlagents.trainers.tf_policy import TFPolicy
from mlagents.trainers.policy.tf_policy import TFPolicy
from mlagents_envs.base_env import BatchedStepResult, AgentGroupSpec
from mlagents.trainers.action_info import ActionInfo
from unittest.mock import MagicMock

2
ml-agents/mlagents/trainers/tests/test_ppo.py


from mlagents.trainers.ppo.trainer import PPOTrainer, discount_rewards
from mlagents.trainers.ppo.optimizer import PPOOptimizer
from mlagents.trainers.common.nn_policy import NNPolicy
from mlagents.trainers.policy.nn_policy import NNPolicy
from mlagents.trainers.brain import BrainParameters
from mlagents.trainers.agent_processor import AgentManagerQueue
from mlagents.trainers.tests import mock_brain as mb

2
ml-agents/mlagents/trainers/tests/test_reward_signals.py


import yaml
import os
import mlagents.trainers.tests.mock_brain as mb
from mlagents.trainers.common.nn_policy import NNPolicy
from mlagents.trainers.policy.nn_policy import NNPolicy
from mlagents.trainers.sac.optimizer import SACOptimizer
from mlagents.trainers.ppo.optimizer import PPOOptimizer

4
ml-agents/mlagents/trainers/tests/test_rl_trainer.py


import yaml
from unittest import mock
import mlagents.trainers.tests.mock_brain as mb
from mlagents.trainers.rl_trainer import RLTrainer
from mlagents.trainers.trainer.rl_trainer import RLTrainer
from mlagents.trainers.tests.test_buffer import construct_fake_buffer
from mlagents.trainers.agent_processor import AgentManagerQueue

assert len(arr) == 0
@mock.patch("mlagents.trainers.rl_trainer.RLTrainer.clear_update_buffer")
@mock.patch("mlagents.trainers.trainer.rl_trainer.RLTrainer.clear_update_buffer")
def test_advance(mocked_clear_update_buffer):
trainer = create_rl_trainer()
trajectory_queue = AgentManagerQueue("testbrain")

2
ml-agents/mlagents/trainers/tests/test_sac.py


from mlagents.trainers.sac.trainer import SACTrainer
from mlagents.trainers.sac.optimizer import SACOptimizer
from mlagents.trainers.common.nn_policy import NNPolicy
from mlagents.trainers.policy.nn_policy import NNPolicy
from mlagents.trainers.agent_processor import AgentManagerQueue
from mlagents.trainers.tests import mock_brain as mb
from mlagents.trainers.tests.mock_brain import make_brain_parameters

2
ml-agents/mlagents/trainers/trainer/rl_trainer.py


from typing import Dict
from collections import defaultdict
from mlagents.trainers.common.tf_optimizer import TFOptimizer
from mlagents.trainers.optimizer.tf_optimizer import TFOptimizer
from mlagents.trainers.buffer import AgentBuffer
from mlagents.trainers.trainer import Trainer
from mlagents.trainers.exception import UnityTrainerException

2
ml-agents/mlagents/trainers/trainer/trainer.py


from mlagents_envs.timers import set_gauge
from mlagents.model_serialization import export_policy_model, SerializationSettings
from mlagents.trainers.tf_policy import TFPolicy
from mlagents.trainers.policy.tf_policy import TFPolicy
from mlagents.trainers.stats import StatsReporter
from mlagents.trainers.trajectory import Trajectory
from mlagents.trainers.agent_processor import AgentManagerQueue

4
ml-agents/mlagents/trainers/optimizer/tf_optimizer.py


from mlagents.tf_utils.tf import tf
from mlagents.trainers.buffer import AgentBuffer
from mlagents.trainers.tf_policy import TFPolicy
from mlagents.trainers.common.optimizer import Optimizer
from mlagents.trainers.policy.tf_policy import TFPolicy
from mlagents.trainers.optimizer import Optimizer
from mlagents.trainers.trajectory import SplitObservations
from mlagents.trainers.components.reward_signals.reward_signal_factory import (
create_reward_signal,

4
ml-agents/mlagents/trainers/policy/nn_policy.py


from mlagents.trainers.brain import BrainParameters
from mlagents.trainers.models import EncoderType
from mlagents.trainers.models import ModelUtils
from mlagents.trainers.tf_policy import TFPolicy
from mlagents.trainers.common.distributions import (
from mlagents.trainers.policy.tf_policy import TFPolicy
from mlagents.trainers.distributions import (
GaussianDistribution,
MultiCategoricalDistribution,
)

1
ml-agents/mlagents/trainers/optimizer/__init__.py


from mlagents.trainers.optimizer.optimizer import Optimizer # noqa

1
ml-agents/mlagents/trainers/policy/__init__.py


from mlagents.trainers.policy.policy import Policy # noqa

1
ml-agents/mlagents/trainers/trainer/__init__.py


from mlagents.trainers.trainer.trainer import Trainer # noqa

/ml-agents/mlagents/trainers/policy.py → /ml-agents/mlagents/trainers/policy/policy.py

/ml-agents/mlagents/trainers/rl_trainer.py → /ml-agents/mlagents/trainers/trainer/rl_trainer.py

/ml-agents/mlagents/trainers/tf_policy.py → /ml-agents/mlagents/trainers/policy/tf_policy.py

/ml-agents/mlagents/trainers/trainer.py → /ml-agents/mlagents/trainers/trainer/trainer.py

/ml-agents/mlagents/trainers/common/optimizer.py → /ml-agents/mlagents/trainers/optimizer/optimizer.py

/ml-agents/mlagents/trainers/common/tf_optimizer.py → /ml-agents/mlagents/trainers/optimizer/tf_optimizer.py

/ml-agents/mlagents/trainers/common/distributions.py → /ml-agents/mlagents/trainers/distributions.py

/ml-agents/mlagents/trainers/common/nn_policy.py → /ml-agents/mlagents/trainers/policy/nn_policy.py

正在加载...
取消
保存