import attr
import cattr
from typing import Dict, Optional, List, Any, DefaultDict, Mapping, Tuple
from enum import Enum
import collections
import argparse
import abc

from mlagents.trainers.cli_utils import StoreConfigFile, DetectDefault, parser
from mlagents.trainers.cli_utils import load_config
from mlagents.trainers.exception import TrainerConfigError
from mlagents.trainers.models import ScheduleType, EncoderType

from mlagents_envs import logging_util
from mlagents_envs.side_channel.environment_parameters_channel import (
    EnvironmentParametersChannel,
)

logger = logging_util.get_logger(__name__)


def check_and_structure(key: str, value: Any, class_type: type) -> Any:
    attr_fields_dict = attr.fields_dict(class_type)
    if key not in attr_fields_dict:
        raise TrainerConfigError(
            f"The option {key} was specified in your YAML file for {class_type.__name__}, but is invalid."
        )
    # Apply cattr structure to the values
    return cattr.structure(value, attr_fields_dict[key].type)


def strict_to_cls(d: Mapping, t: type) -> Any:
    if not isinstance(d, Mapping):
        raise TrainerConfigError(f"Unsupported config {d} for {t.__name__}.")
    d_copy: Dict[str, Any] = {}
    d_copy.update(d)
    for key, val in d_copy.items():
        d_copy[key] = check_and_structure(key, val, t)
    return t(**d_copy)


def defaultdict_to_dict(d: DefaultDict) -> Dict:
    return {key: cattr.unstructure(val) for key, val in d.items()}


@attr.s(auto_attribs=True)
class ExportableSettings:
    def as_dict(self):
        return cattr.unstructure(self)


@attr.s(auto_attribs=True)
class NetworkSettings:
    @attr.s(auto_attribs=True)
    class MemorySettings:
        sequence_length: int = 64
        memory_size: int = 128

    normalize: bool = False
    hidden_units: int = 128
    num_layers: int = 2
    vis_encode_type: EncoderType = EncoderType.SIMPLE
    memory: Optional[MemorySettings] = None


@attr.s(auto_attribs=True)
class BehavioralCloningSettings:
    demo_path: str
    steps: int = 0
    strength: float = 1.0
    samples_per_update: int = 0
    # Setting either of these to None will allow the Optimizer
    # to decide these parameters, based on Trainer hyperparams
    num_epoch: Optional[int] = None
    batch_size: Optional[int] = None


@attr.s(auto_attribs=True)
class HyperparamSettings:
    batch_size: int = 1024
    buffer_size: int = 10240
    learning_rate: float = 3.0e-4
    learning_rate_schedule: ScheduleType = ScheduleType.CONSTANT


@attr.s(auto_attribs=True)
class PPOSettings(HyperparamSettings):
    beta: float = 5.0e-3
    epsilon: float = 0.2
    lambd: float = 0.95
    num_epoch: int = 3
    learning_rate_schedule: ScheduleType = ScheduleType.LINEAR

@attr.s(auto_attribs=True)
class PPOTransferSettings(HyperparamSettings):
    beta: float = 5.0e-3
    epsilon: float = 0.2
    lambd: float = 0.95
    num_epoch: int = 3
    learning_rate_schedule: ScheduleType = ScheduleType.LINEAR
    model_schedule: ScheduleType = ScheduleType.LINEAR

    separate_value_train: bool = False
    separate_policy_train: bool = False
    separate_model_train: bool = False
    separate_value_net: bool = False
    use_var_encoder: bool = False
    use_var_predict: bool = False
    with_prior: bool = False
    use_inverse_model: bool = False
    predict_return: bool = False
    reuse_encoder: bool = False
    use_alter: bool = False
    in_batch_alter: bool = False
    in_epoch_alter: bool = False
    use_op_buffer: bool = False
    train_encoder: bool = True
    train_action: bool = True
    train_model: bool = True
    train_policy: bool = True
    train_value: bool = True
    use_bisim: bool = False
        
    # Transfer
    use_transfer: bool = False
    smart_transfer: bool = False
    conv_thres: float = 1e-3
    transfer_path: str = ""
    load_model: bool = True
    load_value: bool = False
    load_policy: bool = False
    load_encoder: bool = False
    load_action: bool = False

    # Network
    encoder_layers: int = 1
    action_layers: int = -1
    policy_layers: int = 1
    value_layers: int = 1
    forward_layers: int = 1
    inverse_layers: int = 1
    feature_size: int = 16
    action_feature_size: int = 16


@attr.s(auto_attribs=True)
class SACSettings(HyperparamSettings):
    batch_size: int = 128
    buffer_size: int = 50000
    buffer_init_steps: int = 0
    tau: float = 0.005
    steps_per_update: float = 1
    save_replay_buffer: bool = False
    init_entcoef: float = 1.0
    reward_signal_steps_per_update: float = attr.ib()

    @reward_signal_steps_per_update.default
    def _reward_signal_steps_per_update_default(self):
        return self.steps_per_update

@attr.s(auto_attribs=True)
class SACTransferSettings(SACSettings):
    model_schedule: ScheduleType = ScheduleType.LINEAR

    separate_value_train: bool = False
    separate_policy_train: bool = False
    separate_model_train: bool = False
    separate_value_net: bool = False
    use_var_encoder: bool = False
    use_var_predict: bool = False
    with_prior: bool = False
    use_inverse_model: bool = False
    predict_return: bool = False
    reuse_encoder: bool = False
    use_alter: bool = False
    in_batch_alter: bool = False
    in_epoch_alter: bool = False
    use_op_buffer: bool = False
    train_encoder: bool = True
    train_action: bool = True
    train_model: bool = True
    train_policy: bool = True
    train_value: bool = True
    use_bisim: bool = False
        
    # Transfer
    use_transfer: bool = False
    smart_transfer: bool = False
    conv_thres: float = 1e-3
    transfer_path: str = ""
    load_model: bool = True
    load_value: bool = False
    load_policy: bool = False
    load_encoder: bool = False
    load_action: bool = False

    # Network
    encoder_layers: int = 1
    action_layers: int = -1
    policy_layers: int = 1
    value_layers: int = 1
    forward_layers: int = 1
    inverse_layers: int = 1
    feature_size: int = 16
    action_feature_size: int = 16

class RewardSignalType(Enum):
    EXTRINSIC: str = "extrinsic"
    GAIL: str = "gail"
    CURIOSITY: str = "curiosity"

    def to_settings(self) -> type:
        _mapping = {
            RewardSignalType.EXTRINSIC: RewardSignalSettings,
            RewardSignalType.GAIL: GAILSettings,
            RewardSignalType.CURIOSITY: CuriositySettings,
        }
        return _mapping[self]


@attr.s(auto_attribs=True)
class RewardSignalSettings:
    gamma: float = 0.99
    strength: float = 1.0

    @staticmethod
    def structure(d: Mapping, t: type) -> Any:
        """
        Helper method to structure a Dict of RewardSignalSettings class. Meant to be registered with
        cattr.register_structure_hook() and called with cattr.structure(). This is needed to handle
        the special Enum selection of RewardSignalSettings classes.
        """
        if not isinstance(d, Mapping):
            raise TrainerConfigError(f"Unsupported reward signal configuration {d}.")
        d_final: Dict[RewardSignalType, RewardSignalSettings] = {}
        for key, val in d.items():
            enum_key = RewardSignalType(key)
            t = enum_key.to_settings()
            d_final[enum_key] = strict_to_cls(val, t)
        return d_final


@attr.s(auto_attribs=True)
class GAILSettings(RewardSignalSettings):
    encoding_size: int = 64
    learning_rate: float = 3e-4
    use_actions: bool = False
    use_vail: bool = False
    demo_path: str = attr.ib(kw_only=True)


@attr.s(auto_attribs=True)
class CuriositySettings(RewardSignalSettings):
    encoding_size: int = 64
    learning_rate: float = 3e-4


class ParameterRandomizationType(Enum):
    UNIFORM: str = "uniform"
    GAUSSIAN: str = "gaussian"
    MULTIRANGEUNIFORM: str = "multirangeuniform"

    def to_settings(self) -> type:
        _mapping = {
            ParameterRandomizationType.UNIFORM: UniformSettings,
            ParameterRandomizationType.GAUSSIAN: GaussianSettings,
            ParameterRandomizationType.MULTIRANGEUNIFORM: MultiRangeUniformSettings,
        }
        return _mapping[self]


@attr.s(auto_attribs=True)
class ParameterRandomizationSettings(abc.ABC):
    seed: int = parser.get_default("seed")

    @staticmethod
    def structure(d: Mapping, t: type) -> Any:
        """
        Helper method to structure a Dict of ParameterRandomizationSettings class. Meant to be registered with
        cattr.register_structure_hook() and called with cattr.structure(). This is needed to handle
        the special Enum selection of ParameterRandomizationSettings classes.
        """
        if not isinstance(d, Mapping):
            raise TrainerConfigError(
                f"Unsupported parameter randomization configuration {d}."
            )
        d_final: Dict[str, List[float]] = {}
        for environment_parameter, environment_parameter_config in d.items():
            if environment_parameter == "resampling-interval":
                logger.warning(
                    "The resampling-interval is no longer necessary for parameter randomization. It is being ignored."
                )
                continue
            if "sampler_type" not in environment_parameter_config:
                raise TrainerConfigError(
                    f"Sampler configuration for {environment_parameter} does not contain sampler_type."
                )
            if "sampler_parameters" not in environment_parameter_config:
                raise TrainerConfigError(
                    f"Sampler configuration for {environment_parameter} does not contain sampler_parameters."
                )
            enum_key = ParameterRandomizationType(
                environment_parameter_config["sampler_type"]
            )
            t = enum_key.to_settings()
            d_final[environment_parameter] = strict_to_cls(
                environment_parameter_config["sampler_parameters"], t
            )
        return d_final

    @abc.abstractmethod
    def apply(self, key: str, env_channel: EnvironmentParametersChannel) -> None:
        """
        Helper method to send sampler settings over EnvironmentParametersChannel
        Calls the appropriate sampler type set method.
        :param key: environment parameter to be sampled
        :param env_channel: The EnvironmentParametersChannel to communicate sampler settings to environment
        """
        pass


@attr.s(auto_attribs=True)
class UniformSettings(ParameterRandomizationSettings):
    min_value: float = attr.ib()
    max_value: float = 1.0

    @min_value.default
    def _min_value_default(self):
        return 0.0

    @min_value.validator
    def _check_min_value(self, attribute, value):
        if self.min_value > self.max_value:
            raise TrainerConfigError(
                "Minimum value is greater than maximum value in uniform sampler."
            )

    def apply(self, key: str, env_channel: EnvironmentParametersChannel) -> None:
        """
        Helper method to send sampler settings over EnvironmentParametersChannel
        Calls the uniform sampler type set method.
        :param key: environment parameter to be sampled
        :param env_channel: The EnvironmentParametersChannel to communicate sampler settings to environment
        """
        env_channel.set_uniform_sampler_parameters(
            key, self.min_value, self.max_value, self.seed
        )


@attr.s(auto_attribs=True)
class GaussianSettings(ParameterRandomizationSettings):
    mean: float = 1.0
    st_dev: float = 1.0

    def apply(self, key: str, env_channel: EnvironmentParametersChannel) -> None:
        """
        Helper method to send sampler settings over EnvironmentParametersChannel
        Calls the gaussian sampler type set method.
        :param key: environment parameter to be sampled
        :param env_channel: The EnvironmentParametersChannel to communicate sampler settings to environment
        """
        env_channel.set_gaussian_sampler_parameters(
            key, self.mean, self.st_dev, self.seed
        )


@attr.s(auto_attribs=True)
class MultiRangeUniformSettings(ParameterRandomizationSettings):
    intervals: List[Tuple[float, float]] = attr.ib()

    @intervals.default
    def _intervals_default(self):
        return [[0.0, 1.0]]

    @intervals.validator
    def _check_intervals(self, attribute, value):
        for interval in self.intervals:
            if len(interval) != 2:
                raise TrainerConfigError(
                    f"The sampling interval {interval} must contain exactly two values."
                )
            min_value, max_value = interval
            if min_value > max_value:
                raise TrainerConfigError(
                    f"Minimum value is greater than maximum value in interval {interval}."
                )

    def apply(self, key: str, env_channel: EnvironmentParametersChannel) -> None:
        """
        Helper method to send sampler settings over EnvironmentParametersChannel
        Calls the multirangeuniform sampler type set method.
        :param key: environment parameter to be sampled
        :param env_channel: The EnvironmentParametersChannel to communicate sampler settings to environment
        """
        env_channel.set_multirangeuniform_sampler_parameters(
            key, self.intervals, self.seed
        )


@attr.s(auto_attribs=True)
class SelfPlaySettings:
    save_steps: int = 20000
    team_change: int = attr.ib()

    @team_change.default
    def _team_change_default(self):
        # Assign team_change to about 4x save_steps
        return self.save_steps * 5

    swap_steps: int = 2000
    window: int = 10
    play_against_latest_model_ratio: float = 0.5
    initial_elo: float = 1200.0


class TrainerType(Enum):
    PPO: str = "ppo"
    SAC: str = "sac"
    PPO_Transfer: str = "ppo_transfer"
    SAC_Transfer: str = "sac_transfer"

    def to_settings(self) -> type:
        _mapping = {TrainerType.PPO: PPOSettings, TrainerType.SAC: SACSettings, 
        TrainerType.PPO_Transfer: PPOTransferSettings, TrainerType.SAC_Transfer: SACTransferSettings}
        return _mapping[self]


@attr.s(auto_attribs=True)
class TrainerSettings(ExportableSettings):
    trainer_type: TrainerType = TrainerType.PPO
    hyperparameters: HyperparamSettings = attr.ib()

    @hyperparameters.default
    def _set_default_hyperparameters(self):
        return self.trainer_type.to_settings()()

    network_settings: NetworkSettings = attr.ib(factory=NetworkSettings)
    reward_signals: Dict[RewardSignalType, RewardSignalSettings] = attr.ib(
        factory=lambda: {RewardSignalType.EXTRINSIC: RewardSignalSettings()}
    )
    init_path: Optional[str] = None
    keep_checkpoints: int = 5
    checkpoint_interval: int = 500000
    max_steps: int = 500000
    time_horizon: int = 64
    summary_freq: int = 50000
    threaded: bool = True
    self_play: Optional[SelfPlaySettings] = None
    behavioral_cloning: Optional[BehavioralCloningSettings] = None

    cattr.register_structure_hook(
        Dict[RewardSignalType, RewardSignalSettings], RewardSignalSettings.structure
    )

    @network_settings.validator
    def _check_batch_size_seq_length(self, attribute, value):
        if self.network_settings.memory is not None:
            if (
                self.network_settings.memory.sequence_length
                > self.hyperparameters.batch_size
            ):
                raise TrainerConfigError(
                    "When using memory, sequence length must be less than or equal to batch size. "
                )

    @staticmethod
    def dict_to_defaultdict(d: Dict, t: type) -> DefaultDict:
        return collections.defaultdict(
            TrainerSettings, cattr.structure(d, Dict[str, TrainerSettings])
        )

    @staticmethod
    def structure(d: Mapping, t: type) -> Any:
        """
        Helper method to structure a TrainerSettings class. Meant to be registered with
        cattr.register_structure_hook() and called with cattr.structure().
        """
        if not isinstance(d, Mapping):
            raise TrainerConfigError(f"Unsupported config {d} for {t.__name__}.")
        d_copy: Dict[str, Any] = {}
        d_copy.update(d)

        for key, val in d_copy.items():
            if attr.has(type(val)):
                # Don't convert already-converted attrs classes.
                continue
            if key == "hyperparameters":
                if "trainer_type" not in d_copy:
                    raise TrainerConfigError(
                        "Hyperparameters were specified but no trainer_type was given."
                    )
                else:
                    d_copy[key] = strict_to_cls(
                        d_copy[key], TrainerType(d_copy["trainer_type"]).to_settings()
                    )
            elif key == "max_steps":
                d_copy[key] = int(float(val))
                # In some legacy configs, max steps was specified as a float
            else:
                d_copy[key] = check_and_structure(key, val, t)
        return t(**d_copy)


@attr.s(auto_attribs=True)
class CurriculumSettings:
    class MeasureType:
        PROGRESS: str = "progress"
        REWARD: str = "reward"

    measure: str = attr.ib(default=MeasureType.REWARD)
    thresholds: List[float] = attr.ib(factory=list)
    min_lesson_length: int = 0
    signal_smoothing: bool = True
    parameters: Dict[str, List[float]] = attr.ib(kw_only=True)


@attr.s(auto_attribs=True)
class CheckpointSettings:
    run_id: str = parser.get_default("run_id")
    initialize_from: str = parser.get_default("initialize_from")
    load_model: bool = parser.get_default("load_model")
    resume: bool = parser.get_default("resume")
    force: bool = parser.get_default("force")
    train_model: bool = parser.get_default("train_model")
    inference: bool = parser.get_default("inference")


@attr.s(auto_attribs=True)
class EnvironmentSettings:
    env_path: Optional[str] = parser.get_default("env_path")
    env_args: Optional[List[str]] = parser.get_default("env_args")
    base_port: int = parser.get_default("base_port")
    num_envs: int = parser.get_default("num_envs")
    seed: int = parser.get_default("seed")


@attr.s(auto_attribs=True)
class EngineSettings:
    width: int = parser.get_default("width")
    height: int = parser.get_default("height")
    quality_level: int = parser.get_default("quality_level")
    time_scale: float = parser.get_default("time_scale")
    target_frame_rate: int = parser.get_default("target_frame_rate")
    capture_frame_rate: int = parser.get_default("capture_frame_rate")
    no_graphics: bool = parser.get_default("no_graphics")


@attr.s(auto_attribs=True)
class RunOptions(ExportableSettings):
    behaviors: DefaultDict[str, TrainerSettings] = attr.ib(
        factory=lambda: collections.defaultdict(TrainerSettings)
    )
    env_settings: EnvironmentSettings = attr.ib(factory=EnvironmentSettings)
    engine_settings: EngineSettings = attr.ib(factory=EngineSettings)
    parameter_randomization: Optional[Dict[str, ParameterRandomizationSettings]] = None
    curriculum: Optional[Dict[str, CurriculumSettings]] = None
    checkpoint_settings: CheckpointSettings = attr.ib(factory=CheckpointSettings)

    # These are options that are relevant to the run itself, and not the engine or environment.
    # They will be left here.
    debug: bool = parser.get_default("debug")
    # Strict conversion
    cattr.register_structure_hook(EnvironmentSettings, strict_to_cls)
    cattr.register_structure_hook(EngineSettings, strict_to_cls)
    cattr.register_structure_hook(CheckpointSettings, strict_to_cls)
    cattr.register_structure_hook(
        Dict[str, ParameterRandomizationSettings],
        ParameterRandomizationSettings.structure,
    )
    cattr.register_structure_hook(CurriculumSettings, strict_to_cls)
    cattr.register_structure_hook(TrainerSettings, TrainerSettings.structure)
    cattr.register_structure_hook(
        DefaultDict[str, TrainerSettings], TrainerSettings.dict_to_defaultdict
    )
    cattr.register_unstructure_hook(collections.defaultdict, defaultdict_to_dict)

    @staticmethod
    def from_argparse(args: argparse.Namespace) -> "RunOptions":
        """
        Takes an argparse.Namespace as specified in `parse_command_line`, loads input configuration files
        from file paths, and converts to a RunOptions instance.
        :param args: collection of command-line parameters passed to mlagents-learn
        :return: RunOptions representing the passed in arguments, with trainer config, curriculum and sampler
          configs loaded from files.
        """
        argparse_args = vars(args)
        config_path = StoreConfigFile.trainer_config_path

        # Load YAML
        configured_dict: Dict[str, Any] = {
            "checkpoint_settings": {},
            "env_settings": {},
            "engine_settings": {},
        }
        if config_path is not None:
            configured_dict.update(load_config(config_path))

        # Use the YAML file values for all values not specified in the CLI.
        for key in configured_dict.keys():
            # Detect bad config options
            if key not in attr.fields_dict(RunOptions):
                raise TrainerConfigError(
                    "The option {} was specified in your YAML file, but is invalid.".format(
                        key
                    )
                )
        # Override with CLI args
        # Keep deprecated --load working, TODO: remove
        argparse_args["resume"] = argparse_args["resume"] or argparse_args["load_model"]
        for key, val in argparse_args.items():
            if key in DetectDefault.non_default_args:
                if key in attr.fields_dict(CheckpointSettings):
                    configured_dict["checkpoint_settings"][key] = val
                elif key in attr.fields_dict(EnvironmentSettings):
                    configured_dict["env_settings"][key] = val
                elif key in attr.fields_dict(EngineSettings):
                    configured_dict["engine_settings"][key] = val
                else:  # Base options
                    configured_dict[key] = val
        return RunOptions.from_dict(configured_dict)

    @staticmethod
    def from_dict(options_dict: Dict[str, Any]) -> "RunOptions":
        return cattr.structure(options_dict, RunOptions)