# # Unity ML-Agents Toolkit
# ## ML-Agent Learning (Behavioral Cloning)
# Contains an implementation of the Offline Behavioral Cloning algorithm.

import logging
import copy

from mlagents.trainers.bc.trainer import BCTrainer
from mlagents.trainers.demo_loader import demo_to_buffer
from mlagents.trainers.trainer import UnityTrainerException

logger = logging.getLogger("mlagents.trainers")


class OfflineBCTrainer(BCTrainer):
    """Behavioral Cloning trainer that learns from a recorded demonstration file."""

    def __init__(self, brain, trainer_parameters, training, load, seed, run_id):
        """
        Set up the trainer and load the demonstration it will clone from.

        Once the base BCTrainer has been initialized, the demonstration found
        at ``trainer_parameters["demo_path"]`` is read into
        ``self.demonstration_buffer`` and the brain parameters stored with it
        are checked against ``brain``.
        :param brain: Brain parameters of the policy; its attributes are
            compared with those stored in the demonstration.
        :param trainer_parameters: The parameters for the trainer (dictionary).
        :param training: Whether the trainer is set for training.
        :param load: Whether the model should be loaded.
        :param seed: The seed the model will be initialized with
        :param run_id: The identifier of the current run
        :raises UnityTrainerException: If the demonstration's brain differs
            from ``brain`` in any attribute other than ``brain_name`` or
            ``vector_action_descriptions``.
        """
        super(OfflineBCTrainer, self).__init__(
            brain, trainer_parameters, training, load, seed, run_id
        )

        # Keys this trainer works with. check_param_keys() is defined outside
        # this class; presumably it verifies they are all present in the
        # trainer parameters -- confirm against the base class.
        self.param_keys = [
            "batch_size", "summary_freq", "max_steps", "batches_per_epoch",
            "use_recurrent", "hidden_units", "learning_rate", "num_layers",
            "sequence_length", "memory_size", "model_path", "demo_path",
        ]
        self.check_param_keys()

        self.batches_per_epoch = trainer_parameters["batches_per_epoch"]

        # Number of whole sequences that fit into one batch, never fewer
        # than one.
        batch_size = trainer_parameters["batch_size"]
        sequence_length = self.policy.sequence_length
        sequences_per_batch = int(batch_size / sequence_length)
        self.n_sequences = max(sequences_per_batch, 1)

        # demo_to_buffer hands back the brain parameters recorded with the
        # demonstration together with the buffer built from it.
        demo_path = trainer_parameters["demo_path"]
        loaded = demo_to_buffer(demo_path, sequence_length)
        brain_params, self.demonstration_buffer = loaded

        # Compare the two brains attribute by attribute, working on deep
        # copies so neither original object is modified. The name and the
        # action descriptions are excluded from the comparison.
        policy_spec = copy.deepcopy(brain.__dict__)
        expert_spec = copy.deepcopy(brain_params.__dict__)
        for ignored_key in ("brain_name", "vector_action_descriptions"):
            policy_spec.pop(ignored_key)
            expert_spec.pop(ignored_key)

        if expert_spec != policy_spec:
            mismatch_message = (
                "The provided demonstration is not compatible with the "
                "brain being used for performance evaluation."
            )
            raise UnityTrainerException(mismatch_message)