from mlagents_envs.logging_util import get_logger
from typing import Deque, Dict
from collections import deque
from mlagents.trainers.ghost.trainer import GhostTrainer

logger = get_logger(__name__)


class GhostController:
    """
    GhostController contains a queue of team ids. GhostTrainers subscribe to the GhostController and query
    it to get the current learning team.  The GhostController cycles through team ids every 'swap_interval'
    which corresponds to the number of trainer steps between changing learning teams.
    The GhostController is a unique object and there can only be one per training run.
    """

    def __init__(self, maxlen: int = 10):
        """
        Create a GhostController.
        :param maxlen: Maximum number of GhostTrainers allowed in this GhostController
        """

        # Tracks last swap step for  each learning team because trainer
        # steps of all GhostTrainers do not increment together
        self._queue: Deque[int] = deque(maxlen=maxlen)
        self._learning_team: int = -1
        # Dict from team id to GhostTrainer for ELO calculation
        self._ghost_trainers: Dict[int, GhostTrainer] = {}
        # Signals to the trainer control to perform a hard change_training_team
        self._changed_training_team = False

    @property
    def get_learning_team(self) -> int:
        """
        Returns the current learning team.
        :return: The learning team id
        """
        return self._learning_team

    def should_reset(self) -> bool:
        """
        Whether or not team change occurred. Causes full reset in trainer_controller
        :return: The truth value of the team changing
        """
        changed_team = self._changed_training_team
        if self._changed_training_team:
            self._changed_training_team = False
        return changed_team

    def subscribe_team_id(self, team_id: int, trainer: GhostTrainer) -> None:
        """
        Given a team_id and trainer, add to queue and trainers if not already.
        The GhostTrainer is used later by the controller to get ELO ratings of agents.
        :param team_id: The team_id of an agent managed by this GhostTrainer
        :param trainer: A GhostTrainer that manages this team_id.
        """
        if team_id not in self._ghost_trainers:
            self._ghost_trainers[team_id] = trainer
            if self._learning_team < 0:
                self._learning_team = team_id
            else:
                self._queue.append(team_id)

    def change_training_team(self, step: int) -> None:
        """
        The current learning team is added to the end of the queue and then updated with the
        next in line.
        :param step: The step of the trainer for debugging
        """
        self._queue.append(self._learning_team)
        self._learning_team = self._queue.popleft()
        logger.debug(
            "Learning team {} swapped on step {}".format(self._learning_team, step)
        )
        self._changed_training_team = True

    # Adapted from https://github.com/Unity-Technologies/ml-agents/pull/1975 and
    # https://metinmediamath.wordpress.com/2013/11/27/how-to-calculate-the-elo-rating-including-example/
    # ELO calculation
    # TODO : Generalize this to more than two teams
    def compute_elo_rating_changes(self, rating: float, result: float) -> float:
        """
        Calculates ELO. Given the rating of the learning team and result.  The GhostController
        queries the other GhostTrainers for the ELO of their agent that is currently being deployed.
        Note, this could be the current agent or a past snapshot.
        :param rating: Rating of the learning team.
        :param result: Win, loss, or draw from the perspective of the learning team.
        :return: The change in ELO.
        """
        opponent_rating: float = 0.0
        for team_id, trainer in self._ghost_trainers.items():
            if team_id != self._learning_team:
                opponent_rating = trainer.get_opponent_elo()
        r1 = pow(10, rating / 400)
        r2 = pow(10, opponent_rating / 400)

        summed = r1 + r2
        e1 = r1 / summed

        change = result - e1
        for team_id, trainer in self._ghost_trainers.items():
            if team_id != self._learning_team:
                trainer.change_opponent_elo(change)

        return change