"""Hybrid-action (continuous + discrete) training tests for the PyTorch
trainers, run against the simple test environments."""
import pytest
import attr

from mlagents.trainers.tests.simple_test_envs import (
    SimpleEnvironment,
    MemoryEnvironment,
    RecordEnvironment,
)
from mlagents.trainers.demo_loader import write_demo
from mlagents.trainers.settings import (
    NetworkSettings,
    SelfPlaySettings,
    BehavioralCloningSettings,
    GAILSettings,
    RewardSignalType,
    EncoderType,
    FrameworkType,
)
from mlagents_envs.communicator_objects.demonstration_meta_pb2 import (
    DemonstrationMetaProto,
)
from mlagents_envs.communicator_objects.brain_parameters_pb2 import BrainParametersProto
from mlagents_envs.communicator_objects.space_type_pb2 import discrete, continuous

from mlagents.trainers.tests.dummy_config import ppo_dummy_config, sac_dummy_config
from mlagents.trainers.tests.check_env_trains import (
    check_environment_trains,
    default_reward_processor,
)

BRAIN_NAME = "1D"

PPO_TORCH_CONFIG = attr.evolve(ppo_dummy_config(), framework=FrameworkType.PYTORCH)
SAC_TORCH_CONFIG = attr.evolve(sac_dummy_config(), framework=FrameworkType.PYTORCH)


def test_hybrid_ppo():
    # PPO on an environment with one continuous and one discrete action branch.
    env = SimpleEnvironment(
        [BRAIN_NAME], continuous_action_size=1, discrete_action_size=1
    )
    config = attr.evolve(PPO_TORCH_CONFIG)
    check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=1.0)


def test_conthybrid_ppo():
    # Degenerate hybrid case: continuous actions only.
    env = SimpleEnvironment(
        [BRAIN_NAME], continuous_action_size=1, discrete_action_size=0
    )
    config = attr.evolve(PPO_TORCH_CONFIG)
    check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=1.0)


def test_dischybrid_ppo():
    # Degenerate hybrid case: discrete actions only.
    env = SimpleEnvironment(
        [BRAIN_NAME], continuous_action_size=0, discrete_action_size=1
    )
    config = attr.evolve(PPO_TORCH_CONFIG)
    check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=1.0)


@pytest.mark.parametrize("num_visual", [1, 2])
def test_visual_ppo(num_visual):
    # Hybrid actions with visual observations; a lower learning rate keeps
    # training stable with the visual encoder.
    env = SimpleEnvironment(
        [BRAIN_NAME],
        num_visual=num_visual,
        num_vector=0,
        continuous_action_size=1,
        discrete_action_size=1,
    )
    new_hyperparams = attr.evolve(
        PPO_TORCH_CONFIG.hyperparameters, learning_rate=3.0e-4
    )
    config = attr.evolve(PPO_TORCH_CONFIG, hyperparameters=new_hyperparams)
    check_environment_trains(env, {BRAIN_NAME: config})


def test_recurrent_ppo():
    # Hybrid actions on a memory-dependent environment, using recurrence
    # (memory_size=16) with smaller batch/buffer sizes over more steps.
    env = MemoryEnvironment(
        [BRAIN_NAME], continuous_action_size=1, discrete_action_size=1
    )
    new_network_settings = attr.evolve(
        PPO_TORCH_CONFIG.network_settings,
        memory=NetworkSettings.MemorySettings(memory_size=16),
    )
    new_hyperparams = attr.evolve(
        PPO_TORCH_CONFIG.hyperparameters,
        learning_rate=1.0e-3,
        batch_size=64,
        buffer_size=128,
    )
    config = attr.evolve(
        PPO_TORCH_CONFIG,
        hyperparameters=new_hyperparams,
        network_settings=new_network_settings,
        max_steps=100000,
    )
    check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)


# Disabled variants with larger hybrid action spaces (2 continuous + 1 discrete,
# and 1 continuous + 2 discrete branches):
# def test_3cdhybrid_ppo():
#     env = SimpleEnvironment(
#         [BRAIN_NAME], continuous_action_size=2, discrete_action_size=1, step_size=0.8
#     )
#     new_hyperparams = attr.evolve(
#         PPO_TORCH_CONFIG.hyperparameters, batch_size=128, buffer_size=1280, beta=0.01
#     )
#     config = attr.evolve(
#         PPO_TORCH_CONFIG, hyperparameters=new_hyperparams, max_steps=10000
#     )
#     check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=1.0)
#
#
# def test_3ddhybrid_ppo():
#     env = SimpleEnvironment(
#         [BRAIN_NAME], continuous_action_size=1, discrete_action_size=2, step_size=0.8
#     )
#     new_hyperparams = attr.evolve(
#         PPO_TORCH_CONFIG.hyperparameters, batch_size=128, buffer_size=1280, beta=0.01
#     )
#     config = attr.evolve(
#         PPO_TORCH_CONFIG, hyperparameters=new_hyperparams, max_steps=10000
#     )
#     check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=1.0)
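

# A minimal sketch of a SAC counterpart to test_hybrid_ppo, so that the
# otherwise-unused SAC_TORCH_CONFIG above is exercised. The buffer_size,
# batch_size, max_steps, and step_size values here are illustrative
# assumptions, not tuned settings from this repo.
def test_hybrid_sac():
    env = SimpleEnvironment(
        [BRAIN_NAME], continuous_action_size=1, discrete_action_size=1, step_size=0.8
    )
    new_hyperparams = attr.evolve(
        SAC_TORCH_CONFIG.hyperparameters, buffer_size=50000, batch_size=256
    )
    config = attr.evolve(
        SAC_TORCH_CONFIG, hyperparameters=new_hyperparams, max_steps=3000
    )
    check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)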