import json import unittest.mock as mock import yaml import pytest import numpy as np from mlagents.trainers.trainer_controller import TrainerController from mlagents.trainers.buffer import Buffer from mlagents.trainers.ppo.trainer import PPOTrainer from mlagents.trainers.bc.trainer import BehavioralCloningTrainer from mlagents.trainers.curriculum import Curriculum from mlagents.trainers.exception import CurriculumError from mlagents.envs.exception import UnityEnvironmentException from tests.mock_communicator import MockCommunicator def assert_array(a, b): assert a.shape == b.shape la = list(a.flatten()) lb = list(b.flatten()) for i in range(len(la)): assert la[i] == lb[i] def test_buffer(): b = Buffer() for fake_agent_id in range(4): for step in range(9): b[fake_agent_id]['vector_observation'].append( [100 * fake_agent_id + 10 * step + 1, 100 * fake_agent_id + 10 * step + 2, 100 * fake_agent_id + 10 * step + 3] ) b[fake_agent_id]['action'].append([100 * fake_agent_id + 10 * step + 4, 100 * fake_agent_id + 10 * step + 5]) a = b[1]['vector_observation'].get_batch(batch_size=2, training_length=1, sequential=True) assert_array(a, np.array([[171, 172, 173], [181, 182, 183]])) a = b[2]['vector_observation'].get_batch(batch_size=2, training_length=3, sequential=True) assert_array(a, np.array([ [[231, 232, 233], [241, 242, 243], [251, 252, 253]], [[261, 262, 263], [271, 272, 273], [281, 282, 283]] ])) a = b[2]['vector_observation'].get_batch(batch_size=2, training_length=3, sequential=False) assert_array(a, np.array([ [[251, 252, 253], [261, 262, 263], [271, 272, 273]], [[261, 262, 263], [271, 272, 273], [281, 282, 283]] ])) b[4].reset_agent() assert len(b[4]) == 0 b.append_update_buffer(3, batch_size=None, training_length=2) b.append_update_buffer(2, batch_size=None, training_length=2) assert len(b.update_buffer['action']) == 10 assert np.array(b.update_buffer['action']).shape == (10, 2, 2)