Python Testing & Image Inference Improvements (#353)
* Reorganized Python tests into a separate folder, with individual test files for the different (sub)modules.
* Added tests for trainer_controller, PPO, and behavioral cloning. More to come soon.
* Minor bug fixes discovered while writing tests.
* Reworked GridWorld to reset much faster.
* Cleaned ObservationToTex and reworked GetObservationMatrixList to be 3x faster.

/develop-generalizationTraining-TrainerController
GitHub
7 years ago
Current commit
e11dae1d
25 files changed, with 888 insertions and 595 deletions
 11  python/unityagents/curriculum.py
  1  python/unityagents/environment.py
  2  python/unitytrainers/bc/trainer.py
 28  python/unitytrainers/models.py
 26  python/unitytrainers/ppo/trainer.py
 40  python/unitytrainers/trainer_controller.py
152  unity-environment/Assets/ML-Agents/Examples/GridWorld/GridWorld.unity
 22  unity-environment/Assets/ML-Agents/Examples/GridWorld/Resources/agent.prefab
 80  unity-environment/Assets/ML-Agents/Examples/GridWorld/Scripts/GridAcademy.cs
 59  unity-environment/Assets/ML-Agents/Scripts/Brain.cs
  7  python/unitytrainers/__init__.py
  2  python/unitytrainers/bc/__init__.py
 37  python/unitytrainers/bc/models.py
  2  python/unitytrainers/ppo/__init__.py
  2  python/tests/__init__.py
103  python/tests/test_bc.py
105  python/tests/test_ppo.py
215  python/tests/test_unityagents.py
205  python/tests/test_unitytrainers.py
 37  python/unitytrainers/bc/bc_models.py
347  python/test_unityagents.py
  0  /python/unitytrainers/bc/trainer.py
  0  /python/unitytrainers/ppo/models.py
  0  /python/unitytrainers/ppo/trainer.py
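The four new test modules under python/tests/ are ordinary pytest files. A minimal sketch (not part of this diff) of collecting them in one run from the repository's python/ directory, assuming pytest is installed and the unityagents/unitytrainers packages are importable:

# Sketch only, not part of this commit: run the reorganized test suite
# from the repository's python/ directory.
import pytest

if __name__ == '__main__':
    # Collects tests/test_bc.py, tests/test_ppo.py,
    # tests/test_unityagents.py and tests/test_unitytrainers.py.
    pytest.main(['tests'])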
python/unitytrainers/__init__.py

from .buffer import *
from .models import *
from .trainer_controller import *
from .bc.models import *
from .bc.trainer import *
from .ppo.models import *
from .ppo.trainer import *
python/unitytrainers/bc/__init__.py

from .models import *
from .trainer import *
python/unitytrainers/bc/models.py

import tensorflow as tf
import tensorflow.contrib.layers as c_layers
from unitytrainers.models import LearningModel


class BehavioralCloningModel(LearningModel):
    def __init__(self, brain, h_size=128, lr=1e-4, n_layers=2, m_size=128,
                 normalize=False, use_recurrent=False):
        LearningModel.__init__(self, m_size, normalize, use_recurrent, brain)

        num_streams = 1
        hidden_streams = self.create_new_obs(num_streams, h_size, n_layers)
        hidden = hidden_streams[0]
        self.dropout_rate = tf.placeholder(dtype=tf.float32, shape=[], name="dropout_rate")
        hidden_reg = tf.layers.dropout(hidden, self.dropout_rate)
        if self.use_recurrent:
            self.memory_in = tf.placeholder(shape=[None, self.m_size], dtype=tf.float32, name='recurrent_in')
            hidden_reg, self.memory_out = self.create_recurrent_encoder(hidden_reg, self.memory_in)
            self.memory_out = tf.identity(self.memory_out, name='recurrent_out')
        self.policy = tf.layers.dense(hidden_reg, self.a_size, activation=None, use_bias=False,
                                      kernel_initializer=c_layers.variance_scaling_initializer(factor=0.01))

        if brain.action_space_type == "discrete":
            self.action_probs = tf.nn.softmax(self.policy)
            self.sample_action = tf.cast(tf.multinomial(self.policy, 1, name="action"), tf.int32)
            self.true_action = tf.placeholder(shape=[None], dtype=tf.int32, name="expert_action")
            self.action_oh = tf.one_hot(self.true_action, self.a_size)
            self.loss = tf.reduce_sum(-tf.log(self.action_probs + 1e-10) * self.action_oh)
            self.action_percent = tf.reduce_mean(tf.cast(
                tf.equal(tf.cast(tf.argmax(self.action_probs, axis=1), tf.int32), self.sample_action), tf.float32))
        else:
            self.sample_action = tf.identity(self.policy, name="action")
            self.true_action = tf.placeholder(shape=[None, self.a_size], dtype=tf.float32, name="expert_action")
            self.loss = tf.reduce_sum(tf.squared_difference(self.true_action, self.sample_action))

        optimizer = tf.train.AdamOptimizer(learning_rate=lr)
        self.update = optimizer.minimize(self.loss)
python/unitytrainers/ppo/__init__.py

from .models import *
from .trainer import *
python/tests/__init__.py

from unityagents import *
from unitytrainers import *
python/tests/test_bc.py

import mock
import pytest

import numpy as np
import tensorflow as tf

from unitytrainers.bc.models import BehavioralCloningModel
from unityagents import UnityEnvironment


def test_cc_bc_model():
    c_action_c_state_start = '''{
      "AcademyName": "RealFakeAcademy",
      "resetParameters": {},
      "brainNames": ["RealFakeBrain"],
      "externalBrainNames": ["RealFakeBrain"],
      "logPath":"RealFakePath",
      "apiNumber":"API-2",
      "brainParameters": [{
          "stateSize": 3,
          "stackedStates": 2,
          "actionSize": 2,
          "memorySize": 0,
          "cameraResolutions": [],
          "actionDescriptions": ["",""],
          "actionSpaceType": 1,
          "stateSpaceType": 1
          }]
    }'''.encode()

    tf.reset_default_graph()
    with mock.patch('subprocess.Popen'):
        with mock.patch('socket.socket') as mock_socket:
            with mock.patch('glob.glob') as mock_glob:
                # End of mock
                with tf.Session() as sess:
                    with tf.variable_scope("FakeGraphScope"):
                        mock_glob.return_value = ['FakeLaunchPath']
                        mock_socket.return_value.accept.return_value = (mock_socket, 0)
                        mock_socket.recv.return_value.decode.return_value = c_action_c_state_start
                        env = UnityEnvironment(' ')

                        model = BehavioralCloningModel(env.brains["RealFakeBrain"])
                        init = tf.global_variables_initializer()
                        sess.run(init)

                        run_list = [model.sample_action, model.policy]
                        feed_dict = {model.batch_size: 2,
                                     model.sequence_length: 1,
                                     model.state_in: np.array([[1, 2, 3, 1, 2, 3],
                                                               [3, 4, 5, 3, 4, 5]])}
                        sess.run(run_list, feed_dict=feed_dict)
                        env.close()


def test_dc_bc_model():
    d_action_c_state_start = '''{
      "AcademyName": "RealFakeAcademy",
      "resetParameters": {},
      "brainNames": ["RealFakeBrain"],
      "externalBrainNames": ["RealFakeBrain"],
      "logPath":"RealFakePath",
      "apiNumber":"API-2",
      "brainParameters": [{
          "stateSize": 3,
          "stackedStates": 2,
          "actionSize": 2,
          "memorySize": 0,
          "cameraResolutions": [{"width":30,"height":40,"blackAndWhite":false}],
          "actionDescriptions": ["",""],
          "actionSpaceType": 0,
          "stateSpaceType": 1
          }]
    }'''.encode()

    tf.reset_default_graph()
    with mock.patch('subprocess.Popen'):
        with mock.patch('socket.socket') as mock_socket:
            with mock.patch('glob.glob') as mock_glob:
                with tf.Session() as sess:
                    with tf.variable_scope("FakeGraphScope"):
                        mock_glob.return_value = ['FakeLaunchPath']
                        mock_socket.return_value.accept.return_value = (mock_socket, 0)
                        mock_socket.recv.return_value.decode.return_value = d_action_c_state_start
                        env = UnityEnvironment(' ')

                        model = BehavioralCloningModel(env.brains["RealFakeBrain"])
                        init = tf.global_variables_initializer()
                        sess.run(init)

                        run_list = [model.sample_action, model.policy]
                        feed_dict = {model.batch_size: 2,
                                     model.dropout_rate: 1.0,
                                     model.sequence_length: 1,
                                     model.state_in: np.array([[1, 2, 3, 1, 2, 3],
                                                               [3, 4, 5, 3, 4, 5]]),
                                     model.observation_in[0]: np.ones([2, 40, 30, 3])}
                        sess.run(run_list, feed_dict=feed_dict)
                        env.close()


if __name__ == '__main__':
    pytest.main()
python/tests/test_ppo.py

import mock
import pytest

import numpy as np
import tensorflow as tf

from unitytrainers.ppo.models import PPOModel
from unityagents import UnityEnvironment


def test_ppo_model_continuous():
    c_action_c_state_start = '''{
      "AcademyName": "RealFakeAcademy",
      "resetParameters": {},
      "brainNames": ["RealFakeBrain"],
      "externalBrainNames": ["RealFakeBrain"],
      "logPath":"RealFakePath",
      "apiNumber":"API-2",
      "brainParameters": [{
          "stateSize": 3,
          "stackedStates": 2,
          "actionSize": 2,
          "memorySize": 0,
          "cameraResolutions": [],
          "actionDescriptions": ["",""],
          "actionSpaceType": 1,
          "stateSpaceType": 1
          }]
    }'''.encode()

    tf.reset_default_graph()
    with mock.patch('subprocess.Popen'):
        with mock.patch('socket.socket') as mock_socket:
            with mock.patch('glob.glob') as mock_glob:
                # End of mock
                with tf.Session() as sess:
                    with tf.variable_scope("FakeGraphScope"):
                        mock_glob.return_value = ['FakeLaunchPath']
                        mock_socket.return_value.accept.return_value = (mock_socket, 0)
                        mock_socket.recv.return_value.decode.return_value = c_action_c_state_start
                        env = UnityEnvironment(' ')

                        model = PPOModel(env.brains["RealFakeBrain"])
                        init = tf.global_variables_initializer()
                        sess.run(init)

                        run_list = [model.output, model.probs, model.value, model.entropy,
                                    model.learning_rate]
                        feed_dict = {model.batch_size: 2,
                                     model.sequence_length: 1,
                                     model.state_in: np.array([[1, 2, 3, 1, 2, 3],
                                                               [3, 4, 5, 3, 4, 5]])}
                        sess.run(run_list, feed_dict=feed_dict)
                        env.close()


def test_ppo_model_discrete():
    d_action_c_state_start = '''{
      "AcademyName": "RealFakeAcademy",
      "resetParameters": {},
      "brainNames": ["RealFakeBrain"],
      "externalBrainNames": ["RealFakeBrain"],
      "logPath":"RealFakePath",
      "apiNumber":"API-2",
      "brainParameters": [{
          "stateSize": 3,
          "stackedStates": 2,
          "actionSize": 2,
          "memorySize": 0,
          "cameraResolutions": [{"width":30,"height":40,"blackAndWhite":false}],
          "actionDescriptions": ["",""],
          "actionSpaceType": 0,
          "stateSpaceType": 1
          }]
    }'''.encode()

    tf.reset_default_graph()
    with mock.patch('subprocess.Popen'):
        with mock.patch('socket.socket') as mock_socket:
            with mock.patch('glob.glob') as mock_glob:
                # End of mock
                with tf.Session() as sess:
                    with tf.variable_scope("FakeGraphScope"):
                        mock_glob.return_value = ['FakeLaunchPath']
                        mock_socket.return_value.accept.return_value = (mock_socket, 0)
                        mock_socket.recv.return_value.decode.return_value = d_action_c_state_start
                        env = UnityEnvironment(' ')
                        model = PPOModel(env.brains["RealFakeBrain"])
                        init = tf.global_variables_initializer()
                        sess.run(init)

                        run_list = [model.output, model.all_probs, model.value, model.entropy,
                                    model.learning_rate]
                        feed_dict = {model.batch_size: 2,
                                     model.sequence_length: 1,
                                     model.state_in: np.array([[1, 2, 3, 1, 2, 3],
                                                               [3, 4, 5, 3, 4, 5]]),
                                     model.observation_in[0]: np.ones([2, 40, 30, 3])
                                     }
                        sess.run(run_list, feed_dict=feed_dict)
                        env.close()


if __name__ == '__main__':
    pytest.main()
python/tests/test_unityagents.py

import json
import mock
import pytest
import struct

import numpy as np

from unityagents import UnityEnvironment, UnityEnvironmentException, UnityActionException, \
    BrainInfo, Curriculum


def append_length(partial_string):
    return struct.pack("I", len(partial_string.encode())) + partial_string.encode()


dummy_start = '''{
  "AcademyName": "RealFakeAcademy",
  "resetParameters": {},
  "brainNames": ["RealFakeBrain"],
  "externalBrainNames": ["RealFakeBrain"],
  "logPath":"RealFakePath",
  "apiNumber":"API-2",
  "brainParameters": [{
      "stateSize": 3,
      "stackedStates" : 2,
      "actionSize": 2,
      "memorySize": 0,
      "cameraResolutions": [],
      "actionDescriptions": ["",""],
      "actionSpaceType": 1,
      "stateSpaceType": 1
      }]
}'''.encode()

dummy_reset = [
    'CONFIG_REQUEST'.encode(),
    append_length(
        '''
        {
          "brain_name": "RealFakeBrain",
          "agents": [1,2],
          "states": [1,2,3,4,5,6,1,2,3,4,5,6],
          "rewards": [1,2],
          "actions": [1,2,3,4],
          "memories": [],
          "dones": [false, false],
          "maxes": [false, false]
        }'''),
    'False'.encode()]

dummy_step = ['actions'.encode(),
              append_length('''
        {
          "brain_name": "RealFakeBrain",
          "agents": [1,2,3],
          "states": [1,2,3,4,5,6,7,8,9,1,2,3,4,5,6,7,8,9],
          "rewards": [1,2,3],
          "actions": [1,2,3,4,5,6],
          "memories": [],
          "dones": [false, false, false],
          "maxes": [false, false, false]
        }'''),
              'False'.encode(),
              'actions'.encode(),
              append_length('''
        {
          "brain_name": "RealFakeBrain",
          "agents": [1,2,3],
          "states": [1,2,3,4,5,6,7,8,9,1,2,3,4,5,6,7,8,9],
          "rewards": [1,2,3],
          "actions": [1,2,3,4,5,6],
          "memories": [],
          "dones": [false, false, true],
          "maxes": [false, false, false]
        }'''),
              'True'.encode()]

dummy_curriculum = json.loads('''{
    "measure" : "reward",
    "thresholds" : [10, 20, 50],
    "min_lesson_length" : 3,
    "signal_smoothing" : true,
    "parameters" :
    {
        "param1" : [0.7, 0.5, 0.3, 0.1],
        "param2" : [100, 50, 20, 15],
        "param3" : [0.2, 0.3, 0.7, 0.9]
    }
}''')
bad_curriculum = json.loads('''{
    "measure" : "reward",
    "thresholds" : [10, 20, 50],
    "min_lesson_length" : 3,
    "signal_smoothing" : false,
    "parameters" :
    {
        "param1" : [0.7, 0.5, 0.3, 0.1],
        "param2" : [100, 50, 20],
        "param3" : [0.2, 0.3, 0.7, 0.9]
    }
}''')


def test_handles_bad_filename():
    with pytest.raises(UnityEnvironmentException):
        UnityEnvironment(' ')


def test_initialization():
    with mock.patch('subprocess.Popen'):
        with mock.patch('socket.socket') as mock_socket:
            with mock.patch('glob.glob') as mock_glob:
                mock_glob.return_value = ['FakeLaunchPath']
                mock_socket.return_value.accept.return_value = (mock_socket, 0)
                mock_socket.recv.return_value.decode.return_value = dummy_start
                env = UnityEnvironment(' ')
                with pytest.raises(UnityActionException):
                    env.step([0])
                assert env.brain_names[0] == 'RealFakeBrain'
                env.close()


def test_reset():
    with mock.patch('subprocess.Popen'):
        with mock.patch('socket.socket') as mock_socket:
            with mock.patch('glob.glob') as mock_glob:
                mock_glob.return_value = ['FakeLaunchPath']
                mock_socket.return_value.accept.return_value = (mock_socket, 0)
                mock_socket.recv.return_value.decode.return_value = dummy_start
                env = UnityEnvironment(' ')
                brain = env.brains['RealFakeBrain']
                mock_socket.recv.side_effect = dummy_reset
                brain_info = env.reset()
                env.close()
                assert not env.global_done
                assert isinstance(brain_info, dict)
                assert isinstance(brain_info['RealFakeBrain'], BrainInfo)
                assert isinstance(brain_info['RealFakeBrain'].observations, list)
                assert isinstance(brain_info['RealFakeBrain'].states, np.ndarray)
                assert len(brain_info['RealFakeBrain'].observations) == brain.number_observations
                assert brain_info['RealFakeBrain'].states.shape[0] == len(brain_info['RealFakeBrain'].agents)
                assert brain_info['RealFakeBrain'].states.shape[1] == brain.state_space_size * brain.stacked_states


def test_step():
    with mock.patch('subprocess.Popen'):
        with mock.patch('socket.socket') as mock_socket:
            with mock.patch('glob.glob') as mock_glob:
                mock_glob.return_value = ['FakeLaunchPath']
                mock_socket.return_value.accept.return_value = (mock_socket, 0)
                mock_socket.recv.return_value.decode.return_value = dummy_start
                env = UnityEnvironment(' ')
                brain = env.brains['RealFakeBrain']
                mock_socket.recv.side_effect = dummy_reset
                brain_info = env.reset()
                mock_socket.recv.side_effect = dummy_step
                brain_info = env.step([0] * brain.action_space_size * len(brain_info['RealFakeBrain'].agents))
                with pytest.raises(UnityActionException):
                    env.step([0])
                brain_info = env.step([0] * brain.action_space_size * len(brain_info['RealFakeBrain'].agents))
                with pytest.raises(UnityActionException):
                    env.step([0] * brain.action_space_size * len(brain_info['RealFakeBrain'].agents))
                env.close()
                assert env.global_done
                assert isinstance(brain_info, dict)
                assert isinstance(brain_info['RealFakeBrain'], BrainInfo)
                assert isinstance(brain_info['RealFakeBrain'].observations, list)
                assert isinstance(brain_info['RealFakeBrain'].states, np.ndarray)
                assert len(brain_info['RealFakeBrain'].observations) == brain.number_observations
                assert brain_info['RealFakeBrain'].states.shape[0] == len(brain_info['RealFakeBrain'].agents)
                assert brain_info['RealFakeBrain'].states.shape[1] == brain.state_space_size * brain.stacked_states
                assert not brain_info['RealFakeBrain'].local_done[0]
                assert brain_info['RealFakeBrain'].local_done[2]


def test_close():
    with mock.patch('subprocess.Popen'):
        with mock.patch('socket.socket') as mock_socket:
            with mock.patch('glob.glob') as mock_glob:
                mock_glob.return_value = ['FakeLaunchPath']
                mock_socket.return_value.accept.return_value = (mock_socket, 0)
                mock_socket.recv.return_value.decode.return_value = dummy_start
                env = UnityEnvironment(' ')
                assert env._loaded
                env.close()
                assert not env._loaded
                mock_socket.close.assert_called_once()


def test_curriculum():
    open_name = '%s.open' % __name__
    with mock.patch('json.load') as mock_load:
        with mock.patch(open_name, create=True) as mock_open:
            mock_open.return_value = 0
            mock_load.return_value = bad_curriculum
            with pytest.raises(UnityEnvironmentException):
                Curriculum('tests/test_unityagents.py', {"param1": 1, "param2": 1, "param3": 1})
            mock_load.return_value = dummy_curriculum
            with pytest.raises(UnityEnvironmentException):
                Curriculum('tests/test_unityagents.py', {"param1": 1, "param2": 1})
            curriculum = Curriculum('tests/test_unityagents.py', {"param1": 1, "param2": 1, "param3": 1})
            assert curriculum.get_lesson_number == 0
            curriculum.set_lesson_number(1)
            assert curriculum.get_lesson_number == 1
            curriculum.increment_lesson(10)
            assert curriculum.get_lesson_number == 1
            curriculum.increment_lesson(30)
            curriculum.increment_lesson(30)
            assert curriculum.get_lesson_number == 1
            assert curriculum.lesson_length == 3
            curriculum.increment_lesson(30)
            assert curriculum.get_config() == {'param1': 0.3, 'param2': 20, 'param3': 0.7}
            assert curriculum.get_config(0) == {"param1": 0.7, "param2": 100, "param3": 0.2}
            assert curriculum.lesson_length == 0
            assert curriculum.get_lesson_number == 2
python/tests/test_unitytrainers.py

import yaml
import mock
import pytest

from unitytrainers.trainer_controller import TrainerController
from unitytrainers.buffer import Buffer
from unitytrainers.models import *
from unitytrainers.ppo.trainer import PPOTrainer
from unitytrainers.bc.trainer import BehavioralCloningTrainer
from unityagents import UnityEnvironmentException

dummy_start = '''{
  "AcademyName": "RealFakeAcademy",
  "resetParameters": {},
  "brainNames": ["RealFakeBrain"],
  "externalBrainNames": ["RealFakeBrain"],
  "logPath":"RealFakePath",
  "apiNumber":"API-2",
  "brainParameters": [{
      "stateSize": 3,
      "stackedStates" : 2,
      "actionSize": 2,
      "memorySize": 0,
      "cameraResolutions": [],
      "actionDescriptions": ["",""],
      "actionSpaceType": 1,
      "stateSpaceType": 1
      }]
}'''.encode()


dummy_config = yaml.load('''
default:
    trainer: ppo
    batch_size: 32
    beta: 5.0e-3
    buffer_size: 512
    epsilon: 0.2
    gamma: 0.99
    hidden_units: 128
    lambd: 0.95
    learning_rate: 3.0e-4
    max_steps: 5.0e4
    normalize: true
    num_epoch: 5
    num_layers: 2
    time_horizon: 64
    sequence_length: 64
    summary_freq: 1000
    use_recurrent: false
''')

dummy_bc_config = yaml.load('''
default:
    trainer: imitation
    brain_to_imitate: ExpertBrain
    batches_per_epoch: 16
    batch_size: 32
    beta: 5.0e-3
    buffer_size: 512
    epsilon: 0.2
    gamma: 0.99
    hidden_units: 128
    lambd: 0.95
    learning_rate: 3.0e-4
    max_steps: 5.0e4
    normalize: true
    num_epoch: 5
    num_layers: 2
    time_horizon: 64
    sequence_length: 64
    summary_freq: 1000
    use_recurrent: false
''')

dummy_bad_config = yaml.load('''
default:
    trainer: incorrect_trainer
    brain_to_imitate: ExpertBrain
    batches_per_epoch: 16
    batch_size: 32
    beta: 5.0e-3
    buffer_size: 512
    epsilon: 0.2
    gamma: 0.99
    hidden_units: 128
    lambd: 0.95
    learning_rate: 3.0e-4
    max_steps: 5.0e4
    normalize: true
    num_epoch: 5
    num_layers: 2
    time_horizon: 64
    sequence_length: 64
    summary_freq: 1000
    use_recurrent: false
''')


def test_initialization():
    with mock.patch('subprocess.Popen'):
        with mock.patch('socket.socket') as mock_socket:
            with mock.patch('glob.glob') as mock_glob:
                mock_glob.return_value = ['FakeLaunchPath']
                mock_socket.return_value.accept.return_value = (mock_socket, 0)
                mock_socket.recv.return_value.decode.return_value = dummy_start
                tc = TrainerController(' ', ' ', 1, None, True, True, False, 1,
                                       1, 1, 1)
                assert(tc.env.brain_names[0] == 'RealFakeBrain')


def test_load_config():
    open_name = '%s.open' % __name__
    with mock.patch('yaml.load') as mock_load:
        with mock.patch(open_name, create=True) as mock_open:
            mock_open.return_value = 0
            mock_load.return_value = dummy_config
            config = TrainerController._load_config("tests/test_unitytrainers.py")
            assert(len(config) == 1)
            assert(config['default']['trainer'] == "ppo")


def test_initialize_trainers():
    open_name = '%s.open' % __name__
    with mock.patch('yaml.load') as mock_load:
        with mock.patch(open_name, create=True) as mock_open:
            mock_open.return_value = 0
            with mock.patch('subprocess.Popen'):
                with mock.patch('socket.socket') as mock_socket:
                    with mock.patch('glob.glob') as mock_glob:
                        mock_glob.return_value = ['FakeLaunchPath']
                        mock_socket.return_value.accept.return_value = (mock_socket, 0)
                        mock_socket.recv.return_value.decode.return_value = dummy_start
                        tc = TrainerController(' ', ' ', 1, None, True, True, False, 1,
                                               1, 1, 1)

                        # Test for PPO trainer
                        mock_load.return_value = dummy_config
                        config = tc._load_config("tests/test_unitytrainers.py")
                        tf.reset_default_graph()
                        with tf.Session() as sess:
                            tc._initialize_trainers(config, sess)
                            assert(len(tc.trainers) == 1)
                            assert(isinstance(tc.trainers['RealFakeBrain'], PPOTrainer))

                        # Test for Behavior Cloning Trainer
                        mock_load.return_value = dummy_bc_config
                        config = tc._load_config("tests/test_unitytrainers.py")
                        tf.reset_default_graph()
                        with tf.Session() as sess:
                            tc._initialize_trainers(config, sess)
                            assert(isinstance(tc.trainers['RealFakeBrain'], BehavioralCloningTrainer))

                        # Test for proper exception when trainer name is incorrect
                        mock_load.return_value = dummy_bad_config
                        config = tc._load_config("tests/test_unitytrainers.py")
                        tf.reset_default_graph()
                        with tf.Session() as sess:
                            with pytest.raises(UnityEnvironmentException):
                                tc._initialize_trainers(config, sess)


def assert_array(a, b):
    assert a.shape == b.shape
    la = list(a.flatten())
    lb = list(b.flatten())
    for i in range(len(la)):
        assert la[i] == lb[i]


def test_buffer():
    b = Buffer()
    for fake_agent_id in range(4):
        for step in range(9):
            b[fake_agent_id]['state'].append(
                [100 * fake_agent_id + 10 * step + 1,
                 100 * fake_agent_id + 10 * step + 2,
                 100 * fake_agent_id + 10 * step + 3]
            )
            b[fake_agent_id]['action'].append([100 * fake_agent_id + 10 * step + 4,
                                               100 * fake_agent_id + 10 * step + 5])
    a = b[1]['state'].get_batch(batch_size=2, training_length=None, sequential=True)
    assert_array(a, np.array([[171, 172, 173], [181, 182, 183]]))
    a = b[2]['state'].get_batch(batch_size=2, training_length=3, sequential=True)
    assert_array(a, np.array([
        [[231, 232, 233], [241, 242, 243], [251, 252, 253]],
        [[261, 262, 263], [271, 272, 273], [281, 282, 283]]
    ]))
    a = b[2]['state'].get_batch(batch_size=2, training_length=3, sequential=False)
    assert_array(a, np.array([
        [[251, 252, 253], [261, 262, 263], [271, 272, 273]],
        [[261, 262, 263], [271, 272, 273], [281, 282, 283]]
    ]))
    b[4].reset_agent()
    assert len(b[4]) == 0
    b.append_update_buffer(3,
                           batch_size=None, training_length=2)
    b.append_update_buffer(2,
                           batch_size=None, training_length=2)
    assert len(b.update_buffer['action']) == 10
    assert np.array(b.update_buffer['action']).shape == (10, 2, 2)


if __name__ == '__main__':
    pytest.main()
python/unitytrainers/bc/bc_models.py

import tensorflow as tf
import tensorflow.contrib.layers as c_layers
from unitytrainers.models import LearningModel


class BehavioralCloningModel(LearningModel):
    def __init__(self, h_size, lr, n_layers, m_size, normalize, use_recurrent, brain):
        LearningModel.__init__(self, m_size, normalize, use_recurrent, brain)

        num_streams = 1
        hidden_streams = self.create_new_obs(num_streams, h_size, n_layers)
        hidden = hidden_streams[0]
        self.dropout_rate = tf.placeholder(dtype=tf.float32, shape=[], name="dropout_rate")
        hidden_reg = tf.layers.dropout(hidden, self.dropout_rate)
        if self.use_recurrent:
            self.memory_in = tf.placeholder(shape=[None, self.m_size], dtype=tf.float32, name='recurrent_in')
            hidden_reg, self.memory_out = self.create_recurrent_encoder(hidden_reg, self.memory_in)
            self.memory_out = tf.identity(self.memory_out, name='recurrent_out')
        self.policy = tf.layers.dense(hidden_reg, self.a_size, activation=None, use_bias=False,
                                      kernel_initializer=c_layers.variance_scaling_initializer(factor=0.01))

        if brain.action_space_type == "discrete":
            self.action_probs = tf.nn.softmax(self.policy)
            self.sample_action = tf.multinomial(self.policy, 1, name="action")
            self.true_action = tf.placeholder(shape=[None], dtype=tf.int32)
            self.action_oh = tf.one_hot(self.true_action, self.a_size)
            self.loss = tf.reduce_sum(-tf.log(self.action_probs + 1e-10) * self.action_oh)

            self.action_percent = tf.reduce_mean(tf.cast(
                tf.equal(tf.cast(tf.argmax(self.action_probs, axis=1), tf.int32), self.sample_action), tf.float32))
        else:
            self.sample_action = tf.identity(self.policy, name="action")
            self.true_action = tf.placeholder(shape=[None, self.a_size], dtype=tf.float32)
            self.loss = tf.reduce_sum(tf.squared_difference(self.true_action, self.sample_action))

        optimizer = tf.train.AdamOptimizer(learning_rate=lr)
        self.update = optimizer.minimize(self.loss)
python/test_unityagents.py

import json
import mock
import pytest
import struct

from unitytrainers.buffer import Buffer
from unitytrainers.models import *
from unityagents import UnityEnvironment, UnityEnvironmentException, UnityActionException, \
    BrainInfo, Curriculum


def append_length(input):
    return struct.pack("I", len(input.encode())) + input.encode()


dummy_start = '''{
  "AcademyName": "RealFakeAcademy",
  "resetParameters": {},
  "brainNames": ["RealFakeBrain"],
  "externalBrainNames": ["RealFakeBrain"],
  "logPath":"RealFakePath",
  "apiNumber":"API-2",
  "brainParameters": [{
      "stateSize": 3,
      "actionSize": 2,
      "memorySize": 0,
      "cameraResolutions": [],
      "actionDescriptions": ["",""],
      "actionSpaceType": 1,
      "stateSpaceType": 1
      }]
}'''.encode()

dummy_reset = [
    'CONFIG_REQUEST'.encode(),
    append_length(
        '''
        {
          "brain_name": "RealFakeBrain",
          "agents": [1,2],
          "states": [1,2,3,4,5,6],
          "rewards": [1,2],
          "actions": [1,2,3,4],
          "memories": [],
          "dones": [false, false]
        }'''),
    'False'.encode()]

dummy_step = ['actions'.encode(),
              append_length('''
        {
          "brain_name": "RealFakeBrain",
          "agents": [1,2,3],
          "states": [1,2,3,4,5,6,7,8,9],
          "rewards": [1,2,3],
          "actions": [1,2,3,4,5,6],
          "memories": [],
          "dones": [false, false, false]
        }'''),
              'False'.encode(),
              'actions'.encode(),
              append_length('''
        {
          "brain_name": "RealFakeBrain",
          "agents": [1,2,3],
          "states": [1,2,3,4,5,6,7,8,9],
          "rewards": [1,2,3],
          "actions": [1,2,3,4,5,6],
          "memories": [],
          "dones": [false, false, true]
        }'''),
              'True'.encode()]


def test_handles_bad_filename():
    with pytest.raises(UnityEnvironmentException):
        UnityEnvironment(' ')


def test_initialization():
    with mock.patch('subprocess.Popen') as mock_subproc_popen:
        with mock.patch('socket.socket') as mock_socket:
            with mock.patch('glob.glob') as mock_glob:
                mock_glob.return_value = ['FakeLaunchPath']
                mock_socket.return_value.accept.return_value = (mock_socket, 0)
                mock_socket.recv.return_value.decode.return_value = dummy_start
                env = UnityEnvironment(' ')
                with pytest.raises(UnityActionException):
                    env.step([0])
                assert env.brain_names[0] == 'RealFakeBrain'
                env.close()


def test_reset():
    with mock.patch('subprocess.Popen') as mock_subproc_popen:
        with mock.patch('socket.socket') as mock_socket:
            with mock.patch('glob.glob') as mock_glob:
                mock_glob.return_value = ['FakeLaunchPath']
                mock_socket.return_value.accept.return_value = (mock_socket, 0)
                mock_socket.recv.return_value.decode.return_value = dummy_start
                env = UnityEnvironment(' ')
                brain = env.brains['RealFakeBrain']
                mock_socket.recv.side_effect = dummy_reset
                brain_info = env.reset()
                env.close()
                assert not env.global_done
                assert isinstance(brain_info, dict)
                assert isinstance(brain_info['RealFakeBrain'], BrainInfo)
                assert isinstance(brain_info['RealFakeBrain'].observations, list)
                assert isinstance(brain_info['RealFakeBrain'].states, np.ndarray)
                assert len(brain_info['RealFakeBrain'].observations) == brain.number_observations
                assert brain_info['RealFakeBrain'].states.shape[0] == len(brain_info['RealFakeBrain'].agents)
                assert brain_info['RealFakeBrain'].states.shape[1] == brain.state_space_size


def test_step():
    with mock.patch('subprocess.Popen') as mock_subproc_popen:
        with mock.patch('socket.socket') as mock_socket:
            with mock.patch('glob.glob') as mock_glob:
                mock_glob.return_value = ['FakeLaunchPath']
                mock_socket.return_value.accept.return_value = (mock_socket, 0)
                mock_socket.recv.return_value.decode.return_value = dummy_start
                env = UnityEnvironment(' ')
                brain = env.brains['RealFakeBrain']
                mock_socket.recv.side_effect = dummy_reset
                brain_info = env.reset()
                mock_socket.recv.side_effect = dummy_step
                brain_info = env.step([0] * brain.action_space_size * len(brain_info['RealFakeBrain'].agents))
                with pytest.raises(UnityActionException):
                    env.step([0])
                brain_info = env.step([0] * brain.action_space_size * len(brain_info['RealFakeBrain'].agents))
                with pytest.raises(UnityActionException):
                    env.step([0] * brain.action_space_size * len(brain_info['RealFakeBrain'].agents))
                env.close()
                assert env.global_done
                assert isinstance(brain_info, dict)
                assert isinstance(brain_info['RealFakeBrain'], BrainInfo)
                assert isinstance(brain_info['RealFakeBrain'].observations, list)
                assert isinstance(brain_info['RealFakeBrain'].states, np.ndarray)
                assert len(brain_info['RealFakeBrain'].observations) == brain.number_observations
                assert brain_info['RealFakeBrain'].states.shape[0] == len(brain_info['RealFakeBrain'].agents)
                assert brain_info['RealFakeBrain'].states.shape[1] == brain.state_space_size
                assert not brain_info['RealFakeBrain'].local_done[0]
                assert brain_info['RealFakeBrain'].local_done[2]


def test_close():
    with mock.patch('subprocess.Popen') as mock_subproc_popen:
        with mock.patch('socket.socket') as mock_socket:
            with mock.patch('glob.glob') as mock_glob:
                mock_glob.return_value = ['FakeLaunchPath']
                mock_socket.return_value.accept.return_value = (mock_socket, 0)
                mock_socket.recv.return_value.decode.return_value = dummy_start
                env = UnityEnvironment(' ')
                assert env._loaded
                env.close()
                assert not env._loaded
                mock_socket.close.assert_called_once()


dummy_curriculum = json.loads('''{
    "measure" : "reward",
    "thresholds" : [10, 20, 50],
    "min_lesson_length" : 3,
    "signal_smoothing" : true,
    "parameters" :
    {
        "param1" : [0.7, 0.5, 0.3, 0.1],
        "param2" : [100, 50, 20, 15],
        "param3" : [0.2, 0.3, 0.7, 0.9]
    }
}''')
bad_curriculum = json.loads('''{
    "measure" : "reward",
    "thresholds" : [10, 20, 50],
    "min_lesson_length" : 3,
    "signal_smoothing" : false,
    "parameters" :
    {
        "param1" : [0.7, 0.5, 0.3, 0.1],
        "param2" : [100, 50, 20],
        "param3" : [0.2, 0.3, 0.7, 0.9]
    }
}''')


def test_curriculum():
    open_name = '%s.open' % __name__
    with mock.patch('json.load') as mock_load:
        with mock.patch(open_name, create=True) as mock_open:
            mock_open.return_value = 0
            mock_load.return_value = bad_curriculum
            with pytest.raises(UnityEnvironmentException):
                curriculum = Curriculum('test_unityagents.py', {"param1": 1, "param2": 1, "param3": 1})
            mock_load.return_value = dummy_curriculum
            with pytest.raises(UnityEnvironmentException):
                curriculum = Curriculum('test_unityagents.py', {"param1": 1, "param2": 1})
            curriculum = Curriculum('test_unityagents.py', {"param1": 1, "param2": 1, "param3": 1})
            assert curriculum.get_lesson_number == 0
            curriculum.set_lesson_number(1)
            assert curriculum.get_lesson_number == 1
            curriculum.increment_lesson(10)
            assert curriculum.get_lesson_number == 1
            curriculum.increment_lesson(30)
            curriculum.increment_lesson(30)
            assert curriculum.get_lesson_number == 1
            assert curriculum.lesson_length == 3
            curriculum.increment_lesson(30)
            assert curriculum.get_config() == {'param1': 0.3, 'param2': 20, 'param3': 0.7}
            assert curriculum.get_config(0) == {"param1": 0.7, "param2": 100, "param3": 0.2}
            assert curriculum.lesson_length == 0
            assert curriculum.get_lesson_number == 2


c_action_c_state_start = '''{
  "AcademyName": "RealFakeAcademy",
  "resetParameters": {},
  "brainNames": ["RealFakeBrain"],
  "externalBrainNames": ["RealFakeBrain"],
  "logPath":"RealFakePath",
  "apiNumber":"API-2",
  "brainParameters": [{
      "stateSize": 3,
      "actionSize": 2,
      "memorySize": 0,
      "cameraResolutions": [],
      "actionDescriptions": ["",""],
      "actionSpaceType": 1,
      "stateSpaceType": 1
      }]
}'''.encode()


def test_ppo_model_continuous():
    tf.reset_default_graph()
    with mock.patch('subprocess.Popen') as mock_subproc_popen:
        with mock.patch('socket.socket') as mock_socket:
            with mock.patch('glob.glob') as mock_glob:
                # End of mock
                with tf.Session() as sess:
                    with tf.variable_scope("FakeGraphScope"):
                        mock_glob.return_value = ['FakeLaunchPath']
                        mock_socket.return_value.accept.return_value = (mock_socket, 0)
                        mock_socket.recv.return_value.decode.return_value = c_action_c_state_start
                        env = UnityEnvironment(' ')

                        model = create_agent_model(env.brains["RealFakeBrain"])
                        init = tf.global_variables_initializer()
                        sess.run(init)

                        run_list = [model.output, model.probs, model.value, model.entropy,
                                    model.learning_rate]
                        feed_dict = {model.batch_size: 2,
                                     model.sequence_length: 1,
                                     model.state_in: np.array([[1, 2, 3], [3, 4, 5]]),
                                     model.epsilon: np.random.randn(2, 2)
                                     }
                        sess.run(run_list, feed_dict=feed_dict)
                        env.close()


d_action_c_state_start = '''{
  "AcademyName": "RealFakeAcademy",
  "resetParameters": {},
  "brainNames": ["RealFakeBrain"],
  "externalBrainNames": ["RealFakeBrain"],
  "logPath":"RealFakePath",
  "apiNumber":"API-2",
  "brainParameters": [{
      "stateSize": 3,
      "actionSize": 2,
      "memorySize": 0,
      "cameraResolutions": [{"width":30,"height":40,"blackAndWhite":false}],
      "actionDescriptions": ["",""],
      "actionSpaceType": 0,
      "stateSpaceType": 1
      }]
}'''.encode()


def test_ppo_model_discrete():
    tf.reset_default_graph()
    with mock.patch('subprocess.Popen') as mock_subproc_popen:
        with mock.patch('socket.socket') as mock_socket:
            with mock.patch('glob.glob') as mock_glob:
                # End of mock
                with tf.Session() as sess:
                    with tf.variable_scope("FakeGraphScope"):
                        mock_glob.return_value = ['FakeLaunchPath']
                        mock_socket.return_value.accept.return_value = (mock_socket, 0)
                        mock_socket.recv.return_value.decode.return_value = d_action_c_state_start
                        env = UnityEnvironment(' ')
                        model = create_agent_model(env.brains["RealFakeBrain"])
                        init = tf.global_variables_initializer()
                        sess.run(init)

                        run_list = [model.output, model.probs, model.value, model.entropy,
                                    model.learning_rate]
                        feed_dict = {model.batch_size: 2,
                                     model.sequence_length: 1,
                                     model.state_in: np.array([[1, 2, 3], [3, 4, 5]]),
                                     model.observation_in[0]: np.ones([2, 40, 30, 3])
                                     }
                        sess.run(run_list, feed_dict=feed_dict)
                        env.close()


def assert_array(a, b):
    assert a.shape == b.shape
    la = list(a.flatten())
    lb = list(b.flatten())
    for i in range(len(la)):
        assert la[i] == lb[i]


def test_buffer():
    b = Buffer()
    for fake_agent_id in range(4):
        for i in range(9):
            b[fake_agent_id]['state'].append(
                [100 * fake_agent_id + 10 * i + 1, 100 * fake_agent_id + 10 * i + 2, 100 * fake_agent_id + 10 * i + 3]
            )
            b[fake_agent_id]['action'].append([100 * fake_agent_id + 10 * i + 4, 100 * fake_agent_id + 10 * i + 5])
    a = b[1]['state'].get_batch(batch_size=2, training_length=None, sequential=True)
    assert_array(a, np.array([[171, 172, 173], [181, 182, 183]]))
    a = b[2]['state'].get_batch(batch_size=2, training_length=3, sequential=True)
    assert_array(a, np.array([
        [[231, 232, 233], [241, 242, 243], [251, 252, 253]],
        [[261, 262, 263], [271, 272, 273], [281, 282, 283]]
    ]))
    a = b[2]['state'].get_batch(batch_size=2, training_length=3, sequential=False)
    assert_array(a, np.array([
        [[251, 252, 253], [261, 262, 263], [271, 272, 273]],
        [[261, 262, 263], [271, 272, 273], [281, 282, 283]]
    ]))
    b[4].reset_agent()
    assert len(b[4]) == 0
    b.append_update_buffer(3,
                           batch_size=None, training_length=2)
    b.append_update_buffer(2,
                           batch_size=None, training_length=2)
    assert len(b.update_buffer['action']) == 10
    assert np.array(b.update_buffer['action']).shape == (10, 2, 2)


if __name__ == '__main__':
    pytest.main()