import logging
from enum import Enum
from typing import Callable, List

import numpy as np
import tensorflow as tf

# These import paths assume the ml-agents package layout this module ships with.
from mlagents.trainers.exception import UnityTrainerException
from mlagents.envs.brain import CameraResolution

logger = logging.getLogger("mlagents.trainers")

ActivationFunction = Callable[[tf.Tensor], tf.Tensor]
EncoderFunction = Callable[
    [tf.Tensor, int, ActivationFunction, int, str, bool], tf.Tensor
]

EPSILON = 1e-7

class EncoderType(Enum):
    SIMPLE = "simple"
    NATURE_CNN = "nature_cnn"
    RESNET = "resnet"


class LearningRateSchedule(Enum):
    CONSTANT = "constant"
    LINEAR = "linear"


class LearningModel:
    # Minimum supported side for each encoder type. If refactoring an encoder, please
    # adjust these also.
    MIN_RESOLUTION_FOR_ENCODER = {
        EncoderType.SIMPLE: 20,
        EncoderType.NATURE_CNN: 36,
        EncoderType.RESNET: 15,
    }

    def __init__(
        self, m_size, normalize, use_recurrent, brain, seed, stream_names=None
    ):
        ...

    @staticmethod
    def create_resnet_visual_observation_encoder(
        image_input: tf.Tensor,
        h_size: int,
        activation: ActivationFunction,
        num_layers: int,
        scope: str,
        reuse: bool,
    ) -> tf.Tensor:
        # ... convolutional body elided; it produces a flat, dense encoding ...
        return hidden_flat

    @staticmethod
    def get_encoder_for_type(encoder_type: EncoderType) -> EncoderFunction:
        ENCODER_FUNCTION_BY_TYPE = {
            EncoderType.SIMPLE: LearningModel.create_visual_observation_encoder,
            EncoderType.NATURE_CNN: LearningModel.create_nature_cnn_visual_observation_encoder,
            EncoderType.RESNET: LearningModel.create_resnet_visual_observation_encoder,
        }
        return ENCODER_FUNCTION_BY_TYPE.get(
            encoder_type, LearningModel.create_visual_observation_encoder
        )
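
    # A minimal usage sketch (hypothetical values, not taken from this file): a
    # trainer would parse the encoder choice into an EncoderType, resolve the
    # encoder function, and call it positionally, matching the EncoderFunction
    # alias defined at the top of this module:
    #
    #     vis_encode_type = EncoderType("resnet")
    #     encoder_fn = LearningModel.get_encoder_for_type(vis_encode_type)
    #     encoded = encoder_fn(visual_in, 256, tf.nn.elu, 2, "stream_0_encoder_0", False)
    #
    # dict.get with a default keeps the dispatch total: an unrecognized encoder
    # type falls back to the simple encoder instead of raising a KeyError.
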
    @staticmethod
    def create_discrete_action_masking_layer(all_logits, action_masks, action_size):
        """
        Creates a masking layer for the discrete actions
        :param all_logits: The concatenated unnormalized action probabilities for all branches
        :param action_masks: The mask for the logits. Must be of dimension [None x total_number_of_action]
        :param action_size: A list containing the number of possible actions for each branch
        :return: The sampled action per branch of dimension [batch_size, num_branches], the
            concatenated normalized probabilities (after softmax) and the concatenated
            normalized log probabilities
        """
        action_idx = [0] + list(np.cumsum(action_size))
        branches_logits = [
            all_logits[:, action_idx[i] : action_idx[i + 1]]
            for i in range(len(action_size))
        ]
        branch_masks = [
            action_masks[:, action_idx[i] : action_idx[i + 1]]
            for i in range(len(action_size))
        ]
        raw_probs = [
            tf.multiply(tf.nn.softmax(branches_logits[k]) + EPSILON, branch_masks[k])
            for k in range(len(action_size))
        ]
        normalized_probs = [
            tf.divide(raw_probs[k], tf.reduce_sum(raw_probs[k], axis=1, keepdims=True))
            for k in range(len(action_size))
        ]
        output = tf.concat(
            [
                tf.multinomial(tf.log(normalized_probs[k] + EPSILON), 1)
                for k in range(len(action_size))
            ],
            axis=1,
        )
        return (
            output,
            tf.concat([normalized_probs[k] for k in range(len(action_size))], axis=1),
            tf.concat(
                [
                    tf.log(normalized_probs[k] + EPSILON)
                    for k in range(len(action_size))
                ],
                axis=1,
            ),
        )
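
    # A worked numeric sketch of the masking above (illustrative values only):
    # with branch sizes [2, 3] and the second action of branch 0 masked out,
    #
    #     all_logits   = [[0.0, 0.0, 0.0, 0.0, 0.0]]
    #     action_masks = [[1.0, 0.0, 1.0, 1.0, 1.0]]
    #
    # branch 0's softmax is [0.5, 0.5]; multiplying by the mask and
    # renormalizing gives [1.0, 0.0], while branch 1 remains uniform at
    # [1/3, 1/3, 1/3]. Each branch still sums to 1, and masked actions are
    # sampled with (near-)zero probability.
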
    @staticmethod
    def _check_resolution_for_encoder(
        camera_res: CameraResolution, vis_encoder_type: EncoderType
    ) -> None:
        min_res = LearningModel.MIN_RESOLUTION_FOR_ENCODER[vis_encoder_type]
        if camera_res.height < min_res or camera_res.width < min_res:
            raise UnityTrainerException(
                f"Visual observation resolution ({camera_res.width}x{camera_res.height}) "
                f"is too small for the provided EncoderType ({vis_encoder_type.value}). "
                f"The minimum dimension is {min_res}."
            )
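
    # For example (hypothetical camera): a 32x32 visual observation passes the
    # SIMPLE check (min side 20) but fails NATURE_CNN (min side 36):
    #
    #     LearningModel._check_resolution_for_encoder(res_32x32, EncoderType.SIMPLE)
    #     LearningModel._check_resolution_for_encoder(res_32x32, EncoderType.NATURE_CNN)  # raises UnityTrainerException
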
    def create_observation_streams(
        self,
        num_streams: int,
        h_size: int,
        num_layers: int,
        vis_encode_type: EncoderType = EncoderType.SIMPLE,
        stream_scopes: List[str] = None,
    ) -> List[tf.Tensor]:
        """
        Creates encoding stream for observations.
        :param num_streams: Number of streams to create.
        :param h_size: Size of hidden linear layers in stream.
        :param num_layers: Number of hidden linear layers in stream.
        :param vis_encode_type: Type of visual encoder to use for visual observations.
        :param stream_scopes: List of strings (length == num_streams), which contains
            the scopes for each of the streams. None if all under the same TF scope.
        :return: List of encoded streams.
        """
        brain = self.brain
        activation_fn = self.swish
        self.visual_in = []
        for i in range(brain.number_visual_observations):
            LearningModel._check_resolution_for_encoder(
                brain.camera_resolutions[i], vis_encode_type
            )
            visual_input = self.create_visual_input(
                brain.camera_resolutions[i], name="visual_observation_" + str(i)
            )
            self.visual_in.append(visual_input)

        # Pick the encoder function based on the EncoderType
        create_encoder_func = LearningModel.get_encoder_for_type(vis_encode_type)

        final_hiddens = []
        for i in range(num_streams):
            visual_encoders = []
            hidden_state, hidden_visual = None, None
            _scope_add = stream_scopes[i] if stream_scopes else ""
            if brain.number_visual_observations > 0:
                for j in range(brain.number_visual_observations):
                    encoded_visual = create_encoder_func(
                        self.visual_in[j],
                        h_size,
                        activation_fn,
                        num_layers,
                        f"{_scope_add}main_graph_{i}_encoder{j}",  # scope
                        False,  # reuse
                    )
                    visual_encoders.append(encoded_visual)
                hidden_visual = tf.concat(visual_encoders, axis=1)
|
|
|