浏览代码

using to_float for encoding

/sampler-refactor-copy
Andrew Cohen 5 年前
当前提交
e5c07272
共有 3 个文件被更改，包括 26 次插入和 17 次删除
  1. 16
      ml-agents-envs/mlagents_envs/side_channel/environment_parameters_channel.py
  2. 23
      ml-agents/mlagents/trainers/settings.py
  3. 4
      ml-agents/mlagents/trainers/subprocess_env_manager.py

16
ml-agents-envs/mlagents_envs/side_channel/environment_parameters_channel.py


msg.write_float32(value)
super().queue_message_to_send(msg)
def set_sampler_parameters(self, key: str, values: List[float]) -> None:
def set_sampler_parameters(
self, key: str, encoding: List[float], seed: int
) -> None:
:param values: The float encoding of the sampler.
:param encoding: The float encoding of the sampler.
:param seed: The random seed to initialize the sampler.
# Write seed
msg.write_int32(int(values[0]))
msg.write_int32(len(values[1:]))
# Sampler encoding
for value in values[1:]:
msg.write_int32(seed)
# for read float list in C#
msg.write_int32(len(encoding))
for value in encoding:
msg.write_float32(value)
super().queue_message_to_send(msg)

23
ml-agents/mlagents/trainers/settings.py


}
return _mapping[self]
def to_float(self) -> float:
_mapping = {
ParameterRandomizationType.UNIFORM: 0.0,
ParameterRandomizationType.GAUSSIAN: 1.0,
ParameterRandomizationType.MULTIRANGEUNIFORM: 2.0,
@staticmethod
def to_float(t: type) -> float:
_mapping: Dict[type, float] = {
UniformSettings: 0.0,
GaussianSettings: 1.0,
MultiRangeUniformSettings: 2.0,
return _mapping[self]
return _mapping[t]
@attr.s(auto_attribs=True)

def to_float_encoding(self) -> List[float]:
"Returns the sampler type followed by the min and max values"
return [self.seed, 0.0, self.min_value, self.max_value]
return [
ParameterRandomizationType.to_float(type(self)),
self.min_value,
self.max_value,
]
@attr.s(auto_attribs=True)

def to_float_encoding(self) -> List[float]:
"Returns the sampler type followed by the mean and standard deviation"
return [self.seed, 1.0, self.mean, self.st_dev]
return [ParameterRandomizationType.to_float(type(self)), self.mean, self.st_dev]
@attr.s(auto_attribs=True)

floats: List[float] = []
for interval in self.intervals:
floats += interval
return [self.seed, 2.0] + floats
return [ParameterRandomizationType.to_float(type(self))] + floats
@attr.s(auto_attribs=True)

4
ml-agents/mlagents/trainers/subprocess_env_manager.py


if isinstance(v, float):
env_parameters.set_float_parameter(k, v)
elif isinstance(v, ParameterRandomizationSettings):
env_parameters.set_sampler_parameters(k, v.to_float_encoding())
env_parameters.set_sampler_parameters(
k, v.to_float_encoding(), v.seed
)
env.reset()
all_step_result = _generate_all_results()
_send_response(EnvironmentCommand.RESET, all_step_result)

正在加载...
取消
保存