
Merge pull request #4385 from Unity-Technologies/release_2_verified-barracuda-1.0.2

Update verified branch with Barracuda 1.0.2
/r2v-yamato-linux
GitHub, 4 years ago
Current commit
f7373172
51 files changed: 152 insertions, 150 deletions
  1. .pre-commit-config.yaml (12)
  2. .yamato/com.unity.ml-agents-test.yml (2)
  3. .yamato/gym-interface-test.yml (24)
  4. .yamato/protobuf-generation-test.yml (5)
  5. .yamato/python-ll-api-test.yml (25)
  6. .yamato/standalone-build-test.yml (2)
  7. .yamato/training-int-tests.yml (2)
  8. com.unity.ml-agents/CHANGELOG.md (9)
  9. com.unity.ml-agents/Runtime/Academy.cs (2)
  10. com.unity.ml-agents/package.json (8)
  11. gym-unity/setup.py (4)
  12. ml-agents-envs/mlagents_envs/base_env.py (8)
  13. ml-agents-envs/mlagents_envs/communicator.py (2)
  14. ml-agents-envs/mlagents_envs/environment.py (22)
  15. ml-agents-envs/mlagents_envs/exception.py (2)
  16. ml-agents-envs/mlagents_envs/rpc_communicator.py (2)
  17. ml-agents-envs/mlagents_envs/side_channel/environment_parameters_channel.py (2)
  18. ml-agents-envs/mlagents_envs/side_channel/float_properties_channel.py (2)
  19. ml-agents-envs/mlagents_envs/tests/test_side_channel.py (4)
  20. ml-agents-envs/setup.py (2)
  21. ml-agents/mlagents/trainers/buffer.py (4)
  22. ml-agents/mlagents/trainers/components/bc/model.py (2)
  23. ml-agents/mlagents/trainers/components/bc/module.py (2)
  24. ml-agents/mlagents/trainers/components/reward_signals/__init__.py (2)
  25. ml-agents/mlagents/trainers/components/reward_signals/curiosity/model.py (6)
  26. ml-agents/mlagents/trainers/components/reward_signals/gail/model.py (6)
  27. ml-agents/mlagents/trainers/components/reward_signals/reward_signal_factory.py (4)
  28. ml-agents/mlagents/trainers/curriculum.py (12)
  29. ml-agents/mlagents/trainers/ghost/controller.py (4)
  30. ml-agents/mlagents/trainers/ghost/trainer.py (2)
  31. ml-agents/mlagents/trainers/models.py (6)
  32. ml-agents/mlagents/trainers/policy/tf_policy.py (12)
  33. ml-agents/mlagents/trainers/ppo/optimizer.py (12)
  34. ml-agents/mlagents/trainers/ppo/trainer.py (16)
  35. ml-agents/mlagents/trainers/sac/network.py (6)
  36. ml-agents/mlagents/trainers/sac/optimizer.py (4)
  37. ml-agents/mlagents/trainers/sac/trainer.py (14)
  38. ml-agents/mlagents/trainers/sampler_class.py (2)
  39. ml-agents/mlagents/trainers/stats.py (12)
  40. ml-agents/mlagents/trainers/tests/test_simple_rl.py (2)
  41. ml-agents/mlagents/trainers/trainer/rl_trainer.py (2)
  42. ml-agents/mlagents/trainers/trainer/trainer.py (4)
  43. ml-agents/mlagents/trainers/trainer_controller.py (2)
  44. ml-agents/mlagents/trainers/trainer_util.py (2)
  45. ml-agents/setup.py (5)
  46. ml-agents/tests/yamato/check_coverage_percent.py (3)
  47. ml-agents/tests/yamato/scripts/run_gym.py (4)
  48. ml-agents/tests/yamato/scripts/run_llapi.py (2)
  49. ml-agents/tests/yamato/yamato_utils.py (3)
  50. test_requirements.txt (2)
  51. utils/validate_versions.py (2)

.pre-commit-config.yaml (12)


# flake8-tidy-imports is used for banned-modules, not actually tidying
additional_dependencies: [flake8-comprehensions==3.2.2, flake8-tidy-imports==4.1.0, flake8-bugbear==20.1.4]
- repo: https://github.com/asottile/pyupgrade
rev: v2.7.0
hooks:
- id: pyupgrade
args: [--py3-plus, --py36-plus]
exclude: >
(?x)^(
.*barracuda.py|
.*_pb2.py|
.*_pb2_grpc.py
)$
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v2.5.0
hooks:
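
Most of the Python churn in this diff comes from the pre-commit change above: pyupgrade now runs with --py3-plus and --py36-plus and mechanically rewrites legacy Python 2 idioms. A minimal, self-contained sketch of the recurring rewrite patterns seen in the files below (illustrative only, not code from this repository):

# 1. str.format -> f-string; format specs such as :0.3f carry over unchanged.
#    Where a template spans several concatenated literals, pyupgrade instead
#    just drops the explicit indices: "{0}".format(x) -> "{}".format(x).
name, elo = "behavior", 1234.5678
assert "{0} ELO: {1:0.3f}".format(name, elo) == f"{name} ELO: {elo:0.3f}"

# 2. Redundant `object` bases and super() arguments are dropped.
class Base:  # was: class Base(object):
    def __init__(self, message):
        self.message = message

class Child(Base):
    def __init__(self, message):
        super().__init__(message)  # was: super(Child, self).__init__(message)

# 3. Exception aliases collapse to OSError (IOError and socket.error have
#    been aliases of OSError since Python 3.3).
assert IOError is OSError

# 4. .encode() of an ASCII literal becomes a bytes literal.
assert "foo".encode("ascii") == b"foo"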

.yamato/com.unity.ml-agents-test.yml (2)


image: {{ platform.image }}
flavor: {{ platform.flavor}}
commands:
- python -m pip install unity-downloader-cli --extra-index-url https://artifactory.eu-cph-1.unityops.net/api/pypi/common-python/simple
- python -m pip install unity-downloader-cli --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/unity-pypi-local/simple --upgrade
- unity-downloader-cli -u trunk -c editor --wait --fast
- npm install upm-ci-utils@stable -g --registry https://artifactory.prd.cds.internal.unity3d.com/artifactory/api/npm/upm-npm
- upm-ci package test -u {{ editor.version }} --package-path com.unity.ml-agents {{ editor.coverageOptions }}

.yamato/gym-interface-test.yml (24)


variables:
UNITY_VERSION: {{ editor.version }}
commands:
- pip install pyyaml
- pip install pyyaml --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
- python -u -m ml-agents.tests.yamato.setup_venv
- ./venv/bin/python ml-agents/tests/yamato/scripts/run_gym.py --env=artifacts/testPlayer-Basic
dependencies:

changes:
only:
- "com.unity.ml-agents/**"
- "Project/**"
- "ml-agents/**"
- "ml-agents-envs/**"
- ".yamato/gym-interface-test.yml"
except:
- "*.md"
- "com.unity.ml-agents/*.md"
- "com.unity.ml-agents/**/*.md"
expression: |
(pull_request.target eq "master" OR
pull_request.target match "release.+") AND
NOT pull_request.draft AND
(pull_request.changes.any match "com.unity.ml-agents/**" OR
pull_request.changes.any match "Project/**" OR
pull_request.changes.any match "ml-agents/**" OR
pull_request.changes.any match "ml-agents-envs/**" OR
pull_request.changes.any match "gym-unity/**" OR
pull_request.changes.any match ".yamato/gym-interface-test.yml") AND
NOT pull_request.changes.all match "**/*.md"
{% endfor %}

.yamato/protobuf-generation-test.yml (5)


nuget install Grpc.Tools -Version $GRPC_VERSION -OutputDirectory protobuf-definitions/
python3 -m venv venv
. venv/bin/activate
pip install --upgrade pip
pip install grpcio-tools==1.13.0 --progress-bar=off
pip install mypy-protobuf==1.16.0 --progress-bar=off
pip install --upgrade pip --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
pip install grpcio==1.28.1 grpcio-tools==1.13.0 protobuf==3.11.3 six==1.14.0 mypy-protobuf==1.16.0 --progress-bar=off --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
cd protobuf-definitions
chmod +x Grpc.Tools.$GRPC_VERSION/tools/macosx_x64/protoc
chmod +x Grpc.Tools.$GRPC_VERSION/tools/macosx_x64/grpc_csharp_plugin

.yamato/python-ll-api-test.yml (25)


variables:
UNITY_VERSION: {{ editor.version }}
commands:
- pip install pyyaml
- pip install pyyaml --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
- ./venv/bin/python ml-agents/tests/yamato/scripts/run_llapi.py
- ./venv/bin/python ml-agents/tests/yamato/scripts/run_llapi.py
- ./venv/bin/python ml-agents/tests/yamato/scripts/run_llapi.py --env=artifacts/testPlayer-Basic
- ./venv/bin/python ml-agents/tests/yamato/scripts/run_llapi.py --env=artifacts/testPlayer-WallJump
- ./venv/bin/python ml-agents/tests/yamato/scripts/run_llapi.py --env=artifacts/testPlayer-Bouncer

cancel_old_ci: true
changes:
only:
- "com.unity.ml-agents/**"
- "Project/**"
- "ml-agents/**"
- "ml-agents-envs/**"
- ".yamato/python-ll-api-test.yml"
except:
- "*.md"
- "com.unity.ml-agents/*.md"
- "com.unity.ml-agents/**/*.md"
expression: |
(pull_request.target eq "master" OR
pull_request.target match "release.+") AND
NOT pull_request.draft AND
(pull_request.changes.any match "com.unity.ml-agents/**" OR
pull_request.changes.any match "Project/**" OR
pull_request.changes.any match "ml-agents/**" OR
pull_request.changes.any match "ml-agents-envs/**" OR
pull_request.changes.any match ".yamato/python-ll-api-test.yml") AND
NOT pull_request.changes.all match "**/*.md"
{% endfor %}

.yamato/standalone-build-test.yml (2)


variables:
UNITY_VERSION: {{ editor.version }}
commands:
- pip install pyyaml
- pip install pyyaml --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
- python -u -m ml-agents.tests.yamato.standalone_build_tests
- python -u -m ml-agents.tests.yamato.standalone_build_tests --scene=Assets/ML-Agents/Examples/Basic/Scenes/Basic.unity
- python -u -m ml-agents.tests.yamato.standalone_build_tests --scene=Assets/ML-Agents/Examples/Bouncer/Scenes/Bouncer.unity

.yamato/training-int-tests.yml (2)


variables:
UNITY_VERSION: {{ editor.version }}
commands:
- pip install pyyaml
- pip install pyyaml --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
- python -u -m ml-agents.tests.yamato.training_int_tests
# Backwards-compatibility tests.
# If we make a breaking change to the communication protocol, these will need

com.unity.ml-agents/CHANGELOG.md (9)


and this project adheres to
[Semantic Versioning](http://semver.org/spec/v2.0.0.html).
## [1.0.4] - 2020-08-19
### Minor Changes
#### com.unity.ml-agents (C#)
- Update Barracuda to 1.0.2. (#4385)
- Explicitly call out dependencies in package.json.
### Bug Fixes
#### com.unity.ml-agents (C#)
#### ml-agents / ml-agents-envs / gym-unity (Python)
## [1.0.3] - 2020-07-07
### Minor Changes
#### com.unity.ml-agents (C#)

com.unity.ml-agents/Runtime/Academy.cs (2)


/// Unity package version of com.unity.ml-agents.
/// This must match the version string in package.json and is checked in a unit test.
/// </summary>
internal const string k_PackageVersion = "1.0.3";
internal const string k_PackageVersion = "1.0.4";
const int k_EditorTrainingPort = 5004;
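
The comment above notes that k_PackageVersion must match the version string in package.json and is checked in a unit test; utils/validate_versions.py (the last file in this diff) enforces the same invariant from the Python side. A rough sketch of such a cross-file consistency check, where the paths and the regex are assumptions for illustration rather than the repository's actual code:

import json
import re

def versions_match(package_json_path: str, academy_cs_path: str) -> bool:
    # Read the package version declared in package.json.
    with open(package_json_path) as f:
        package_version = json.load(f)["version"]
    # Extract k_PackageVersion from Academy.cs with a simple pattern match.
    with open(academy_cs_path) as f:
        match = re.search(r'k_PackageVersion = "([^"]+)"', f.read())
    return match is not None and match.group(1) == package_version

assert versions_match(
    "com.unity.ml-agents/package.json", "com.unity.ml-agents/Runtime/Academy.cs"
)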

com.unity.ml-agents/package.json (8)


{
"name": "com.unity.ml-agents",
"displayName": "ML Agents",
"version": "1.0.3",
"version": "1.0.4",
"com.unity.barracuda": "1.0.1"
"com.unity.barracuda": "1.0.2",
"com.unity.modules.imageconversion": "1.0.0",
"com.unity.modules.jsonserialize": "1.0.0",
"com.unity.modules.physics": "1.0.0",
"com.unity.modules.physics2d": "1.0.0"
}
}

gym-unity/setup.py (4)


tag = os.getenv("CIRCLE_TAG")
if tag != EXPECTED_TAG:
info = "Git tag: {0} does not match the expected tag of this app: {1}".format(
info = "Git tag: {} does not match the expected tag of this app: {}".format(
tag, EXPECTED_TAG
)
sys.exit(info)

author_email="ML-Agents@unity3d.com",
url="https://github.com/Unity-Technologies/ml-agents",
packages=find_packages(),
install_requires=["gym", "mlagents_envs=={}".format(VERSION)],
install_requires=["gym", f"mlagents_envs=={VERSION}"],
cmdclass={"verify": VerifyVersionCommand},
)

ml-agents-envs/mlagents_envs/base_env.py (8)


:returns: The DecisionStep
"""
if agent_id not in self.agent_id_to_index:
raise KeyError(
"agent_id {} is not present in the DecisionSteps".format(agent_id)
)
raise KeyError(f"agent_id {agent_id} is not present in the DecisionSteps")
agent_index = self._agent_id_to_index[agent_id] # type: ignore
agent_obs = []
for batched_obs in self.obs:

specific agent
"""
if agent_id not in self.agent_id_to_index:
raise KeyError(
"agent_id {} is not present in the TerminalSteps".format(agent_id)
)
raise KeyError(f"agent_id {agent_id} is not present in the TerminalSteps")
agent_index = self._agent_id_to_index[agent_id] # type: ignore
agent_obs = []
for batched_obs in self.obs:

ml-agents-envs/mlagents_envs/communicator.py (2)


from mlagents_envs.communicator_objects.unity_input_pb2 import UnityInputProto
class Communicator(object):
class Communicator:
def __init__(self, worker_id=0, base_port=5005):
"""
Python side of the communication. Must be used in pair with the right Unity Communicator equivalent.

ml-agents-envs/mlagents_envs/environment.py (22)


for _sc in side_channels:
if _sc.channel_id in self.side_channels:
raise UnityEnvironmentException(
"There cannot be two side channels with the same channel id {0}.".format(
"There cannot be two side channels with the same channel id {}.".format(
_sc.channel_id
)
)

.replace(".x86", "")
)
true_filename = os.path.basename(os.path.normpath(env_path))
logger.debug("The true file name is {}".format(true_filename))
logger.debug(f"The true file name is {true_filename}")
if not (glob.glob(env_path) or glob.glob(env_path + ".*")):
return None

f"Couldn't launch the {file_name} environment. Provided filename does not match any environments."
)
else:
logger.debug("This is the launch string {}".format(launch_string))
logger.debug(f"This is the launch string {launch_string}")
# Launch Unity environment
subprocess_args = [launch_string]
if no_graphics:

def _assert_behavior_exists(self, behavior_name: str) -> None:
if behavior_name not in self._env_specs:
raise UnityActionException(
"The group {0} does not correspond to an existing agent group "
"The group {} does not correspond to an existing agent group "
"in the environment".format(behavior_name)
)

expected_shape = (len(self._env_state[behavior_name][0]), spec.action_size)
if action.shape != expected_shape:
raise UnityActionException(
"The behavior {0} needs an input of dimension {1} for "
"The behavior {} needs an input of dimension {} for "
"dimension {2}".format(behavior_name, expected_shape, action.shape)
"dimension {}".format(behavior_name, expected_shape, action.shape)
)
if action.dtype != expected_type:
action = action.astype(expected_type)

expected_shape = (spec.action_size,)
if action.shape != expected_shape:
raise UnityActionException(
f"The Agent {0} with BehaviorName {1} needs an input of dimension "
f"{2} but received input of dimension {3}".format(
agent_id, behavior_name, expected_shape, action.shape
)
f"The Agent {agent_id} with BehaviorName {behavior_name} needs an input of dimension "
f"{expected_shape} but received input of dimension {action.shape}"
)
expected_type = np.float32 if spec.is_action_continuous() else np.int32
if action.dtype != expected_type:
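
This hunk fixes a genuine bug, not just style: prefixing a .format()-style template with f makes {0} through {3} evaluate immediately as the literal expressions 0 to 3, and the implicitly concatenated literal is formatted before .format() ever runs, so the call's arguments were silently ignored. A small sketch of the pitfall, with made-up values:

agent_id, behavior_name = 7, "Walker"
buggy = f"The Agent {0} with BehaviorName {1}".format(agent_id, behavior_name)
assert buggy == "The Agent 0 with BehaviorName 1"  # arguments silently dropped
fixed = f"The Agent {agent_id} with BehaviorName {behavior_name}"
assert fixed == "The Agent 7 with BehaviorName Walker"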

)
if len(message_data) != message_len:
raise UnityEnvironmentException(
"The message received by the side channel {0} was "
"The message received by the side channel {} was "
"unexpectedly short. Make sure your Unity Environment "
"sending side channel data properly.".format(channel_id)
)

else:
logger.warning(
"Unknown side channel data received. Channel type "
": {0}.".format(channel_id)
": {}.".format(channel_id)
)
@staticmethod

ml-agents-envs/mlagents_envs/exception.py (2)


def __init__(self, worker_id):
message = self.MESSAGE_TEMPLATE.format(str(worker_id))
super(UnityWorkerInUseException, self).__init__(message)
super().__init__(message)

ml-agents-envs/mlagents_envs/rpc_communicator.py (2)


s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
try:
s.bind(("localhost", port))
except socket.error:
except OSError:
raise UnityWorkerInUseException(self.worker_id)
finally:
s.close()

ml-agents-envs/mlagents_envs/side_channel/environment_parameters_channel.py (2)


FLOAT = 0
def __init__(self) -> None:
channel_id = uuid.UUID(("534c891e-810f-11ea-a9d0-822485860400"))
channel_id = uuid.UUID("534c891e-810f-11ea-a9d0-822485860400")
super().__init__(channel_id)
def on_message_received(self, msg: IncomingMessage) -> None:

ml-agents-envs/mlagents_envs/side_channel/float_properties_channel.py (2)


def __init__(self, channel_id: uuid.UUID = None) -> None:
self._float_properties: Dict[str, float] = {}
if channel_id is None:
channel_id = uuid.UUID(("60ccf7d0-4f7e-11ea-b238-784f4387d1f7"))
channel_id = uuid.UUID("60ccf7d0-4f7e-11ea-b238-784f4387d1f7")
super().__init__(channel_id)
def on_message_received(self, msg: IncomingMessage) -> None:

ml-agents-envs/mlagents_envs/tests/test_side_channel.py (4)


sender = RawBytesChannel(guid)
receiver = RawBytesChannel(guid)
sender.send_raw_data("foo".encode("ascii"))
sender.send_raw_data("bar".encode("ascii"))
sender.send_raw_data(b"foo")
sender.send_raw_data(b"bar")
data = UnityEnvironment._generate_side_channel_data({sender.channel_id: sender})
UnityEnvironment._parse_side_channel_message({receiver.channel_id: receiver}, data)

ml-agents-envs/setup.py (2)


tag = os.getenv("CIRCLE_TAG")
if tag != EXPECTED_TAG:
info = "Git tag: {0} does not match the expected tag of this app: {1}".format(
info = "Git tag: {} does not match the expected tag of this app: {}".format(
tag, EXPECTED_TAG
)
sys.exit(info)

ml-agents/mlagents/trainers/buffer.py (4)


super().__init__()
def __str__(self):
return ", ".join(["'{0}' : {1}".format(k, str(self[k])) for k in self.keys()])
return ", ".join(["'{}' : {}".format(k, str(self[k])) for k in self.keys()])
def reset_agent(self) -> None:
"""

key_list = list(self.keys())
if not self.check_length(key_list):
raise BufferException(
"The length of the fields {0} were not of same length".format(key_list)
f"The length of the fields {key_list} were not of same length"
)
for field_key in key_list:
target_buffer[field_key].extend(

ml-agents/mlagents/trainers/components/bc/model.py (2)


from mlagents.trainers.policy.tf_policy import TFPolicy
class BCModel(object):
class BCModel:
def __init__(
self, policy: TFPolicy, learning_rate: float = 3e-4, anneal_steps: int = 0
):

ml-agents/mlagents/trainers/components/bc/module.py (2)


for k in param_keys:
if k not in config_dict:
raise UnityTrainerException(
"The required pre-training hyper-parameter {0} was not defined. Please check your \
"The required pre-training hyper-parameter {} was not defined. Please check your \
trainer YAML file.".format(
k
)

ml-agents/mlagents/trainers/components/reward_signals/__init__.py (2)


for k in param_keys:
if k not in config_dict:
raise UnityTrainerException(
"The hyper-parameter {0} could not be found for {1}.".format(
"The hyper-parameter {} could not be found for {}.".format(
k, cls.__name__
)
)

ml-agents/mlagents/trainers/components/reward_signals/curiosity/model.py (6)


from mlagents.trainers.policy.tf_policy import TFPolicy
class CuriosityModel(object):
class CuriosityModel:
def __init__(
self, policy: TFPolicy, encoding_size: int = 128, learning_rate: float = 3e-4
):

self.encoding_size,
ModelUtils.swish,
1,
"curiosity_stream_{}_visual_obs_encoder".format(i),
f"curiosity_stream_{i}_visual_obs_encoder",
False,
)

ModelUtils.swish,
1,
"curiosity_stream_{}_visual_obs_encoder".format(i),
f"curiosity_stream_{i}_visual_obs_encoder",
True,
)
visual_encoders.append(encoded_visual)

ml-agents/mlagents/trainers/components/reward_signals/gail/model.py (6)


EPSILON = 1e-7
class GAILModel(object):
class GAILModel:
def __init__(
self,
policy: TFPolicy,

self.encoding_size,
ModelUtils.swish,
1,
"gail_stream_{}_visual_obs_encoder".format(i),
f"gail_stream_{i}_visual_obs_encoder",
False,
)

ModelUtils.swish,
1,
"gail_stream_{}_visual_obs_encoder".format(i),
f"gail_stream_{i}_visual_obs_encoder",
True,
)
visual_policy_encoders.append(encoded_policy_visual)

ml-agents/mlagents/trainers/components/reward_signals/reward_signal_factory.py (4)


"""
rcls = NAME_TO_CLASS.get(name)
if not rcls:
raise UnityTrainerException("Unknown reward signal type {0}".format(name))
raise UnityTrainerException(f"Unknown reward signal type {name}")
"Unknown parameters given for reward signal {0}".format(name)
f"Unknown parameters given for reward signal {name}"
)
return class_inst

ml-agents/mlagents/trainers/curriculum.py (12)


for key in parameters:
config[key] = parameters[key][self.lesson_num]
logger.info(
"{0} lesson changed. Now in lesson {1}: {2}".format(
"{} lesson changed. Now in lesson {}: {}".format(
self.brain_name,
self.lesson_num,
", ".join([str(x) + " -> " + str(config[x]) for x in config]),

try:
with open(config_path) as data_file:
return Curriculum._load_curriculum(data_file)
except IOError:
raise CurriculumLoadingError(
"The file {0} could not be found.".format(config_path)
)
except OSError:
raise CurriculumLoadingError(f"The file {config_path} could not be found.")
raise CurriculumLoadingError(
"There was an error decoding {}".format(config_path)
)
raise CurriculumLoadingError(f"There was an error decoding {config_path}")
@staticmethod
def _load_curriculum(fp: TextIO) -> Dict:

ml-agents/mlagents/trainers/ghost/controller.py (4)


"""
self._queue.append(self._learning_team)
self._learning_team = self._queue.popleft()
logger.debug(
"Learning team {} swapped on step {}".format(self._learning_team, step)
)
logger.debug(f"Learning team {self._learning_team} swapped on step {step}")
self._changed_training_team = True
# Adapted from https://github.com/Unity-Technologies/ml-agents/pull/1975 and

ml-agents/mlagents/trainers/ghost/trainer.py (2)


:param run_id: The identifier of the current run
"""
super(GhostTrainer, self).__init__(
super().__init__(
brain_name, trainer_parameters, training, run_id, reward_buff_cap
)

ml-agents/mlagents/trainers/models.py (6)


)
else:
raise UnityTrainerException(
"The learning rate schedule {} is invalid.".format(lr_schedule)
f"The learning rate schedule {lr_schedule} is invalid."
)
return learning_rate

h_size,
activation=activation,
reuse=reuse,
name="hidden_{}".format(i),
name=f"hidden_{i}",
kernel_initializer=tf.initializers.variance_scaling(1.0),
)
return hidden

"""
value_heads = {}
for name in stream_names:
value = tf.layers.dense(hidden_input, 1, name="{}_value".format(name))
value = tf.layers.dense(hidden_input, 1, name=f"{name}_value")
value_heads[name] = value
value = tf.reduce_mean(list(value_heads.values()), 0)
return value_heads, value

ml-agents/mlagents/trainers/policy/tf_policy.py (12)


self.sequence_length = trainer_parameters["sequence_length"]
if self.m_size == 0:
raise UnityPolicyException(
"The memory size for brain {0} is 0 even "
"The memory size for brain {} is 0 even "
"The memory size for brain {0} is {1} "
"The memory size for brain {} is {} "
"but it must be divisible by 2.".format(
brain.brain_name, self.m_size
)

ckpt = tf.train.get_checkpoint_state(model_path)
if ckpt is None:
raise UnityPolicyException(
"The model {0} could not be loaded. Make "
"The model {} could not be loaded. Make "
"sure you specified the right "
"--run-id and that the previous run you are loading from had the same "
"behavior names.".format(model_path)

except tf.errors.NotFoundError:
raise UnityPolicyException(
"The model {0} was found but could not be loaded. Make "
"The model {} was found but could not be loaded. Make "
"sure the model is from the same version of ML-Agents, has the same behavior parameters, "
"and is using the same trainer configuration as the current run.".format(
model_path

)
)
else:
logger.info(
"Resuming training from step {}.".format(self.get_current_step())
)
logger.info(f"Resuming training from step {self.get_current_step()}.")
def initialize_or_load(self):
# If there is an initialize path, load from that. Else, load from the set model path.

ml-agents/mlagents/trainers/ppo/optimizer.py (12)


self.old_values = {}
for name in value_heads.keys():
returns_holder = tf.placeholder(
shape=[None], dtype=tf.float32, name="{}_returns".format(name)
shape=[None], dtype=tf.float32, name=f"{name}_returns"
shape=[None], dtype=tf.float32, name="{}_value_estimate".format(name)
shape=[None], dtype=tf.float32, name=f"{name}_value_estimate"
)
self.returns_holders[name] = returns_holder
self.old_values[name] = old_value

self.all_old_log_probs: mini_batch["action_probs"],
}
for name in self.reward_signals:
feed_dict[self.returns_holders[name]] = mini_batch[
"{}_returns".format(name)
]
feed_dict[self.old_values[name]] = mini_batch[
"{}_value_estimates".format(name)
]
feed_dict[self.returns_holders[name]] = mini_batch[f"{name}_returns"]
feed_dict[self.old_values[name]] = mini_batch[f"{name}_value_estimates"]
if self.policy.output_pre is not None and "actions_pre" in mini_batch:
feed_dict[self.policy.output_pre] = mini_batch["actions_pre"]

ml-agents/mlagents/trainers/ppo/trainer.py (16)


:param seed: The seed the model will be initialized with
:param run_id: The identifier of the current run
"""
super(PPOTrainer, self).__init__(
super().__init__(
brain_name, trainer_parameters, training, run_id, reward_buff_cap
)
self.param_keys = [

trajectory.done_reached and not trajectory.max_step_reached,
)
for name, v in value_estimates.items():
agent_buffer_trajectory["{}_value_estimates".format(name)].extend(v)
agent_buffer_trajectory[f"{name}_value_estimates"].extend(v)
self._stats_reporter.add_stat(
self.optimizer.reward_signals[name].value_name, np.mean(v)
)

evaluate_result = reward_signal.evaluate_batch(
agent_buffer_trajectory
).scaled_reward
agent_buffer_trajectory["{}_rewards".format(name)].extend(evaluate_result)
agent_buffer_trajectory[f"{name}_rewards"].extend(evaluate_result)
# Report the reward signals
self.collected_rewards[name][agent_id] += np.sum(evaluate_result)

for name in self.optimizer.reward_signals:
bootstrap_value = value_next[name]
local_rewards = agent_buffer_trajectory[
"{}_rewards".format(name)
].get_batch()
local_rewards = agent_buffer_trajectory[f"{name}_rewards"].get_batch()
"{}_value_estimates".format(name)
f"{name}_value_estimates"
].get_batch()
local_advantage = get_gae(
rewards=local_rewards,

)
local_return = local_advantage + local_value_estimates
# This is later use as target for the different value estimates
agent_buffer_trajectory["{}_returns".format(name)].set(local_return)
agent_buffer_trajectory["{}_advantage".format(name)].set(local_advantage)
agent_buffer_trajectory[f"{name}_returns"].set(local_return)
agent_buffer_trajectory[f"{name}_advantage"].set(local_advantage)
tmp_advantages.append(local_advantage)
tmp_returns.append(local_return)

ml-agents/mlagents/trainers/sac/network.py (6)


"""
self.value_heads = {}
for name in stream_names:
value = tf.layers.dense(hidden_input, 1, name="{}_value".format(name))
value = tf.layers.dense(hidden_input, 1, name=f"{name}_value")
self.value_heads[name] = value
self.value = tf.reduce_mean(list(self.value_heads.values()), 0)

q1_heads = {}
for name in stream_names:
_q1 = tf.layers.dense(q1_hidden, num_outputs, name="{}_q1".format(name))
_q1 = tf.layers.dense(q1_hidden, num_outputs, name=f"{name}_q1")
q1_heads[name] = _q1
q1 = tf.reduce_mean(list(q1_heads.values()), axis=0)

q2_heads = {}
for name in stream_names:
_q2 = tf.layers.dense(q2_hidden, num_outputs, name="{}_q2".format(name))
_q2 = tf.layers.dense(q2_hidden, num_outputs, name=f"{name}_q2")
q2_heads[name] = _q2
q2 = tf.reduce_mean(list(q2_heads.values()), axis=0)

ml-agents/mlagents/trainers/sac/optimizer.py (4)


)
rewards_holder = tf.placeholder(
shape=[None], dtype=tf.float32, name="{}_rewards".format(name)
shape=[None], dtype=tf.float32, name=f"{name}_rewards"
)
self.rewards_holders[name] = rewards_holder

self.policy.mask_input: batch["masks"] * burn_in_mask,
}
for name in self.reward_signals:
feed_dict[self.rewards_holders[name]] = batch["{}_rewards".format(name)]
feed_dict[self.rewards_holders[name]] = batch[f"{name}_rewards"]
if self.policy.use_continuous_act:
feed_dict[self.policy_network.external_action_in] = batch["actions"]

ml-agents/mlagents/trainers/sac/trainer.py (14)


filename = os.path.join(
self.trainer_parameters["model_path"], "last_replay_buffer.hdf5"
)
logger.info("Saving Experience Replay Buffer to {}".format(filename))
logger.info(f"Saving Experience Replay Buffer to {filename}")
with open(filename, "wb") as file_object:
self.update_buffer.save_to_file(file_object)

filename = os.path.join(
self.trainer_parameters["model_path"], "last_replay_buffer.hdf5"
)
logger.info("Loading Experience Replay Buffer from {}".format(filename))
logger.info(f"Loading Experience Replay Buffer from {filename}")
with open(filename, "rb+") as file_object:
self.update_buffer.load_from_file(file_object)
logger.info(

batch_update_stats: Dict[str, list] = defaultdict(list)
while self.step / self.update_steps > self.steps_per_update:
logger.debug("Updating SAC policy at step {}".format(self.step))
logger.debug(f"Updating SAC policy at step {self.step}")
buffer = self.update_buffer
if (
self.update_buffer.num_experiences

)
# Get rewards for each reward
for name, signal in self.optimizer.reward_signals.items():
sampled_minibatch[
"{}_rewards".format(name)
] = signal.evaluate_batch(sampled_minibatch).scaled_reward
sampled_minibatch[f"{name}_rewards"] = signal.evaluate_batch(
sampled_minibatch
).scaled_reward
update_stats = self.optimizer.update(sampled_minibatch, n_sequences)
for stat_name, value in update_stats.items():

# Get minibatches for reward signal update if needed
reward_signal_minibatches = {}
for name, signal in self.optimizer.reward_signals.items():
logger.debug("Updating {} at step {}".format(name, self.step))
logger.debug(f"Updating {name} at step {self.step}")
# Some signals don't need a minibatch to be sampled - so we don't!
if signal.update_dict:
reward_signal_minibatches[name] = buffer.sample_mini_batch(

ml-agents/mlagents/trainers/sampler_class.py (2)


for param_name, cur_param_dict in self.reset_param_dict.items():
if "sampler-type" not in cur_param_dict:
raise SamplerException(
"'sampler_type' argument hasn't been supplied for the {0} parameter".format(
"'sampler_type' argument hasn't been supplied for the {} parameter".format(
param_name
)
)

ml-agents/mlagents/trainers/stats.py (12)


)
if self.self_play and "Self-play/ELO" in values:
elo_stats = values["Self-play/ELO"]
logger.info("{} ELO: {:0.3f}. ".format(category, elo_stats.mean))
logger.info(f"{category} ELO: {elo_stats.mean:0.3f}. ")
else:
logger.info(
"{}: Step: {}. No episode was completed since last summary. {}".format(

) -> None:
if property_type == StatsPropertyType.HYPERPARAMETERS:
logger.info(
"""Hyperparameters for behavior name {0}: \n{1}""".format(
"""Hyperparameters for behavior name {}: \n{}""".format(
category, self._dict_to_str(value, 0)
)
)

[
"\t"
+ " " * num_tabs
+ "{0}:\t{1}".format(
+ "{}:\t{}".format(
x, self._dict_to_str(param_dict[x], num_tabs + 1)
)
for x in param_dict

self._maybe_create_summary_writer(category)
for key, value in values.items():
summary = tf.Summary()
summary.value.add(tag="{}".format(key), simple_value=value.mean)
summary.value.add(tag=f"{key}", simple_value=value.mean)
self.summary_writers[category].add_summary(summary, step)
self.summary_writers[category].flush()

for file_name in os.listdir(directory_name):
if file_name.startswith("events.out"):
logger.warning(
"{} was left over from a previous run. Deleting.".format(file_name)
f"{file_name} was left over from a previous run. Deleting."
)
full_fname = os.path.join(directory_name, file_name)
try:

s_op = tf.summary.text(
name,
tf.convert_to_tensor(
([[str(x), str(input_dict[x])] for x in input_dict])
[[str(x), str(input_dict[x])] for x in input_dict]
),
)
s = sess.run(s_op)

ml-agents/mlagents/trainers/tests/test_simple_rl.py (2)


def default_reward_processor(rewards, last_n_rewards=5):
rewards_to_use = rewards[-last_n_rewards:]
# For debugging tests
print("Last {} rewards:".format(last_n_rewards), rewards_to_use)
print(f"Last {last_n_rewards} rewards:", rewards_to_use)
return np.array(rewards[-last_n_rewards:], dtype=np.float32).mean()

ml-agents/mlagents/trainers/trainer/rl_trainer.py (2)


"""
def __init__(self, *args, **kwargs):
super(RLTrainer, self).__init__(*args, **kwargs)
super().__init__(*args, **kwargs)
# Make sure we have at least one reward_signal
if not self.trainer_parameters["reward_signals"]:
raise UnityTrainerException(

ml-agents/mlagents/trainers/trainer/trainer.py (4)


for k in self.param_keys:
if k not in self.trainer_parameters:
raise UnityTrainerException(
"The hyper-parameter {0} could not be found for the {1} trainer of "
"brain {2}.".format(k, self.__class__, self.brain_name)
"The hyper-parameter {} could not be found for the {} trainer of "
"brain {}.".format(k, self.__class__, self.brain_name)
)
@property

ml-agents/mlagents/trainers/trainer_controller.py (2)


from mlagents.trainers.agent_processor import AgentManager
class TrainerController(object):
class TrainerController:
def __init__(
self,
trainer_factory: TrainerFactory,

ml-agents/mlagents/trainers/trainer_util.py (2)


try:
with open(config_path) as data_file:
return _load_config(data_file)
except IOError:
except OSError:
abs_path = os.path.abspath(config_path)
raise TrainerConfigError(f"Config file could not be found at {abs_path}.")
except UnicodeDecodeError:

ml-agents/setup.py (5)


from io import open
import os
import sys

tag = os.getenv("CIRCLE_TAG")
if tag != EXPECTED_TAG:
info = "Git tag: {0} does not match the expected tag of this app: {1}".format(
info = "Git tag: {} does not match the expected tag of this app: {}".format(
tag, EXPECTED_TAG
)
sys.exit(info)

# Test-only dependencies should go in test_requirements.txt, not here.
"grpcio>=1.11.0",
"h5py>=2.9.0",
"mlagents_envs=={}".format(VERSION),
f"mlagents_envs=={VERSION}",
"numpy>=1.13.3,<2.0",
"Pillow>=4.2.1",
"protobuf>=3.6",

ml-agents/tests/yamato/check_coverage_percent.py (3)


from __future__ import print_function
import sys
import os

summary_xml = os.path.join(dirpath, SUMMARY_XML_FILENAME)
break
if not summary_xml:
print("Couldn't find {} in root directory".format(SUMMARY_XML_FILENAME))
print(f"Couldn't find {SUMMARY_XML_FILENAME} in root directory")
sys.exit(1)
with open(summary_xml) as f:

ml-agents/tests/yamato/scripts/run_gym.py (4)


if len(env.observation_space.shape) == 1:
# Examine the initial vector observation
print("Agent observations look like: \n{}".format(initial_observations))
print(f"Agent observations look like: \n{initial_observations}")
for _episode in range(10):
env.reset()

actions = env.action_space.sample()
obs, reward, done, _ = env.step(actions)
episode_rewards += reward
print("Total reward this episode: {}".format(episode_rewards))
print(f"Total reward this episode: {episode_rewards}")
finally:
env.close()

ml-agents/tests/yamato/scripts/run_llapi.py (2)


if tracked_agent in terminal_steps:
episode_rewards += terminal_steps[tracked_agent].reward
done = True
print("Total reward this episode: {}".format(episode_rewards))
print(f"Total reward this episode: {episode_rewards}")
finally:
env.close()

ml-agents/tests/yamato/yamato_utils.py (3)


if extra_packages:
pip_commands += extra_packages
for cmd in pip_commands:
pip_index_url = "--index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple"
f"source {venv_path}/bin/activate; python -m pip install -q {cmd}",
f"source {venv_path}/bin/activate; python -m pip install -q {cmd} {pip_index_url}",
shell=True,
)
return venv_path

test_requirements.txt (2)


# Test-only dependencies should go here, not in setup.py
pytest>4.0.0,<6.0.0
pytest-cov==2.6.1
pytest-xdist
pytest-xdist==1.34.0
# onnx doesn't currently have a wheel for 3.8
tf2onnx>=1.5.5;python_version<'3.8'

utils/validate_versions.py (2)


def set_package_version(new_version: str) -> None:
with open(UNITY_PACKAGE_JSON_PATH, "r") as f:
with open(UNITY_PACKAGE_JSON_PATH) as f:
package_json = json.load(f)
if "version" in package_json:
package_json["version"] = new_version
