
Merge branch 'self-play-mutex' into soccer-2v1

/asymm-envs
Andrew Cohen, 4 years ago
Current commit
a870d453
Showing 15 changed files with 295 additions and 81 deletions
  1. .yamato/training-int-tests.yml (5 changed lines)
  2. com.unity.ml-agents/CHANGELOG.md (3 changed lines)
  3. gym-unity/gym_unity/envs/__init__.py (25 changed lines)
  4. gym-unity/gym_unity/tests/test_gym.py (48 changed lines)
  5. ml-agents/mlagents/trainers/behavior_id_utils.py (2 changed lines)
  6. ml-agents/mlagents/trainers/distributions.py (2 changed lines)
  7. ml-agents/mlagents/trainers/sac/optimizer.py (1 changed line)
  8. ml-agents/mlagents/trainers/subprocess_env_manager.py (104 changed lines)
  9. ml-agents/mlagents/trainers/tests/simple_test_envs.py (2 changed lines)
  10. ml-agents/mlagents/trainers/tests/test_distributions.py (10 changed lines)
  11. ml-agents/mlagents/trainers/tests/test_simple_rl.py (26 changed lines)
  12. ml-agents/mlagents/trainers/tests/test_subprocess_env_manager.py (46 changed lines)
  13. ml-agents/mlagents/trainers/trainer_controller.py (14 changed lines)
  14. ml-agents/tests/yamato/training_int_tests.py (37 changed lines)
  15. ml-agents/tests/yamato/yamato_utils.py (51 changed lines)

.yamato/training-int-tests.yml (5 changed lines)


commands:
- pip install pyyaml
- python -u -m ml-agents.tests.yamato.training_int_tests
# Backwards-compatibility tests.
# If we make a breaking change to the communication protocol, these will need
# to be disabled until the next release.
- python -u -m ml-agents.tests.yamato.training_int_tests --python=0.15.0
- python -u -m ml-agents.tests.yamato.training_int_tests --csharp=0.15.0
triggers:
cancel_old_ci: true
changes:

com.unity.ml-agents/CHANGELOG.md (3 changed lines)


- Renamed 'Generalization' feature to 'Environment Parameter Randomization'.
- Fixed an issue where specifying `vis_encode_type` was required only for SAC. (#3677)
- The way that UnityEnvironment decides the port was changed. If no port is specified, the behavior will depend on the `file_name` parameter. If it is `None`, 5004 (the editor port) will be used; otherwise 5005 (the base environment port) will be used. (A sketch of this rule follows this list.)
- Fixed the reported entropy values for continuous actions (#3684)
- Fixed an issue in the gym wrapper that would raise an exception if an Agent called EndEpisode multiple times in the same step. (#3700)
- Fixed an issue where exceptions from environments provided a returncode of 0. (#3680)
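A minimal sketch of the port-selection rule described in the UnityEnvironment entry above (the helper name and argument layout are illustrative, not the actual UnityEnvironment code):

```python
DEFAULT_EDITOR_PORT = 5004       # used when connecting to the Unity Editor
BASE_ENVIRONMENT_PORT = 5005     # used when launching a built executable

def resolve_port(file_name=None, base_port=None):
    """Illustrative only: mirrors the changelog wording."""
    if base_port is not None:    # an explicitly requested port always wins
        return base_port
    return DEFAULT_EDITOR_PORT if file_name is None else BASE_ENVIRONMENT_PORT
```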
## [0.15.0-preview] - 2020-03-18
### Major Changes

gym-unity/gym_unity/envs/__init__.py (25 changed lines)


def _sanitize_info(self, step_result: BatchedStepResult) -> BatchedStepResult:
n_extra_agents = step_result.n_agents() - self._n_agents
if n_extra_agents < 0 or n_extra_agents > self._n_agents:
if n_extra_agents < 0:
# or too many requested a decision
raise UnityGymException(
"The number of agents in the scene does not match the expected number."
)

# only cares about the ordering.
for index, agent_id in enumerate(step_result.agent_id):
if not self._previous_step_result.contains_agent(agent_id):
if step_result.done[index]:
# If the Agent is already done (e.g. it ended its episode twice in one step)
# Don't try to register it here.
continue
# Register this agent, and get the reward of the previous agent that
# was in its index, so that we can return it to the gym.
last_reward = self.agent_mapper.register_new_agent_id(agent_id)

"""
Declare the agent done with the corresponding final reward.
"""
gym_index = self._agent_id_to_gym_index.pop(agent_id)
self._done_agents_index_to_last_reward[gym_index] = reward
if agent_id in self._agent_id_to_gym_index:
gym_index = self._agent_id_to_gym_index.pop(agent_id)
self._done_agents_index_to_last_reward[gym_index] = reward
else:
# Agent was never registered in the first place (e.g. EndEpisode called multiple times)
pass
def register_new_agent_id(self, agent_id: int) -> float:
"""

self._gym_id_order = list(agent_ids)
def mark_agent_done(self, agent_id: int, reward: float) -> None:
gym_index = self._gym_id_order.index(agent_id)
self._done_agents_index_to_last_reward[gym_index] = reward
self._gym_id_order[gym_index] = -1
try:
gym_index = self._gym_id_order.index(agent_id)
self._done_agents_index_to_last_reward[gym_index] = reward
self._gym_id_order[gym_index] = -1
except ValueError:
# Agent was never registered in the first place (e.g. EndEpisode called multiple times)
pass
def register_new_agent_id(self, agent_id: int) -> float:
original_index = self._gym_id_order.index(-1)

gym-unity/gym_unity/tests/test_gym.py (48 changed lines)


assert expected_agent_id == agent_id
@mock.patch("gym_unity.envs.UnityEnvironment")
def test_sanitize_action_new_agent_done(mock_env):
mock_spec = create_mock_group_spec(
vector_action_space_type="discrete", vector_action_space_size=[2, 2, 3]
)
mock_step = create_mock_vector_step_result(num_agents=3)
mock_step.agent_id = np.array(range(5))
setup_mock_unityenvironment(mock_env, mock_spec, mock_step)
env = UnityEnv(" ", use_visual=False, multiagent=True)
received_step_result = create_mock_vector_step_result(num_agents=7)
received_step_result.agent_id = np.array(range(7))
# agent #3 (id = 2) is Done
# so is the "new" agent (id = 5)
done = [False] * 7
done[2] = True
done[5] = True
received_step_result.done = np.array(done)
sanitized_result = env._sanitize_info(received_step_result)
for expected_agent_id, agent_id in zip([0, 1, 6, 3, 4], sanitized_result.agent_id):
assert expected_agent_id == agent_id
@mock.patch("gym_unity.envs.UnityEnvironment")
def test_sanitize_action_single_agent_multiple_done(mock_env):
mock_spec = create_mock_group_spec(
vector_action_space_type="discrete", vector_action_space_size=[2, 2, 3]
)
mock_step = create_mock_vector_step_result(num_agents=1)
mock_step.agent_id = np.array(range(1))
setup_mock_unityenvironment(mock_env, mock_spec, mock_step)
env = UnityEnv(" ", use_visual=False, multiagent=False)
received_step_result = create_mock_vector_step_result(num_agents=3)
received_step_result.agent_id = np.array(range(3))
# original agent (id = 0) is Done
# so is the "new" agent (id = 1)
done = [True, True, False]
received_step_result.done = np.array(done)
sanitized_result = env._sanitize_info(received_step_result)
for expected_agent_id, agent_id in zip([2], sanitized_result.agent_id):
assert expected_agent_id == agent_id
# Helper methods

# Mark some agents as done with their last rewards.
mapper.mark_agent_done(1001, 42.0)
mapper.mark_agent_done(1004, 1337.0)
# Make sure we can handle an unknown agent id being marked done.
# This can happen when an agent ends an episode on the same step it starts.
mapper.mark_agent_done(9999, -1.0)
# Now add new agents, and get the rewards of the agent they replaced.
old_reward1 = mapper.register_new_agent_id(2001)

ml-agents/mlagents/trainers/behavior_id_utils.py (2 changed lines)


"""
Parses a name_behavior_id of the form name?team=0
into a BehaviorIdentifiers NamedTuple.
This allows you to access the brain name and team id og an agent
This allows you to access the brain name and team id of an agent
:param name_behavior_id: String of behavior params in HTTP format.
:returns: A BehaviorIdentifiers object.
"""

ml-agents/mlagents/trainers/distributions.py (2 changed lines)


self, encoded: "GaussianDistribution.MuSigmaTensors"
) -> tf.Tensor:
single_dim_entropy = 0.5 * tf.reduce_mean(
tf.log(2 * np.pi * np.e) + tf.square(encoded.log_sigma)
tf.log(2 * np.pi * np.e) + 2 * encoded.log_sigma
)
# Make entropy the right shape
return tf.ones_like(tf.reshape(encoded.mu[:, 0], [-1])) * single_dim_entropy
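For reference, the per-dimension differential entropy of a Gaussian with standard deviation σ = exp(log_sigma) is

$$ H = \tfrac{1}{2}\log\!\left(2\pi e\,\sigma^{2}\right) = \tfrac{1}{2}\left(\log(2\pi e) + 2\log\sigma\right), $$

which is linear in log σ. The removed `tf.square(encoded.log_sigma)` term computed (log σ)² rather than 2·log σ; the one-line change above restores the closed-form expression (this is the entropy fix listed as #3684 in the CHANGELOG).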

ml-agents/mlagents/trainers/sac/optimizer.py (1 changed line)


"q1_loss": self.q1_loss,
"q2_loss": self.q2_loss,
"entropy_coef": self.ent_coef,
"entropy": self.policy.entropy,
"update_batch": self.update_batch_policy,
"update_value": self.update_batch_value,
"update_entropy": self.update_batch_entropy,

ml-agents/mlagents/trainers/subprocess_env_manager.py (104 changed lines)


import logging
from typing import Dict, NamedTuple, List, Any, Optional, Callable, Set, Tuple
import cloudpickle
import enum
from mlagents_envs.exception import UnityCommunicationException, UnityTimeOutException
from mlagents_envs.exception import (
UnityCommunicationException,
UnityTimeOutException,
UnityEnvironmentException,
)
from multiprocessing import Process, Pipe, Queue
from multiprocessing.connection import Connection
from queue import Empty as EmptyQueueException

logger = logging.getLogger("mlagents.trainers")
class EnvironmentCommand(NamedTuple):
name: str
class EnvironmentCommand(enum.Enum):
STEP = 1
EXTERNAL_BRAINS = 2
GET_PROPERTIES = 3
RESET = 4
CLOSE = 5
ENV_EXITED = 6
class EnvironmentRequest(NamedTuple):
cmd: EnvironmentCommand
name: str
cmd: EnvironmentCommand
worker_id: int
payload: Any
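For context, this hunk replaces free-form string command names with an `enum.Enum` and typed request tuples. A standalone sketch of the idea (field layout simplified, not the exact module):

```python
import enum
from typing import Any, NamedTuple

class EnvironmentCommand(enum.Enum):
    STEP = 1
    RESET = 4
    CLOSE = 5

class EnvironmentRequest(NamedTuple):
    cmd: EnvironmentCommand
    payload: Any = None

req = EnvironmentRequest(EnvironmentCommand.STEP, payload={"brain_name": "actions"})
# Enum members are compared by identity, so a mistyped command fails loudly
# instead of silently falling through a string-based dispatch.
assert req.cmd is EnvironmentCommand.STEP
```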

self.previous_all_action_info: Dict[str, ActionInfo] = {}
self.waiting = False
def send(self, name: str, payload: Any = None) -> None:
def send(self, cmd: EnvironmentCommand, payload: Any = None) -> None:
cmd = EnvironmentCommand(name, payload)
self.conn.send(cmd)
req = EnvironmentRequest(cmd, payload)
self.conn.send(req)
except (BrokenPipeError, EOFError):
raise UnityCommunicationException("UnityEnvironment worker: send failed.")

if response.cmd == EnvironmentCommand.ENV_EXITED:
env_exception: Exception = response.payload
raise env_exception
return response
except (BrokenPipeError, EOFError):
raise UnityCommunicationException("UnityEnvironment worker: recv failed.")

self.conn.send(EnvironmentCommand("close"))
self.conn.send(EnvironmentRequest(EnvironmentCommand.CLOSE))
except (BrokenPipeError, EOFError):
logger.debug(
f"UnityEnvWorker {self.worker_id} got exception trying to close."

engine_configuration_channel = EngineConfigurationChannel()
engine_configuration_channel.set_configuration(engine_configuration)
stats_channel = StatsSideChannel()
env: BaseEnv = env_factory(
worker_id,
[shared_float_properties, engine_configuration_channel, stats_channel],
)
env: BaseEnv = None
def _send_response(cmd_name, payload):
def _send_response(cmd_name: EnvironmentCommand, payload: Any) -> None:
parent_conn.send(EnvironmentResponse(cmd_name, worker_id, payload))
def _generate_all_results() -> AllStepResult:

return result
try:
env = env_factory(
worker_id,
[shared_float_properties, engine_configuration_channel, stats_channel],
)
cmd: EnvironmentCommand = parent_conn.recv()
if cmd.name == "step":
all_action_info = cmd.payload
req: EnvironmentRequest = parent_conn.recv()
if req.cmd == EnvironmentCommand.STEP:
all_action_info = req.payload
for brain_name, action_info in all_action_info.items():
if len(action_info.action) != 0:
env.set_actions(brain_name, action_info.action)

step_response = StepResponse(
all_step_result, get_timer_root(), env_stats
)
step_queue.put(EnvironmentResponse("step", worker_id, step_response))
step_queue.put(
EnvironmentResponse(
EnvironmentCommand.STEP, worker_id, step_response
)
)
elif cmd.name == "external_brains":
_send_response("external_brains", external_brains())
elif cmd.name == "get_properties":
elif req.cmd == EnvironmentCommand.EXTERNAL_BRAINS:
_send_response(EnvironmentCommand.EXTERNAL_BRAINS, external_brains())
elif req.cmd == EnvironmentCommand.GET_PROPERTIES:
_send_response("get_properties", reset_params)
elif cmd.name == "reset":
for k, v in cmd.payload.items():
_send_response(EnvironmentCommand.GET_PROPERTIES, reset_params)
elif req.cmd == EnvironmentCommand.RESET:
for k, v in req.payload.items():
_send_response("reset", all_step_result)
elif cmd.name == "close":
_send_response(EnvironmentCommand.RESET, all_step_result)
elif req.cmd == EnvironmentCommand.CLOSE:
except (KeyboardInterrupt, UnityCommunicationException, UnityTimeOutException):
except (
KeyboardInterrupt,
UnityCommunicationException,
UnityTimeOutException,
UnityEnvironmentException,
) as ex:
step_queue.put(EnvironmentResponse("env_close", worker_id, None))
step_queue.put(
EnvironmentResponse(EnvironmentCommand.ENV_EXITED, worker_id, ex)
)
_send_response(EnvironmentCommand.ENV_EXITED, ex)
finally:
# If this worker has put an item in the step queue that hasn't been processed by the EnvManager, the process
# will hang until the item is processed. We avoid this behavior by using Queue.cancel_join_thread()

step_queue.cancel_join_thread()
step_queue.close()
env.close()
if env is not None:
env.close()
logger.debug(f"UnityEnvironment worker {worker_id} done.")
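A self-contained sketch of the queue shutdown pattern described in the comment above (simplified; the real worker loop lives in subprocess_env_manager.py):

```python
from multiprocessing import Process, Queue

def worker(step_queue: Queue) -> None:
    try:
        step_queue.put("step result")  # the parent may never drain this item
    finally:
        # Without cancel_join_thread(), interpreter shutdown can block while the
        # queue's feeder thread waits for buffered items to be flushed.
        step_queue.cancel_join_thread()
        step_queue.close()

if __name__ == "__main__":
    q = Queue()
    p = Process(target=worker, args=(q,))
    p.start()
    p.join()  # the worker exits cleanly even though the parent never reads from q
```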

if not env_worker.waiting:
env_action_info = self._take_step(env_worker.previous_step)
env_worker.previous_all_action_info = env_action_info
env_worker.send("step", env_action_info)
env_worker.send(EnvironmentCommand.STEP, env_action_info)
env_worker.waiting = True
def _step(self) -> List[EnvironmentStep]:

while len(worker_steps) < 1:
try:
while True:
step = self.step_queue.get_nowait()
if step.name == "env_close":
raise UnityCommunicationException(
"At least one of the environments has closed."
)
step: EnvironmentResponse = self.step_queue.get_nowait()
if step.cmd == EnvironmentCommand.ENV_EXITED:
env_exception: Exception = step.payload
raise env_exception
self.env_workers[step.worker_id].waiting = False
if step.worker_id not in step_workers:
worker_steps.append(step)

self.env_workers[step.worker_id].waiting = False
# First enqueue reset commands for all workers so that they reset in parallel
for ew in self.env_workers:
ew.send("reset", config)
ew.send(EnvironmentCommand.RESET, config)
# Next (synchronously) collect the reset observations from each worker in sequence
for ew in self.env_workers:
ew.previous_step = EnvironmentStep(ew.recv().payload, ew.worker_id, {}, {})

def external_brains(self) -> Dict[AgentGroup, BrainParameters]:
self.env_workers[0].send("external_brains")
self.env_workers[0].send(EnvironmentCommand.EXTERNAL_BRAINS)
self.env_workers[0].send("get_properties")
self.env_workers[0].send(EnvironmentCommand.GET_PROPERTIES)
return self.env_workers[0].recv().payload
def close(self) -> None:

ml-agents/mlagents/trainers/tests/simple_test_envs.py (2 changed lines)


VIS_OBS_SIZE = (20, 20, 3)
STEP_SIZE = 0.1
TIME_PENALTY = 0.001
TIME_PENALTY = 0.01
MIN_STEPS = int(1.0 / STEP_SIZE) + 1
SUCCESS_REWARD = 1.0 + MIN_STEPS * TIME_PENALTY

ml-agents/mlagents/trainers/tests/test_distributions.py (10 changed lines)


def test_gaussian_distribution():
with tf.Graph().as_default():
logits = tf.Variable(initial_value=[[0, 0]], trainable=True, dtype=tf.float32)
logits = tf.Variable(initial_value=[[1, 1]], trainable=True, dtype=tf.float32)
distribution = GaussianDistribution(
logits,
act_size=VECTOR_ACTION_SPACE,

assert out.shape[1] == VECTOR_ACTION_SPACE[0]
output = sess.run([distribution.total_log_probs])
assert output[0].shape[0] == 1
# Test entropy is correct
log_std_tensor = tf.get_default_graph().get_tensor_by_name(
"log_std/BiasAdd:0"
)
feed_dict = {log_std_tensor: [[1.0, 1.0]]}
entropy = sess.run([distribution.entropy], feed_dict=feed_dict)
# Entropy with log_std of 1.0 should be 2.42
assert pytest.approx(entropy[0], 0.01) == 2.42
def test_tanh_distribution():
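The expected value in the entropy assertion above can be checked by hand with plain NumPy, independent of the TensorFlow graph:

```python
import numpy as np

log_sigma = 1.0
entropy = 0.5 * (np.log(2 * np.pi * np.e) + 2 * log_sigma)
print(round(entropy, 2))  # 2.42, the value asserted in the test
```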

ml-agents/mlagents/trainers/tests/test_simple_rl.py (26 changed lines)


lambd: 0.95
learning_rate: 5.0e-3
learning_rate_schedule: constant
max_steps: 2000
max_steps: 3000
memory_size: 16
normalize: false
num_epoch: 3

# Custom reward processors should be built within the test function and passed to _check_environment_trains
# Default is average over the last 5 final rewards
def default_reward_processor(rewards, last_n_rewards=5):
rewards_to_use = rewards[-last_n_rewards:]
# For debugging tests
print("Last {} rewards:".format(last_n_rewards), rewards_to_use)
return np.array(rewards[-last_n_rewards:], dtype=np.float32).mean()

trainer_config,
reward_processor=default_reward_processor,
meta_curriculum=None,
success_threshold=0.99,
success_threshold=0.9,
env_manager=None,
):
# Create controller and begin training.

if (
success_threshold is not None
): # For tests where we are just checking setup and not reward
processed_rewards = [
reward_processor(rewards) for rewards in env.final_rewards.values()
]

def test_recurrent_ppo(use_discrete):
env = MemoryEnvironment([BRAIN_NAME], use_discrete=use_discrete)
override_vals = {
"max_steps": 4000,
"max_steps": 5000,
"batch_size": 64,
"buffer_size": 128,
"learning_rate": 1e-3,

@pytest.mark.parametrize("use_discrete", [True, False])
def test_2d_sac(use_discrete):
env = SimpleEnvironment(
[BRAIN_NAME], use_discrete=use_discrete, action_size=2, step_size=0.5
[BRAIN_NAME], use_discrete=use_discrete, action_size=2, step_size=0.8
override_vals = {"buffer_init_steps": 2000, "max_steps": 3000}
override_vals = {"buffer_init_steps": 2000, "max_steps": 4000}
_check_environment_trains(env, config)
_check_environment_trains(env, config, success_threshold=0.8)
@pytest.mark.parametrize("use_discrete", [True, False])

@pytest.mark.parametrize("use_discrete", [True, False])
def test_recurrent_sac(use_discrete):
env = MemoryEnvironment([BRAIN_NAME], use_discrete=use_discrete)
override_vals = {"batch_size": 32, "use_recurrent": True, "max_steps": 2000}
override_vals = {
"batch_size": 64,
"use_recurrent": True,
"max_steps": 3000,
"learning_rate": 1e-3,
"buffer_init_steps": 500,
}
config = generate_config(SAC_CONFIG, override_vals)
_check_environment_trains(env, config)

processed_rewards = [
default_reward_processor(rewards) for rewards in env.final_rewards.values()
]
success_threshold = 0.99
success_threshold = 0.9
assert any(reward > success_threshold for reward in processed_rewards) and any(
reward < success_threshold for reward in processed_rewards
)

ml-agents/mlagents/trainers/tests/test_subprocess_env_manager.py (46 changed lines)


SubprocessEnvManager,
EnvironmentResponse,
StepResponse,
EnvironmentCommand,
from mlagents_envs.exception import UnityEnvironmentException
from mlagents.trainers.tests.simple_test_envs import SimpleEnvironment
from mlagents.trainers.stats import StatsReporter
from mlagents.trainers.tests.test_simple_rl import (

def create_worker_mock(worker_id, step_queue, env_factor, engine_c):
return MockEnvWorker(worker_id, EnvironmentResponse("reset", worker_id, worker_id))
return MockEnvWorker(
worker_id, EnvironmentResponse(EnvironmentCommand.RESET, worker_id, worker_id)
)
class SubprocessEnvManagerTest(unittest.TestCase):

)
params = {"test": "params"}
manager._reset_env(params)
manager.env_workers[0].send.assert_called_with("reset", (params))
manager.env_workers[0].send.assert_called_with(
EnvironmentCommand.RESET, (params)
)
@mock.patch(
"mlagents.trainers.subprocess_env_manager.SubprocessEnvManager.create_worker"

params = {"test": "params"}
res = manager._reset_env(params)
for i, env in enumerate(manager.env_workers):
env.send.assert_called_with("reset", (params))
env.send.assert_called_with(EnvironmentCommand.RESET, (params))
env.recv.assert_called()
# Check that the "last steps" are set to the value returned for each step
self.assertEqual(

)
manager.step_queue = Mock()
manager.step_queue.get_nowait.side_effect = [
EnvironmentResponse("step", 0, StepResponse(0, None, {})),
EnvironmentResponse("step", 1, StepResponse(1, None, {})),
EnvironmentResponse(EnvironmentCommand.STEP, 0, StepResponse(0, None, {})),
EnvironmentResponse(EnvironmentCommand.STEP, 1, StepResponse(1, None, {})),
EmptyQueue(),
]
step_mock = Mock()

res = manager._step()
for i, env in enumerate(manager.env_workers):
if i < 2:
env.send.assert_called_with("step", step_mock)
env.send.assert_called_with(EnvironmentCommand.STEP, step_mock)
manager.step_queue.get_nowait.assert_called()
# Check that the "last steps" are set to the value returned for each step
self.assertEqual(

env_manager.advance()
assert env_manager.policies[brain_name] == mock_policy
assert agent_manager_mock.policy == mock_policy
def simple_env_factory(worker_id, config):
env = SimpleEnvironment(["1D"], use_discrete=True)
return env
def simple_env_factory(worker_id, config):
env = SimpleEnvironment(["1D"], use_discrete=True)
return env
env_manager = SubprocessEnvManager(
simple_env_factory, EngineConfig.default_config(), num_envs
)

val > 0.7 for val in StatsReporter.writers[0].get_last_rewards().values()
)
env_manager.close()
@pytest.mark.parametrize("num_envs", [1, 4])
def test_subprocess_env_raises_errors(num_envs):
def failing_env_factory(worker_id, config):
import time
# Sleep momentarily to allow time for the EnvManager to be waiting for the
# subprocess response. We won't be able to capture failures from the subprocess
# that cause it to close the pipe before we can send the first message.
time.sleep(0.1)
raise UnityEnvironmentException()
env_manager = SubprocessEnvManager(
failing_env_factory, EngineConfig.default_config(), num_envs
)
with pytest.raises(UnityEnvironmentException):
env_manager.reset()
env_manager.close()

ml-agents/mlagents/trainers/trainer_controller.py (14 changed lines)


# Final save Tensorflow model
if global_step != 0 and self.train_model:
self._save_model()
except (KeyboardInterrupt, UnityCommunicationException):
except (
KeyboardInterrupt,
UnityCommunicationException,
UnityEnvironmentException,
) as ex:
pass
if isinstance(ex, KeyboardInterrupt):
pass
else:
# If the environment failed, we want to make sure to raise
# the exception so we exit the process with a return code of 1.
raise ex
if self.train_model:
self._export_graph()

ml-agents/tests/yamato/training_int_tests.py (37 changed lines)


import argparse
import time
from .yamato_utils import (
get_base_path,

checkout_csharp_version,
undo_git_checkout,
def main():
nn_file_expected = "./models/ppo/3DBall.nn"
def run_training(python_version, csharp_version):
latest = "latest"
run_id = int(time.time() * 1000.0)
print(
f"Running training with python={python_version or latest} and c#={csharp_version or latest}"
)
nn_file_expected = f"./models/{run_id}/3DBall.nn"
if os.path.exists(nn_file_expected):
# Should never happen - make sure nothing leftover from an old test.
print("Artifacts from previous build found!")

print(f"Running in base path {base_path}")
if csharp_version is not None:
checkout_csharp_version(csharp_version)
init_venv()
venv_path = init_venv(python_version)
# Copy the default training config but override the max_steps parameter,
# and reduce the batch_size and buffer_size enough to ensure an update step happens.

# TODO pass scene name and exe destination to build
# TODO make sure we fail if the exe isn't found - see MLA-559
mla_learn_cmd = "mlagents-learn override.yaml --train --env=Project/testPlayer --no-graphics --env-args -logFile -" # noqa
res = subprocess.run(f"source venv/bin/activate; {mla_learn_cmd}", shell=True)
mla_learn_cmd = f"mlagents-learn override.yaml --train --env=Project/testPlayer --run-id={run_id} --no-graphics --env-args -logFile -" # noqa
res = subprocess.run(
f"source {venv_path}/bin/activate; {mla_learn_cmd}", shell=True
)
if res.returncode != 0 or not os.path.exists(nn_file_expected):
print("mlagents-learn run FAILED!")

sys.exit(0)
def main():
parser = argparse.ArgumentParser()
parser.add_argument("--python", default=None)
parser.add_argument("--csharp", default=None)
args = parser.parse_args()
try:
run_training(args.python, args.csharp)
finally:
# Cleanup - this gets executed even if we hit sys.exit()
undo_git_checkout()
if __name__ == "__main__":

ml-agents/tests/yamato/yamato_utils.py (51 changed lines)


return res.returncode
def init_venv():
def init_venv(mlagents_python_version: str = None) -> str:
"""
Set up the virtual environment, and return the venv path.
:param mlagents_python_version: The version of the mlagents Python package to install.
If None, will do a local install, otherwise will install from pypi
:return:
"""
# Use a different venv path for different versions
venv_path = "venv"
if mlagents_python_version:
venv_path += "_" + mlagents_python_version
subprocess.check_call("python -m venv venv", shell=True)
subprocess.check_call(f"python -m venv {venv_path}", shell=True)
"-e ./ml-agents-envs",
"-e ./ml-agents",
if mlagents_python_version:
# install from pypi
pip_commands.append(f"mlagents=={mlagents_python_version}")
else:
# Local install
pip_commands += ["-e ./ml-agents-envs", "-e ./ml-agents"]
f"source venv/bin/activate; python -m pip install -q {cmd}", shell=True
f"source {venv_path}/bin/activate; python -m pip install -q {cmd}",
shell=True,
)
return venv_path
def checkout_csharp_version(csharp_version):
"""
Checks out the specific git revision (usually a tag) for the C# package and Project.
If csharp_version is None, no changes are made.
:param csharp_version:
:return:
"""
if csharp_version is None:
return
csharp_dirs = ["com.unity.ml-agents", "Project"]
for csharp_dir in csharp_dirs:
subprocess.check_call(
f"git checkout {csharp_version} -- {csharp_dir}", shell=True
def undo_git_checkout():
"""
Clean up the git working directory.
"""
subprocess.check_call("git reset HEAD .", shell=True)
subprocess.check_call("git checkout -- .", shell=True)
def override_config_file(src_path, dest_path, **kwargs):
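A hypothetical end-to-end use of the helpers above, along the lines of the backwards-compatibility runs in training_int_tests.py (the version string and control flow are illustrative):

```python
venv_path = init_venv("0.15.0")      # creates "venv_0.15.0" and installs mlagents==0.15.0 from PyPI
checkout_csharp_version("0.15.0")    # pins com.unity.ml-agents and Project to that revision
try:
    pass  # build the test player, run mlagents-learn, verify the exported .nn file
finally:
    undo_git_checkout()              # restore the git working tree regardless of outcome
```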
