浏览代码

Formatting

/develop/rm-rf-new-models
vincentpierre 4 年前
当前提交
2dd34aa5
共有 3 个文件被更改，包括 7 次插入和 12 次删除
  1. 3
      ml-agents/mlagents/trainers/ppo/trainer.py
  2. 11
      ml-agents/mlagents/trainers/sac/trainer.py
  3. 5
      ml-agents/tests/yamato/yamato_utils.py

3
ml-agents/mlagents/trainers/ppo/trainer.py


)
for name, reward_signal in self.optimizer.reward_signals.items():
evaluate_result = (
- reward_signal.evaluate(agent_buffer_trajectory)
- * reward_signal.strength
+ reward_signal.evaluate(agent_buffer_trajectory) * reward_signal.strength
)
agent_buffer_trajectory[f"{name}_rewards"].extend(evaluate_result)
# Report the reward signals

11
ml-agents/mlagents/trainers/sac/trainer.py


)
for name, reward_signal in self.optimizer.reward_signals.items():
evaluate_result = (
- reward_signal.evaluate(agent_buffer_trajectory)
- * reward_signal.strength
+ reward_signal.evaluate(agent_buffer_trajectory) * reward_signal.strength
)
# Report the reward signals

agent_buffer_trajectory, trajectory.next_obs, trajectory.done_reached
)
for name, v in value_estimates.items():
- self._stats_reporter.add_stat(
- f"Policy/{self.optimizer.reward_signals[name].name.capitalize()} Value",
- np.mean(v),
- )
+ self._stats_reporter.add_stat(
+ f"Policy/{self.optimizer.reward_signals[name].name.capitalize()} Value",
+ np.mean(v),
+ )
# Bootstrap using the last step rather than the bootstrap step if max step is reached.
# Set last element to duplicate obs and remove dones.

5
ml-agents/tests/yamato/yamato_utils.py


# Set up the venv and install mlagents
subprocess.check_call(f"python -m venv {venv_path}", shell=True)
- pip_commands = [
- "--upgrade pip",
- "--upgrade setuptools",
- ]
+ pip_commands = ["--upgrade pip", "--upgrade setuptools"]
if mlagents_python_version:
# install from pypi
pip_commands += [

正在加载...
取消
保存