浏览代码

current_best_ratio -> latest_model_ratio

/develop/cubewars
Andrew Cohen 5 年前
当前提交
345fa382
共有 3 个文件被更改,包括 9 次插入 和 9 次删除
  1. 4
      config/trainer_config.yaml
  2. 6
      ml-agents/mlagents/trainers/ghost/trainer.py
  3. 8
      ml-agents/mlagents/trainers/tests/test_simple_rl.py

4
config/trainer_config.yaml


time_horizon: 1000
self_play:
window: 10
- play_against_current_best_ratio: 0.5
+ play_against_latest_model_ratio: 0.5
save_steps: 50000
swap_steps: 50000
team_change: 100000

num_layers: 2
self_play:
window: 10
- play_against_current_best_ratio: 0.5
+ play_against_latest_model_ratio: 0.5
save_steps: 50000
swap_steps: 50000
team_change: 100000

6
ml-agents/mlagents/trainers/ghost/trainer.py


self_play_parameters = trainer_parameters["self_play"]
self.window = self_play_parameters.get("window", 10)
- self.play_against_current_best_ratio = self_play_parameters.get(
- "play_against_current_best_ratio", 0.5
+ self.play_against_latest_model_ratio = self_play_parameters.get(
+ "play_against_latest_model_ratio", 0.5
)
self.steps_between_save = self_play_parameters.get("save_steps", 20000)
self.steps_between_swap = self_play_parameters.get("swap_steps", 20000)

for team_id in self._team_to_name_to_policy_queue:
if team_id == self._learning_team:
continue
- elif np.random.uniform() < (1 - self.play_against_current_best_ratio):
+ elif np.random.uniform() < (1 - self.play_against_latest_model_ratio):
x = np.random.randint(len(self.policy_snapshots))
snapshot = self.policy_snapshots[x]
else:

8
ml-agents/mlagents/trainers/tests/test_simple_rl.py


override_vals = {
"max_steps": 2500,
"self_play": {
- "play_against_current_best_ratio": 1.0,
+ "play_against_latest_model_ratio": 1.0,
"save_steps": 2000,
"swap_steps": 2000,
},

override_vals = {
"max_steps": 2500,
"self_play": {
- "play_against_current_best_ratio": 1.0,
+ "play_against_latest_model_ratio": 1.0,
"save_steps": 2000,
"swap_steps": 4000,
},

override_vals = {
"max_steps": 2000,
"self_play": {
- "play_against_current_best_ratio": 1.0,
+ "play_against_latest_model_ratio": 1.0,
"save_steps": 5000,
"swap_steps": 5000,
"team_change": 2000,

override_vals = {
"max_steps": 2000,
"self_play": {
- "play_against_current_best_ratio": 0.0,
+ "play_against_latest_model_ratio": 0.0,
"save_steps": 5000,
"swap_steps": 5000,
"team_change": 2000,

正在加载...
取消
保存