        # Rolling lists of training statistics, reported later by write_summary().
        stats = {'cumulative_reward': [],
                 'entropy': [], 'value_loss': [], 'policy_loss': [], 'learning_rate': []}
        self.stats = stats
        self.is_training = training
        # Experience buffers: a per-agent history dict plus a vectorized training buffer.
        self.training_buffer = vectorize_history(empty_local_history({}))
        self.history_dict = empty_all_history(info)
        self.reset_buffers(info, total=True)
        self.is_continuous = is_continuous
        self.use_observations = use_observations

        history['cumulative_reward'] = 0
        history['episode_steps'] = 0
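
    # The history helpers used above (empty_local_history, vectorize_history,
    # empty_all_history) are defined elsewhere and not shown in this excerpt.
    # A minimal sketch of the behavior this class appears to rely on; the field
    # names and exact shapes below are assumptions, not taken from this file:
    #
    #     def empty_local_history(agent_dict):
    #         # Reset every list-valued field of a single agent's history.
    #         for key in ['states', 'observations', 'actions', 'rewards']:
    #             agent_dict[key] = []
    #         return agent_dict
    #
    #     def vectorize_history(agent_dict):
    #         # Convert each field to a NumPy array so it can be mini-batched.
    #         return {key: np.array(values) for key, values in agent_dict.items()}
    #
    #     def empty_all_history(agent_info):
    #         # One fresh local history per agent id, plus per-episode counters.
    #         return {agent_id: dict(empty_local_history({}),
    #                                cumulative_reward=0, episode_steps=0)
    #                 for agent_id in agent_info.agents}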

    def reset_buffers(self, brain_info=None, total=False):
        """
        Resets the training buffer and either the local or all agent histories.
        :param brain_info: Agent info used to rebuild the per-agent histories (required when total=True).
        :param total: If True, rebuild every agent history from scratch; otherwise only empty the existing local histories.
        """
        self.training_buffer = vectorize_history(empty_local_history({}))
        if not total:
            for key in self.history_dict:
                self.history_dict[key] = empty_local_history(self.history_dict[key])
        else:
            self.history_dict = empty_all_history(agent_info=brain_info)
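
    # Usage within this class: the constructor calls reset_buffers(info, total=True) to
    # build a fresh history for every agent, while update_model() calls reset_buffers()
    # with the defaults to clear only the local histories and the training buffer.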

    def update_model(self, batch_size, num_epoch):
        """
        Uses training_buffer to update model.
        :param batch_size: Size of each mini-batch used for a gradient update.
        :param num_epoch: Number of passes made over the training buffer.
        """

                total_v += v_loss
                total_p += p_loss
        self.stats['value_loss'].append(total_v)
        self.stats['policy_loss'].append(total_p)
        self.reset_buffers()
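
    # The optimization loop that produces v_loss and p_loss above is not included in this
    # excerpt. A minimal sketch of one plausible shape, assuming the model exposes
    # value_loss, policy_loss and update_batch ops, that a TensorFlow session is held in
    # self.sess, and that a shuffle_buffer() helper reshuffles the vectorized history
    # (all of these names are assumptions, not confirmed by this file):
    #
    #     total_v, total_p = 0, 0
    #     for _ in range(num_epoch):
    #         buffer = shuffle_buffer(self.training_buffer)
    #         for i in range(len(buffer['actions']) // batch_size):
    #             start, end = i * batch_size, (i + 1) * batch_size
    #             feed_dict = {placeholder: buffer[key][start:end]
    #                          for placeholder, key in model_inputs.items()}  # hypothetical mapping
    #             v_loss, p_loss, _ = self.sess.run(
    #                 [self.model.value_loss, self.model.policy_loss, self.model.update_batch],
    #                 feed_dict=feed_dict)
    #             total_v += v_loss
    #             total_p += p_loss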

    def write_summary(self, summary_writer, steps, lesson_number):
        """
        Saves the accumulated training statistics via the summary writer (e.g. for Tensorboard).
        :param summary_writer: Summary writer associated with the Tensorflow session.
        :param steps: Number of environment steps taken so far in training.
        :param lesson_number: Current lesson number (e.g. when using curriculum training).
        """
        if len(self.stats['cumulative_reward']) > 0:
            mean_reward = np.mean(self.stats['cumulative_reward'])
            print("Mean Reward: {0}".format(mean_reward))
        summary = tf.Summary()
        for key in self.stats:
            if len(self.stats[key]) > 0:
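                # The rest of this method is not included in this excerpt. A plausible
                # continuation, assuming each statistic is summarized by its mean under a
                # tag named after its key and then cleared (the tag naming is an assumption):
                #
                #     stat_mean = float(np.mean(self.stats[key]))
                #     summary.value.add(tag=key, simple_value=stat_mean)
                #     self.stats[key] = []
                #
                # followed, after the loop, by summary_writer.add_summary(summary, steps)
                # and summary_writer.flush().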