|
|
|
|
|
|
feed_dict[self.model.epsilon] = epsilon |
|
|
|
feed_dict = self.fill_eval_dict(feed_dict, brain_info) |
|
|
|
run_out = self._execute_model(feed_dict, self.inference_dict) |
|
|
|
if self.use_continuous_act: |
|
|
|
run_out["random_normal_epsilon"] = epsilon |
|
|
|
return run_out |
|
|
|
|
|
|
|
@timed |
|
|
|
|
|
|
|
|
|
|
if self.use_continuous_act: |
|
|
|
feed_dict[model.output_pre] = mini_batch["actions_pre"] |
|
|
|
feed_dict[model.epsilon] = mini_batch["random_normal_epsilon"] |
|
|
|
else: |
|
|
|
feed_dict[model.action_holder] = mini_batch["actions"] |
|
|
|
if self.use_recurrent: |
|
|
|