Commit a6264978 authored by Jayant Khatkar

storing rollout, depth added to simulation

parent f0d8d4e0
......@@ -213,7 +213,7 @@ class Tree:
return True
def grow(self, nsims=10, gamma = 0.9):
def grow(self, nsims=10, gamma = 0.9, depth=10):
"""
Grow Tree by one node
"""
......@@ -239,10 +239,9 @@ class Tree:
for i in range(nsims):
temp_state = self.graph.node[start_node]["state"]
simulation_complete = False
while not simulation_complete:
# TODO HOW TO CHECK IF SIMULATION IS COMPLETE
simulation_complete = True
d = 0 # depth
while d<depth: # also breaks at no available options
d += 1
# Get the available actions
options = self.available_actions(
......@@ -251,6 +250,10 @@ class Tree:
self.id
)
# If no actions possible, simulation complete
if len(options)==0:
break
# "randomly" choose 1 - function provided by user
state[self.id] = temp_state
sim_action = self.sim_selection_func(self.data, options, state)
......@@ -266,14 +269,11 @@ class Tree:
# if best reward so far, store the rollout in the new node
if rew > best_reward:
best_reward = rew
# TODO: STORE THE BEST ROLLOUT IN THE NODE
best_rollout = temp_state
#state[self.id] = self.graph.node[start_node]["state"]
#avg_reward = self.reward(self.data, state) - self.reward(self.data, self._null_state(state))
self.graph.node[start_node]["mu"] = avg_reward
self.graph.node[start_node]["N"] = 1
self.graph.node[start_node]["best_rollout"] = best_rollout
### BACKPROPOGATION
while start_node!=1: #while not root node
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment