Commit f1b7a1f1 authored by Jayant Khatkar

debug grow

parent b3937a57
......@@ -24,7 +24,6 @@ class ActionDistribution:
def __init__(self, X, q):
# Action sequence as provided
assert(len(X)==n)
self.X = X
# Normalise
......@@ -109,15 +108,18 @@ class Tree:
"""
Select Child node which maximises UCT
"""
print children
# N for parent
n_p = self.graph.nodes[self._parent(children[0])]["N"]
n_p = self.graph.node[self._parent(children[0])]["N"]
# UCT values for children
uct = [_UCT(node["mu"], self.c_p, n_p, node["N"])
for node in map(self.graph.nodes.__getitem__, children)]
# Return Child with highest UCT
print uct
print children[np.argmax(uct)]
return children[np.argmax(uct)]
......@@ -139,8 +141,8 @@ class Tree:
temp=nx.get_node_attributes(self.graph, "mu")
top_n_nodes = sorted(temp, key=temp.get, reverse=True)[:self.comm_n]
X = [self.graph.nodes[n]["action_seq"] for n in top_n_nodes]
q = [self.graph.nodes[n]["mu"]**2 for n in top_n_nodes]
X = [self.graph.node[n]["action_seq"] for n in top_n_nodes]
q = [self.graph.node[n]["mu"]**2 for n in top_n_nodes]
self.my_act_dist = ActionDistribution(X,q)
return True
......@@ -154,13 +156,13 @@ class Tree:
and second element is a dictionary of the other paths
"""
node_path = self.graph.nodes[node_id]["action_seq"]
node_path = self.graph.node[node_id]["action_seq"]
other_paths = {k:self.comms[k].random_action() for k in self.comms}
return (node_path, other_paths)
def grow(self, nsims=10, ntime=None):
def grow(self, nsims=10, ntime=None, gamma = 0.9):
"""
Grow Tree by one node
"""
......@@ -177,10 +179,10 @@ class Tree:
# create empty nodes underneath the node being expanded
for o in options:
self.graph.add_node(len(self.graph)
self.graph.add_node(len(self.graph)+1,
mu = 0,
N = 0,
action_seq=self.graph.nodes[start_node]["action_seq"] + [o],
action_seq=self.graph.node[start_node]["action_seq"] + [o],
time=0
)
......@@ -197,21 +199,23 @@ class Tree:
# SIMULATION NOT REQUIRED FOR BIGPRINT, HEURISTIC USED INSTEAD
pass
avg_reward = self.reward(self.data, state)
self.graph.nodes["mu"] = avg_reward
self.graph.nodes["N"] = 1
self.graph.node[start_node]["mu"] = avg_reward
self.graph.node[start_node]["N"] = 1
### BACKPROPOGATION
while start_node!=1: #while not root node
start_node = self._parent(start_node)
self.graph.nodes[start_node]["mu"] = \
(gamma * self.graph.nodes[start_node]["mu"] + avg_reward)\
/(gamma * self.graph.nodes[start_node]["N"] + 1)
self.graph.nodes[start_node]["N"] = \
gamma * self.graph.nodes[start_node]["N"] + 1
self.graph.node[start_node]["mu"] = \
(gamma * self.graph.node[start_node]["mu"] + avg_reward)\
/(gamma * self.graph.node[start_node]["N"] + 1)
self.graph.node[start_node]["N"] = \
gamma * self.graph.node[start_node]["N"] + 1
self._update_distribution()
return avg_reward
......
from DecMCTS import Tree
# Problem data handed to Tree; the two callbacks defined below do not read it.
data = {}
def avail_actions(data, state):
    """
    Return the actions available from ``state``.

    This toy implementation ignores both ``data`` and ``state`` and always
    offers the same five actions. A new list is built on every call, so
    callers may mutate the result freely.
    """
    return [1, 2, 3, 4, 5]
def reward(dat, state):
    """
    Score a joint state by adding up every number it contains.

    ``state`` is indexed as a pair: ``state[0]`` is an iterable of numbers,
    and ``state[1]`` is a container that is iterated for keys which are then
    used to index it, each entry being an iterable of numbers (presumably
    this robot's action sequence and a dict of the other robots' sequences;
    confirm against the caller). ``dat`` is accepted but not used.
    """
    # Total each entry of state[1] on its own before combining with state[0].
    other_robots = [sum(state[1][a]) for a in state[1]]
    return sum(state[0]) + sum(other_robots)
# Passed to Tree as its fourth argument (presumably the number of action
# sequences kept for communication; confirm against Tree.__init__).
comm_n = 5
# Build one tree from the toy data and the two callbacks defined above.
tree1 = Tree(data, reward, avail_actions, comm_n)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment