Commit ed11e79b authored by Jayant Khatkar

comms working

parent f1b7a1f1
......@@ -27,7 +27,10 @@ class ActionDistribution:
self.X = X
# Normalise
self.q = (np.array(q)/sum(q)).tolist()
if sum(q)==0:
self.q = [1/len(q)] * len(q)
else:
self.q = (np.array(q).astype(float)/sum(q)).tolist()
def best_action(self):
......@@ -41,7 +44,7 @@ class ActionDistribution:
"""
Weighted random out of possible action sequences
"""
return np.random.choice(self.X, p=self.q)
return self.X[np.random.choice(len(self.q), p=self.q)]
class Tree:
......@@ -57,6 +60,7 @@ class Tree:
calculated once simulation is complete, in which
case it should return "None" while simulation is
incomplete
- MUST RETURN POSITIVE VALUE
- available_actions
- This is a function which has inputs (data, state) and
returns the possible actions which can be taken
......@@ -85,7 +89,7 @@ class Tree:
self.comm_n = comm_n # number of action dists to communicate
# Set Action sequence as nothing for now
self.my_act_dist = ActionDistribution([[]],[1])
self.my_act_dist = ActionDistribution([0],[1])
# Graph add root node of tree
self.graph.add_node(1,
......@@ -95,6 +99,8 @@ class Tree:
time=0
)
self._expansion(1)
def _parent(self, node_id):
"""
......@@ -108,8 +114,7 @@ class Tree:
"""
Select Child node which maximises UCT
"""
print children
# N for parent
n_p = self.graph.node[self._parent(children[0])]["N"]
......@@ -118,8 +123,6 @@ class Tree:
for node in map(self.graph.nodes.__getitem__, children)]
# Return Child with highest UCT
print uct
print children[np.argmax(uct)]
return children[np.argmax(uct)]
......@@ -161,18 +164,14 @@ class Tree:
return (node_path, other_paths)
def grow(self, nsims=10, ntime=None, gamma = 0.9):
def _expansion(self, start_node):
"""
Grow Tree by one node
Does the Expansion step for tree growing.
Separated into its own function because it is also done in
Init step.
"""
### SELECTION
start_node = 1
while len(self._childNodes(start_node))>0:
start_node = self._select(self._childNodes(start_node))
### EXPANSION
state = self._get_state(start_node)
options = self.available_actions(self.data, state)
......@@ -190,7 +189,24 @@ class Tree:
if self.time_func != None:
# TODO Do the time calculations
pass
return True
def grow(self, nsims=10, ntime=None, gamma = 0.9):
"""
Grow Tree by one node
"""
### SELECTION
start_node = 1
while len(self._childNodes(start_node))>0:
start_node = self._select(self._childNodes(start_node))
### EXPANSION
self._expansion(start_node)
### SIMULATION
avg_reward = 0
best_reward = float("-Inf")
......@@ -198,6 +214,8 @@ class Tree:
# TODO
# SIMULATION NOT REQUIRED FOR BIGPRINT, HEURISTIC USED INSTEAD
pass
state = self._get_state(start_node)
avg_reward = self.reward(self.data, state)
self.graph.node[start_node]["mu"] = avg_reward
self.graph.node[start_node]["N"] = 1
......
......@@ -8,9 +8,19 @@ def avail_actions(data, state):
def reward(dat, state, max_total=25):
    """
    Score a joint state as the combined sum of every robot's values.

    Inputs:
    - dat
        - Unused here; accepted so the signature matches the
          (data, state) call the tree makes.
    - state
        - state[0] is an iterable of numbers for this robot.
        - state[1] is indexable by each element obtained when iterating
          it (presumably a dict of robot id -> iterable of numbers;
          TODO confirm against the caller).
    - max_total
        - Largest combined total that still earns a reward
          (defaults to 25, the previously hard-coded cap).

    Returns the combined total, or 0 when it exceeds max_total.
    """
    own_total = sum(state[0])
    others_total = sum(sum(state[1][robot]) for robot in state[1])
    total = own_total + others_total

    # Exceeding the cap forfeits the reward entirely.
    # NOTE(review): the Tree docs ask for a positive reward; confirm that
    # returning 0 here is acceptable.
    if total > max_total:
        return 0
    return total
# Number of action distributions each tree communicates.
comm_n = 5

# Two planners built from the same data, reward and action functions.
tree1 = Tree(data, reward, avail_actions, comm_n)
tree2 = Tree(data, reward, avail_actions, comm_n)

for i in range(150):
    # Grow both trees by one node before exchanging plans.
    tree1.grow()
    tree2.grow()

    # Each tree receives the OTHER tree's message. The second argument is
    # presumably the sender's robot id (TODO confirm against receive_comms).
    # Previously tree2 was handed its own message labelled as robot 1.
    tree1.receive_comms(tree2.send_comms(), 2)
    tree2.receive_comms(tree1.send_comms(), 1)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment