diff --git a/DecMCTS.py b/DecMCTS.py
index d51b2235a9974e0f48247c413d1f5a830b3da662..511e76d4262ba42525a1c00b97d61bb6e9826ba4 100644
--- a/DecMCTS.py
+++ b/DecMCTS.py
@@ -17,7 +17,9 @@ class ActionDistribution:
     Working with action sequences and their respective probability

     To initialise, Inputs:
-    - X: list of action sequences
+    - X: list of action sequences
+        - NOTE: X is simply a state object returned by state_store.
+          You are expected to store the action sequence in this object
     - q: probability of each action sequence (normalised in intialisation)
     """
@@ -57,14 +59,15 @@ class Tree:
     - reward
         - This is a function which has inputs (data, state)
             and returns the GLOBAL reward to be maximised
-        - It may be the case that the reward function can only
-            calculated once simulation is complete, in which
-            case it should return "None" while simulation is
-            incomplete
         - MUST RETURN POSITIVE VALUE
     - available_actions
         - This is a function which has inputs (data, state)
            and returns the possible actions which can be taken
+    - state_store
+        - This is a function which has inputs
+            (data, parent_state, action) and returns an object to
+            store in the node.
+        - Root Node has parent state None and action None.
     - c_p
         - exploration multiplier (number between 0 and 1)
@@ -82,16 +85,16 @@ class Tree:
             data,
             reward_func,
             avail_actions_func,
+            state_store_func,
             comm_n,
             robot_id,
-            c_p=1,
-            time_func=None):
+            c_p=1):

         self.data = data
         self.graph = nx.DiGraph()
         self.reward = reward_func
         self.available_actions = avail_actions_func
-        self.time_func = time_func
+        self.state_store = state_store_func
         self.c_p = c_p
         self.id = robot_id
         self.comms = {} # Plan with no robots initially
@@ -104,8 +107,7 @@ class Tree:
         self.graph.add_node(1,
                 mu=0,
                 N=0,
-                action_seq=[],
-                time=0
+                state=self.state_store(self.data, None, None)
                 )

         self._expansion(1)
@@ -153,13 +155,13 @@ class Tree:
         temp=nx.get_node_attributes(self.graph, "mu")
         top_n_nodes = sorted(temp, key=temp.get, reverse=True)[:self.comm_n]
-        X = [self.graph.node[n]["action_seq"] for n in top_n_nodes]
+        X = [self.graph.node[n]["state"] for n in top_n_nodes]
         q = [self.graph.node[n]["mu"]**2 for n in top_n_nodes]
         self.my_act_dist = ActionDistribution(X,q)

         return True

-    def _get_state(self, node_id):
+    def _get_system_state(self, node_id):
         """
         Randomly select 1 path taken by every other robot
         & path taken by this robot to get to this node
@@ -168,16 +170,16 @@ class Tree:
         and second element is a dictionary of the other paths
         """

-        node_path = self.graph.node[node_id]["action_seq"]
-        all_paths = {k:self.comms[k].random_action() for k in self.comms}
-        all_paths[self.id] = node_path
+        robot_state = self.graph.node[node_id]["state"]
+        system_state = {k:self.comms[k].random_action() for k in self.comms}
+        system_state[self.id] = robot_state

-        return all_paths
+        return system_state

     def _null_state(self, state):
         temp = deepcopy(state)
-        temp[self.id] = []
+        temp[self.id] = self.graph.node[1]["state"] # Null state is if robot still at root node
         return temp
@@ -188,7 +190,7 @@ class Tree:
         Init step.
         """

-        state = self._get_state(start_node)
+        state = self._get_system_state(start_node)
         options = self.available_actions(self.data, state, self.id)

         # create empty nodes underneath the node being expanded
@@ -197,20 +199,16 @@ class Tree:
             self.graph.add_node(len(self.graph)+1,
                 mu = 0,
                 N = 0,
-                action_seq=self.graph.node[start_node]["action_seq"] + [o],
-                time=0
+                state=self.state_store(self.data, self.graph.node[start_node]["state"], o)
                 )
             self.graph.add_edge(start_node, len(self.graph))

-        if self.time_func != None:
-            # TODO Do the time calculations
-            pass

         return True

-    def grow(self, nsims=10, ntime=None, gamma = 0.9):
+    def grow(self, nsims=10, gamma = 0.9):
         """
         Grow Tree by one node
         """
@@ -231,7 +229,7 @@ class Tree:
             # SIMULATION NOT REQUIRED FOR BIGPRINT, HEURISTIC USED INSTEAD
             pass

-        state = self._get_state(start_node)
+        state = self._get_system_state(start_node)
         avg_reward = self.reward(self.data, state) - self.reward(self.data, self._null_state(state))
         self.graph.node[start_node]["mu"] = avg_reward
         self.graph.node[start_node]["N"] = 1
diff --git a/test/test.py b/test/test.py
index 80204aa2536d11406f33c0d9160cf7d6613c7799..e125f1f43a8795ff9346ed5b5f4dcf1d9ddcc4dd 100644
--- a/test/test.py
+++ b/test/test.py
@@ -1,21 +1,37 @@
 from DecMCTS import Tree
+from copy import deepcopy

 data = {}

-def avail_actions(data, state, robot_id):
+class State:
+    def __init__(self, act_seq, cum_sum):
+        self.action_seq = act_seq
+        self.cumulative_sum = cum_sum
+
+def state_storer(data, parent_state, action):
+
+    if parent_state == None:
+        return State([],0) # This state is also used as the Null action when calculating local reward
+
+    state = deepcopy(parent_state)
+    state.action_seq.append(action)
+    state.cumulative_sum = state.cumulative_sum + action
+    return state
+
+def avail_actions(data, states, robot_id):
     return [1,2,3,4,5]

-def reward(dat, state):
-    each_robot_sum= [sum(state[a]) for a in state]
+def reward(dat, states):
+    print states
+    each_robot_sum= [states[robot].cumulative_sum for robot in states]
     return sum(each_robot_sum)

 comm_n = 5
+tree1 = Tree(data, reward, avail_actions, state_storer, comm_n, 1)

-tree1 = Tree(data, reward, avail_actions, comm_n, 1)
-
-tree2 = Tree(data, reward, avail_actions, comm_n, 2)
+tree2 = Tree(data, reward, avail_actions, state_storer, comm_n, 2)

 for i in range(350):
     tree1.grow()