diff --git a/pydecmcts/DecMCTS.py b/pydecmcts/DecMCTS.py index 7d14965fb3c6d92d23c621daf1ee118bf0040dcd..fd52344acba116228c62b71502b0c7e3cc6d84a2 100644 --- a/pydecmcts/DecMCTS.py +++ b/pydecmcts/DecMCTS.py @@ -5,6 +5,7 @@ from copy import copy from math import log import numpy as np + def _UCT(mu_j, c_p, n_p, n_j): if n_j ==0: return float("Inf") @@ -117,8 +118,7 @@ class Tree: ) # Set Action sequence as nothing for now - self.my_act_dist = ActionDistribution([self.graph.node[1]["state"]],[1]) - + self.my_act_dist = ActionDistribution([self.graph.nodes[1]["state"]],[1]) self._expansion(1) @@ -127,7 +127,6 @@ class Tree: """ wrapper for code readability """ - return list(self.graph.predecessors(node_id))[0] @@ -137,7 +136,7 @@ class Tree: """ # N for parent - n_p = self.graph.node[self._parent(children[0])]["N"] + n_p = self.graph.nodes[self._parent(children[0])]["N"] # UCT values for children uct = [_UCT(node["mu"], self.c_p, n_p, node["N"]) @@ -166,8 +165,8 @@ class Tree: temp.pop(1, None) top_n_nodes = sorted(temp, key=temp.get, reverse=True)[:self.comm_n] - X = [self.graph.node[n]["best_rollout"] for n in top_n_nodes if self.graph.node[n]["N"]>0] - q = [self.graph.node[n]["mu"]**2 for n in top_n_nodes if self.graph.node[n]["N"]>0] + X = [self.graph.nodes[n]["best_rollout"] for n in top_n_nodes if self.graph.nodes[n]["N"]>0] + q = [self.graph.nodes[n]["mu"]**2 for n in top_n_nodes if self.graph.nodes[n]["N"]>0] self.my_act_dist = ActionDistribution(X,q) return True @@ -182,14 +181,14 @@ class Tree: """ system_state = {k:self.comms[k].random_action() for k in self.comms} - system_state[self.id] = self.graph.node[node_id]["state"] + system_state[self.id] = self.graph.nodes[node_id]["state"] return system_state def _null_state(self, state): temp = copy(state) - temp[self.id] = self.graph.node[1]["state"] # Null state is if robot still at root node + temp[self.id] = self.graph.nodes[1]["state"] # Null state is if robot still at root node return temp @@ -202,7 +201,7 @@ class Tree: options = self.available_actions( self.data, - self.graph.node[start_node]["state"], + self.graph.nodes[start_node]["state"], self.id ) @@ -216,7 +215,7 @@ class Tree: mu = 0, best_reward = 0, N = 0, - state=self.state_store(self.data, self.graph.node[start_node]["state"], o, self.id) + state=self.state_store(self.data, self.graph.nodes[start_node]["state"], o, self.id) ) self.graph.add_edge(start_node, len(self.graph)) @@ -249,7 +248,7 @@ class Tree: best_reward = float("-Inf") best_rollout = None for i in range(nsims): - temp_state = self.graph.node[start_node]["state"] + temp_state = self.graph.nodes[start_node]["state"] state[self.id] = temp_state d = 0 # depth @@ -259,7 +258,7 @@ class Tree: # Get the available actions options = self.sim_available_actions( self.data, - state, + state[self.id], self.id ) @@ -285,27 +284,27 @@ class Tree: avg_reward = avg_reward / nsims - self.graph.node[start_node]["mu"] = avg_reward - self.graph.node[start_node]["best_reward"] = best_reward - self.graph.node[start_node]["N"] = 1 - self.graph.node[start_node]["best_rollout"] = copy(best_rollout) + self.graph.nodes[start_node]["mu"] = avg_reward + self.graph.nodes[start_node]["best_reward"] = best_reward + self.graph.nodes[start_node]["N"] = 1 + self.graph.nodes[start_node]["best_rollout"] = copy(best_rollout) ### BACKPROPOGATION while start_node!=1: #while not root node start_node = self._parent(start_node) - self.graph.node[start_node]["mu"] = \ - (gamma * self.graph.node[start_node]["mu"] * \ - self.graph.node[start_node]["N"] + avg_reward) \ - /(self.graph.node[start_node]["N"] + 1) + self.graph.nodes[start_node]["mu"] = \ + (gamma * self.graph.nodes[start_node]["mu"] * \ + self.graph.nodes[start_node]["N"] + avg_reward) \ + /(self.graph.nodes[start_node]["N"] + 1) - self.graph.node[start_node]["N"] = \ - gamma * self.graph.node[start_node]["N"] + 1 + self.graph.nodes[start_node]["N"] = \ + gamma * self.graph.nodes[start_node]["N"] + 1 - if best_reward > self.graph.node[start_node]["best_reward"]: - self.graph.node[start_node]["best_reward"] = best_reward - self.graph.node[start_node]["best_rollout"] = copy(best_rollout) + if best_reward > self.graph.nodes[start_node]["best_reward"]: + self.graph.nodes[start_node]["best_reward"] = best_reward + self.graph.nodes[start_node]["best_rollout"] = copy(best_rollout) self._update_distribution() diff --git a/setup.py b/setup.py index bc1600283d411c7986763c38bca1061876b2c9c5..b66d4bd82291546532b6acefed15e38aee861345 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from setuptools import setup, find_packages setup( name='pydecmcts', - version='0.4', + version='0.5', packages=find_packages(include=['pydecmcts']), py_modules=['pydecmcts'] )