fix python3 nodes bug, and sim_state bug

2c509b67 · Jayant Khatkar · 03852849 · 2c509b67 · 2c509b67
Commit 2c509b67 authored 4 years ago by Jayant Khatkar
--- a/pydecmcts/DecMCTS.py
+++ b/pydecmcts/DecMCTS.py
@@ -5,6 +5,7 @@ from copy import copy
 from math import log
 import numpy as np

+
 def _UCT(mu_j, c_p, n_p, n_j):
    if n_j ==0:
        return float("Inf")
@@ -117,8 +118,7 @@ class Tree:
                )

        # Set Action sequence as nothing for now
-        self.my_act_dist = ActionDistribution([self.graph.node[1]["state"]],[1])
-
+        self.my_act_dist = ActionDistribution([self.graph.nodes[1]["state"]],[1])

        self._expansion(1)

@@ -127,7 +127,6 @@ class Tree:
        """
        wrapper for code readability
        """
-
        return list(self.graph.predecessors(node_id))[0]


@@ -137,7 +136,7 @@ class Tree:
        """

        # N for parent
-        n_p = self.graph.node[self._parent(children[0])]["N"]
+        n_p = self.graph.nodes[self._parent(children[0])]["N"]

        # UCT values for children
        uct = [_UCT(node["mu"], self.c_p, n_p, node["N"]) 
@@ -166,8 +165,8 @@ class Tree:
        temp.pop(1, None)

        top_n_nodes = sorted(temp, key=temp.get, reverse=True)[:self.comm_n]
-        X = [self.graph.node[n]["best_rollout"] for n in top_n_nodes if self.graph.node[n]["N"]>0]
-        q = [self.graph.node[n]["mu"]**2 for n in top_n_nodes if self.graph.node[n]["N"]>0]
+        X = [self.graph.nodes[n]["best_rollout"] for n in top_n_nodes if self.graph.nodes[n]["N"]>0]
+        q = [self.graph.nodes[n]["mu"]**2 for n in top_n_nodes if self.graph.nodes[n]["N"]>0]
        self.my_act_dist = ActionDistribution(X,q)
        return True

@@ -182,14 +181,14 @@ class Tree:
        """

        system_state = {k:self.comms[k].random_action() for k in self.comms}
-        system_state[self.id] = self.graph.node[node_id]["state"]
+        system_state[self.id] = self.graph.nodes[node_id]["state"]

        return system_state


    def _null_state(self, state):
        temp = copy(state)
-        temp[self.id] = self.graph.node[1]["state"] # Null state is if robot still at root node
+        temp[self.id] = self.graph.nodes[1]["state"] # Null state is if robot still at root node
        return temp

    
@@ -202,7 +201,7 @@ class Tree:

        options = self.available_actions(
            self.data,
-            self.graph.node[start_node]["state"],
+            self.graph.nodes[start_node]["state"],
            self.id
        )

@@ -216,7 +215,7 @@ class Tree:
                    mu = 0,
                    best_reward = 0,
                    N = 0,
-                    state=self.state_store(self.data, self.graph.node[start_node]["state"], o, self.id)
+                    state=self.state_store(self.data, self.graph.nodes[start_node]["state"], o, self.id)
                    )

            self.graph.add_edge(start_node, len(self.graph))
@@ -249,7 +248,7 @@ class Tree:
        best_reward = float("-Inf")
        best_rollout = None
        for i in range(nsims):
-            temp_state = self.graph.node[start_node]["state"]
+            temp_state = self.graph.nodes[start_node]["state"]
            state[self.id] = temp_state

            d = 0 # depth
@@ -259,7 +258,7 @@ class Tree:
                # Get the available actions
                options = self.sim_available_actions(
                            self.data,
-                            state,
+                            state[self.id],
                            self.id
                        )

@@ -285,27 +284,27 @@ class Tree:

        avg_reward = avg_reward / nsims

-        self.graph.node[start_node]["mu"] = avg_reward
-        self.graph.node[start_node]["best_reward"] = best_reward
-        self.graph.node[start_node]["N"] = 1
-        self.graph.node[start_node]["best_rollout"] = copy(best_rollout)
+        self.graph.nodes[start_node]["mu"] = avg_reward
+        self.graph.nodes[start_node]["best_reward"] = best_reward
+        self.graph.nodes[start_node]["N"] = 1
+        self.graph.nodes[start_node]["best_rollout"] = copy(best_rollout)

        ### BACKPROPOGATION
        while start_node!=1: #while not root node

            start_node = self._parent(start_node)

-            self.graph.node[start_node]["mu"] = \
-                    (gamma * self.graph.node[start_node]["mu"] * \
-                     self.graph.node[start_node]["N"] + avg_reward) \
-                    /(self.graph.node[start_node]["N"] + 1)
+            self.graph.nodes[start_node]["mu"] = \
+                    (gamma * self.graph.nodes[start_node]["mu"] * \
+                     self.graph.nodes[start_node]["N"] + avg_reward) \
+                    /(self.graph.nodes[start_node]["N"] + 1)

-            self.graph.node[start_node]["N"] = \
-                    gamma * self.graph.node[start_node]["N"] + 1
+            self.graph.nodes[start_node]["N"] = \
+                    gamma * self.graph.nodes[start_node]["N"] + 1

-            if best_reward > self.graph.node[start_node]["best_reward"]:
-                self.graph.node[start_node]["best_reward"] = best_reward
-                self.graph.node[start_node]["best_rollout"] = copy(best_rollout)
+            if best_reward > self.graph.nodes[start_node]["best_reward"]:
+                self.graph.nodes[start_node]["best_reward"] = best_reward
+                self.graph.nodes[start_node]["best_rollout"] = copy(best_rollout)

        self._update_distribution()


--- a/setup.py
+++ b/setup.py
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages

 setup(
    name='pydecmcts',
-    version='0.4',
+    version='0.5',
    packages=find_packages(include=['pydecmcts']),
    py_modules=['pydecmcts']
 )