gnunet-svn
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[GNUnet-SVN] r31781 - gnunet/src/ats


From: gnunet
Subject: [GNUnet-SVN] r31781 - gnunet/src/ats
Date: Thu, 2 Jan 2014 23:23:19 +0100

Author: oehlmann
Date: 2014-01-02 23:23:18 +0100 (Thu, 02 Jan 2014)
New Revision: 31781

Modified:
   gnunet/src/ats/plugin_ats_ril.c
Log:
eligibility traces fix

Modified: gnunet/src/ats/plugin_ats_ril.c
===================================================================
--- gnunet/src/ats/plugin_ats_ril.c     2014-01-02 13:19:28 UTC (rev 31780)
+++ gnunet/src/ats/plugin_ats_ril.c     2014-01-02 22:23:18 UTC (rev 31781)
@@ -72,7 +72,7 @@
   RIL_ACTION_BW_OUT_HLV = -5,
   RIL_ACTION_BW_OUT_INC = -6,
   RIL_ACTION_BW_OUT_DEC = -7,
-  RIL_ACTION_TYPE_NUM = 1
+  RIL_ACTION_TYPE_NUM = 2
 };
 
 enum RIL_Algorithm
@@ -89,7 +89,7 @@
 
 enum RIL_E_Modification
 {
-  RIL_E_SET,
+  RIL_E_UPDATE,
   RIL_E_ZERO,
   RIL_E_ACCUMULATE,
   RIL_E_REPLACE
@@ -240,9 +240,9 @@
   int a_old;
 
   /**
-   * Eligibility trace vector
+   * Eligibility traces
    */
-  double * e;
+  double ** E;
 
   /**
    * Address in use
@@ -580,45 +580,49 @@
 //        delta,
 //        i,
 //        agent->e[i]);
-    theta[i] += agent->envi->parameters.alpha * delta * agent->s_old[i];// * agent->e[i];
+    theta[i] += agent->envi->parameters.alpha * delta * agent->s_old[i];// * agent->E[a_prime][i];
   }
 }
 
 
 /**
  * Changes the eligibility trace vector e in various manners:
- * #RIL_E_ACCUMULATE - adds @a f to each component as in accumulating eligibility traces
- * #RIL_E_REPLACE - resets each component to @a f  as in replacing traces
+ * #RIL_E_ACCUMULATE - adds @a feature to each component as in accumulating eligibility traces
+ * #RIL_E_REPLACE - resets each component to @a feature  as in replacing traces
  * #RIL_E_SET - multiplies e with discount factor and lambda as in the update rule
  * #RIL_E_ZERO - sets e to 0 as in Watkin's Q-learning algorithm when exploring and when initializing
  *
  * @param agent the agent handle
  * @param mod the kind of modification
- * @param f how much to change
+ * @param feature the feature vector
  */
 static void
 agent_modify_eligibility (struct RIL_Peer_Agent *agent,
                           enum RIL_E_Modification mod,
-                          double *f)
+                          double *feature,
+                          int action)
 {
   int i;
-  double *e = agent->e;
+  int k;
 
   for (i = 0; i < agent->m; i++)
   {
     switch (mod)
     {
     case RIL_E_ACCUMULATE:
-      e[i] += f[i];
+      agent->E[action][i] += feature[i];
       break;
     case RIL_E_REPLACE:
-      e[i] = f[i];
+      agent->E[action][i] = agent->E[action][i]+feature[i] > 1 ? 1 : agent->E[action][i]+feature[i]; //TODO? Maybe remove as only accumulating traces really apply
       break;
-    case RIL_E_SET:
-      e[i] *= agent->envi->global_discount_variable * agent->envi->parameters.lambda;
+    case RIL_E_UPDATE:
+      agent->E[action][i] *= agent->envi->global_discount_variable * agent->envi->parameters.lambda;
       break;
     case RIL_E_ZERO:
-      e[i] = 0;
+      for (k = 0; k < agent->n; k++)
+      {
+        agent->E[k][i] = 0;
+      }
       break;
     }
   }
@@ -769,7 +773,7 @@
   int i;
   int k;
 
-  state = GNUNET_malloc (sizeof(agent->m));
+  state = GNUNET_malloc (sizeof(double) * agent->m);
 
   y[0] = (double) agent->bw_out;
   y[1] = (double) agent->bw_in;
@@ -964,6 +968,7 @@
 
   unsigned long long objective;
 
+  LOG(GNUNET_ERROR_TYPE_INFO, "address: %x\n", agent->address_inuse);
   net = agent->address_inuse->solver_information;
   if (net->bw_in_assigned > net->bw_in_available)
   {
@@ -1197,21 +1202,25 @@
 static int
 agent_select_egreedy (struct RIL_Peer_Agent *agent, double *state)
 {
+  int action;
+
   if (agent_decide_exploration(agent))
   {
+    action = agent_get_action_explore(agent, state);
     if (RIL_ALGO_Q == agent->envi->parameters.algorithm)
     {
-      agent_modify_eligibility(agent, RIL_E_ZERO, NULL);
+      agent_modify_eligibility(agent, RIL_E_ZERO, NULL, action);
     }
-    return agent_get_action_explore(agent, state);
+    return action;
   }
   else
   {
+    action = agent_get_action_best(agent, state);
     if (RIL_ALGO_Q == agent->envi->parameters.algorithm)
     {
-      agent_modify_eligibility(agent, RIL_E_SET, NULL);
+      agent_modify_eligibility(agent, RIL_E_UPDATE, NULL, action);
     }
-    return agent_get_action_best(agent, state);
+    return action;
   }
 }
 
@@ -1234,11 +1243,6 @@
   double sum = 0;
   double r;
 
-  if (RIL_ALGO_Q == agent->envi->parameters.algorithm)
-  {
-    agent_modify_eligibility(agent, RIL_E_SET, NULL);
-  }
-
   for (i=0; i<agent->n; i++)
   {
     eqt[i] = exp(agent_estimate_q(agent,state,i) / agent->envi->parameters.temperature);
@@ -1255,6 +1259,10 @@
   {
     if (sum + p[i] > r)
     {
+      if (RIL_ALGO_Q == agent->envi->parameters.algorithm)
+      {
+        agent_modify_eligibility(agent, RIL_E_UPDATE, NULL, i);
+      }
       return i;
     }
     sum += p[i];
@@ -1307,7 +1315,7 @@
       //updates weights with selected action (on-policy), if not first step
       agent_update_weights (agent, reward, s_next, a_next);
     }
-    agent_modify_eligibility (agent, RIL_E_SET, s_next);
+    agent_modify_eligibility (agent, RIL_E_UPDATE, s_next, a_next);
     break;
 
   case RIL_ALGO_Q:
@@ -1323,7 +1331,7 @@
 
   GNUNET_assert(RIL_ACTION_INVALID != a_next);
 
-  agent_modify_eligibility (agent, RIL_E_ACCUMULATE, s_next);
+  agent_modify_eligibility (agent, RIL_E_ACCUMULATE, s_next, a_next);
 
 //  GNUNET_log (GNUNET_ERROR_TYPE_INFO, "step()  Step# %llu  R: %f  IN %llu  OUT %llu  A: %d\n",
 //        agent->step_count,
@@ -1674,15 +1682,16 @@
   agent->n = RIL_ACTION_TYPE_NUM;
   agent->m = 0;
   agent->W = (double **) GNUNET_malloc (sizeof (double *) * agent->n);
+  agent->E = (double **) GNUNET_malloc (sizeof (double *) * agent->n);
   for (i = 0; i < agent->n; i++)
   {
     agent->W[i] = (double *) GNUNET_malloc (sizeof (double) * agent->m);
+    agent->E[i] = (double *) GNUNET_malloc (sizeof (double) * agent->m);
   }
   agent_w_start(agent);
   agent->a_old = RIL_ACTION_INVALID;
   agent->s_old = GNUNET_malloc (sizeof (double) * agent->m);
-  agent->e = (double *) GNUNET_malloc (sizeof (double) * agent->m);
-  agent_modify_eligibility (agent, RIL_E_ZERO, NULL);
+  agent->address_inuse = NULL;
 
   return agent;
 }
@@ -1700,11 +1709,12 @@
 
   for (i = 0; i < agent->n; i++)
   {
-    GNUNET_free(agent->W[i]);
+    GNUNET_free_non_null(agent->W[i]);
+    GNUNET_free_non_null(agent->E[i]);
   }
-  GNUNET_free(agent->W);
-  GNUNET_free(agent->e);
-  GNUNET_free(agent->s_old);
+  GNUNET_free_non_null(agent->W);
+  GNUNET_free_non_null(agent->E);
+  GNUNET_free_non_null(agent->s_old);
   GNUNET_free(agent);
 }
 
@@ -1780,7 +1790,7 @@
   unsigned int bytes_hole;
   unsigned int bytes_after;
 
-  GNUNET_assert(old_length > hole_length);
+  GNUNET_assert(old_length >= hole_length);
   GNUNET_assert(old_length >= (hole_start + hole_length));
 
   size = element_size * (old_length - hole_length);
@@ -2119,17 +2129,23 @@
   n_old = agent->n;
 
   GNUNET_array_grow(agent->W, agent->n, n_new);
+  agent->n = n_old;
+  GNUNET_array_grow(agent->E, agent->n, n_new);
   for (i = 0; i < n_new; i++)
   {
     if (i < n_old)
     {
       agent->m = m_old;
       GNUNET_array_grow(agent->W[i], agent->m, m_new);
+      agent->m = m_old;
+      GNUNET_array_grow(agent->E[i], agent->m, m_new);
     }
     else
     {
       zero = 0;
       GNUNET_array_grow(agent->W[i], zero, m_new);
+      zero = 0;
+      GNUNET_array_grow(agent->E[i], zero, m_new);
     }
   }
 
@@ -2137,9 +2153,6 @@
   agent->m = m_old;
   GNUNET_array_grow(agent->s_old, agent->m, m_new);
 
-  agent->m = m_old;
-  GNUNET_array_grow(agent->e, agent->m, m_new);
-
   ril_try_unblock_agent(s, agent, GNUNET_NO);
 
   ril_step (s);
@@ -2204,18 +2217,25 @@
   m_new = agent->m - ((s->parameters.divisor+1) * (s->parameters.divisor+1));
   n_new = agent->n - 1;
 
-  LOG(GNUNET_ERROR_TYPE_DEBUG, "first\n");
-
   for (i = 0; i < agent->n; i++)
   {
+    LOG(GNUNET_ERROR_TYPE_DEBUG, "first\n");
     ril_cut_from_vector ((void **) &agent->W[i], sizeof(double),
         address_index * ((s->parameters.divisor+1) * (s->parameters.divisor+1)),
         ((s->parameters.divisor+1) * (s->parameters.divisor+1)), agent->m);
+    LOG(GNUNET_ERROR_TYPE_DEBUG, "sec\n");
+    ril_cut_from_vector ((void **) &agent->E[i], sizeof(double),
+        address_index * ((s->parameters.divisor+1) * (s->parameters.divisor+1)),
+        ((s->parameters.divisor+1) * (s->parameters.divisor+1)), agent->m);
   }
-  GNUNET_free(agent->W[RIL_ACTION_TYPE_NUM + address_index]);
-  LOG(GNUNET_ERROR_TYPE_DEBUG, "second\n");
+  GNUNET_free_non_null(agent->W[RIL_ACTION_TYPE_NUM + address_index]);
+  GNUNET_free_non_null(agent->E[RIL_ACTION_TYPE_NUM + address_index]);
+  LOG(GNUNET_ERROR_TYPE_DEBUG, "third\n");
   ril_cut_from_vector ((void **) &agent->W, sizeof(double *), RIL_ACTION_TYPE_NUM + address_index,
       1, agent->n);
+  LOG(GNUNET_ERROR_TYPE_DEBUG, "fourth\n");
+  ril_cut_from_vector ((void **) &agent->E, sizeof(double *), RIL_ACTION_TYPE_NUM + address_index,
+      1, agent->n);
   //correct last action
   if (agent->a_old > (RIL_ACTION_TYPE_NUM + address_index))
   {
@@ -2225,14 +2245,11 @@
   {
     agent->a_old = RIL_ACTION_INVALID;
   }
-  //decrease old state vector and eligibility vector
-  LOG(GNUNET_ERROR_TYPE_DEBUG, "third\n");
+  //decrease old state vector
+  LOG(GNUNET_ERROR_TYPE_DEBUG, "fifth\n");
   ril_cut_from_vector ((void **) &agent->s_old, sizeof(double),
       address_index * ((s->parameters.divisor+1) * (s->parameters.divisor+1)),
       ((s->parameters.divisor+1) * (s->parameters.divisor+1)), agent->m);
-  ril_cut_from_vector ((void **) &agent->e, sizeof(double),
-      address_index * ((s->parameters.divisor+1) * (s->parameters.divisor+1)),
-      ((s->parameters.divisor+1) * (s->parameters.divisor+1)), agent->m);
   agent->m = m_new;
   agent->n = n_new;
 




reply via email to

[Prev in Thread] Current Thread [Next in Thread]