gnunet-svn
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[GNUnet-SVN] r23982 - gnunet/src/regex


From: gnunet
Subject: [GNUnet-SVN] r23982 - gnunet/src/regex
Date: Mon, 24 Sep 2012 21:11:42 +0200

Author: szengel
Date: 2012-09-24 21:11:42 +0200 (Mon, 24 Sep 2012)
New Revision: 23982

Modified:
   gnunet/src/regex/regex.c
   gnunet/src/regex/test_regex_iterate_api.c
Log:
regex: iteration improvements/fixes

Modified: gnunet/src/regex/regex.c
===================================================================
--- gnunet/src/regex/regex.c    2012-09-24 18:49:45 UTC (rev 23981)
+++ gnunet/src/regex/regex.c    2012-09-24 19:11:42 UTC (rev 23982)
@@ -86,7 +86,6 @@
                       struct GNUNET_REGEX_State *from_state, const char *label,
                       struct GNUNET_REGEX_State *to_state)
 {
-  int is_dup;
   struct GNUNET_REGEX_Transition *t;
   struct GNUNET_REGEX_Transition *oth;
 
@@ -97,20 +96,13 @@
   }
 
   // Do not add duplicate state transitions
-  is_dup = GNUNET_NO;
   for (t = from_state->transitions_head; NULL != t; t = t->next)
   {
     if (t->to_state == to_state && 0 == nullstrcmp (t->label, label) &&
         t->from_state == from_state)
-    {
-      is_dup = GNUNET_YES;
-      break;
-    }
+      return;
   }
 
-  if (GNUNET_YES == is_dup)
-    return;
-
   // sort transitions by label
   for (oth = from_state->transitions_head; NULL != oth; oth = oth->next)
   {
@@ -151,10 +143,11 @@
   if (transition->from_state != state)
     return;
 
+  GNUNET_free_non_null (transition->label);
+
   state->transition_count--;
   GNUNET_CONTAINER_DLL_remove (state->transitions_head, state->transitions_tail,
                                transition);
-  GNUNET_free_non_null (transition->label);
   GNUNET_free (transition);
 }
 
@@ -257,11 +250,12 @@
 static void
 state_set_clear (struct GNUNET_REGEX_StateSet *set)
 {
-  if (NULL != set)
-  {
-    GNUNET_free_non_null (set->states);
-    GNUNET_free (set);
-  }
+  if (NULL == set)
+    return;
+
+  if (set->len > 0)
+    GNUNET_array_grow (set->states, set->len, 0);
+  GNUNET_free (set);
 }
 
 
@@ -302,17 +296,14 @@
 
   GNUNET_free_non_null (s->name);
   GNUNET_free_non_null (s->proof);
+  state_set_clear (s->nfa_set);
 
   for (t = s->transitions_head; NULL != t; t = next_t)
   {
     next_t = t->next;
-    GNUNET_CONTAINER_DLL_remove (s->transitions_head, s->transitions_tail, t);
-    GNUNET_free_non_null (t->label);
-    GNUNET_free (t);
+    state_remove_transition (s, t);
   }
 
-  state_set_clear (s->nfa_set);
-
   GNUNET_free (s);
 }
 
@@ -329,34 +320,30 @@
 automaton_remove_state (struct GNUNET_REGEX_Automaton *a,
                         struct GNUNET_REGEX_State *s)
 {
-  struct GNUNET_REGEX_State *ss;
   struct GNUNET_REGEX_State *s_check;
   struct GNUNET_REGEX_Transition *t_check;
+  struct GNUNET_REGEX_Transition *t_check_next;
 
   if (NULL == a || NULL == s)
     return;
 
-  // remove state
-  ss = s;
-  GNUNET_CONTAINER_DLL_remove (a->states_head, a->states_tail, s);
-  a->state_count--;
-
   // remove all transitions leading to this state
   for (s_check = a->states_head; NULL != s_check; s_check = s_check->next)
   {
     for (t_check = s_check->transitions_head; NULL != t_check;
-         t_check = t_check->next)
+         t_check = t_check_next)
     {
-      if (t_check->to_state == ss)
-      {
-        GNUNET_CONTAINER_DLL_remove (s_check->transitions_head,
-                                     s_check->transitions_tail, t_check);
-        s_check->transition_count--;
-      }
+      t_check_next = t_check->next;
+      if (t_check->to_state == s)
+        state_remove_transition (s_check, t_check);
     }
   }
 
-  automaton_destroy_state (ss);
+  // remove state
+  GNUNET_CONTAINER_DLL_remove (a->states_head, a->states_tail, s);
+  a->state_count--;
+
+  automaton_destroy_state (s);
 }
 
 
@@ -1703,8 +1690,6 @@
     t->from_state = start;
     GNUNET_CONTAINER_DLL_insert (*transitions_head, *transitions_tail, t);
 
-    GNUNET_free_non_null (label);
-
     if (GNUNET_NO == cur->marked)
     {
       dfa_compress_paths_helper (cur, cur, NULL, transitions_head,
@@ -1733,6 +1718,7 @@
       dfa_compress_paths_helper (start, t->to_state, new_label,
                                  transitions_head, transitions_tail);
     }
+    GNUNET_free (new_label);
   }
 }
 
@@ -2563,11 +2549,11 @@
   GNUNET_free_non_null (a->regex);
   GNUNET_free_non_null (a->canonical_regex);
 
-  for (s = a->states_head; NULL != s;)
+  for (s = a->states_head; NULL != s; s = next_state)
   {
     next_state = s->next;
+    GNUNET_CONTAINER_DLL_remove (a->states_head, a->states_tail, s);
     automaton_destroy_state (s);
-    s = next_state;
   }
 
   GNUNET_free (a);
@@ -2815,7 +2801,6 @@
  *
  * @param min_len minimum length of the path in the graph.
  * @param max_len maximum length of the path in the graph.
- * @param cur_len current length of the path already traversed.
  * @param consumed_string string consumed by traversing the graph till this state.
  * @param state current state of the automaton.
  * @param iterator iterator function called for each edge.
@@ -2823,8 +2808,7 @@
  */
 static void
 iterate_initial_edge (const unsigned int min_len, const unsigned int max_len,
-                      unsigned int cur_len, char *consumed_string,
-                      struct GNUNET_REGEX_State *state,
+                      char *consumed_string, struct GNUNET_REGEX_State *state,
                       GNUNET_REGEX_KeyIterator iterator, void *iterator_cls)
 {
   unsigned int i;
@@ -2834,22 +2818,56 @@
   struct GNUNET_REGEX_Edge edges[num_edges];
   struct GNUNET_HashCode hash;
 
-  if (cur_len > min_len && NULL != consumed_string && cur_len <= max_len)
+  unsigned int cur_len;
+
+  if (NULL != consumed_string)
+    cur_len = strlen (consumed_string);
+  else
+    cur_len = 0;
+
+  if (cur_len > min_len && NULL != consumed_string)
   {
-    for (i = 0, t = state->transitions_head; NULL != t; t = t->next, i++)
+
+    if (cur_len <= max_len)
     {
-      edges[i].label = t->label;
-      edges[i].destination = t->to_state->hash;
+      for (i = 0, t = state->transitions_head; NULL != t && i < num_edges;
+           t = t->next, i++)
+      {
+        edges[i].label = t->label;
+        edges[i].destination = t->to_state->hash;
+      }
+
+      GNUNET_CRYPTO_hash (consumed_string, strlen (consumed_string), &hash);
+      iterator (iterator_cls, &hash, consumed_string, state->accepting,
+                num_edges, edges);
+
+        // Special case for regex consisting of just a string that is shorter than max_len
+      if (GNUNET_YES == state->accepting && cur_len > 1 &&
+          state->transition_count < 1)
+      {
+        edges[0].label = &consumed_string[cur_len - 1];
+        edges[0].destination = state->hash;
+        temp = GNUNET_strdup (consumed_string);
+        temp[cur_len - 1] = '\0';
+        GNUNET_CRYPTO_hash (temp, cur_len - 1, &hash);
+        iterator (iterator_cls, &hash, temp, GNUNET_NO, 1, edges);
+        GNUNET_free (temp);
+      }
     }
-
-    GNUNET_CRYPTO_hash (consumed_string, strlen (consumed_string), &hash);
-    iterator (iterator_cls, &hash, consumed_string, state->accepting, num_edges,
-              edges);
+    else
+    {
+      edges[0].label = &consumed_string[max_len];
+      edges[0].destination = state->hash;
+      temp = GNUNET_strdup (consumed_string);
+      temp[max_len] = '\0';
+      GNUNET_CRYPTO_hash (temp, max_len, &hash);
+      iterator (iterator_cls, &hash, temp, GNUNET_NO, 1, edges);
+      GNUNET_free (temp);
+    }
   }
 
   if (cur_len < max_len)
   {
-    cur_len++;
     for (t = state->transitions_head; NULL != t; t = t->next)
     {
       if (NULL != consumed_string)
@@ -2857,8 +2875,8 @@
       else
         GNUNET_asprintf (&temp, "%s", t->label);
 
-      iterate_initial_edge (min_len, max_len, cur_len, temp, t->to_state,
-                            iterator, iterator_cls);
+      iterate_initial_edge (min_len, max_len, temp, t->to_state, iterator,
+                            iterator_cls);
       GNUNET_free (temp);
     }
   }
@@ -2866,69 +2884,8 @@
 
 
 /**
- * Iterate over all initial edges that aren't actually part of the automaton.
- * This is needed to find the initial states returned by
- * GNUNET_REGEX_get_first_key. Iteration will start at the first state that has
- * more than one outgoing edge, i.e. the state that branches the graph.
- * For example consider the following graph:
- * a -> b -> c -> d -> ...
- *            \-> e -> ...
- *
- * This function will not iterate over the edges leading to "c", because these
- * will be covered by the iterate_edges function.
- *
- * @param a the automaton for which the initial states should be computed.
- * @param initial_len length of the initial state string.
- * @param iterator iterator function called for each edge.
- * @param iterator_cls closure for the iterator function.
- */
-void
-iterate_initial_edges (struct GNUNET_REGEX_Automaton *a,
-                       const unsigned int initial_len,
-                       GNUNET_REGEX_KeyIterator iterator, void *iterator_cls)
-{
-  char *consumed_string;
-  char *temp;
-  struct GNUNET_REGEX_State *s;
-  unsigned int cur_len;
-
-  if (1 > initial_len)
-    return;
-
-  consumed_string = NULL;
-  s = a->start;
-  cur_len = 0;
-
-  if (1 == s->transition_count)
-  {
-    do
-    {
-      if (NULL != consumed_string)
-      {
-        temp = consumed_string;
-        GNUNET_asprintf (&consumed_string, "%s%s", consumed_string,
-                         s->transitions_head->label);
-        GNUNET_free (temp);
-      }
-      else
-        GNUNET_asprintf (&consumed_string, "%s", s->transitions_head->label);
-
-      s = s->transitions_head->to_state;
-      cur_len += strlen (s->transitions_head->label);
-    }
-    while (cur_len < initial_len && 1 == s->transition_count);
-  }
-
-  iterate_initial_edge (cur_len, initial_len, cur_len, consumed_string, s,
-                        iterator, iterator_cls);
-
-  GNUNET_free_non_null (consumed_string);
-}
-
-
-/**
  * Iterate over all edges helper function starting from state 's', calling
- * iterator function for each edge.
+ * iterator function for each edge if the automaton.
  *
  * @param s state.
  * @param iterator iterator function called for each edge.
@@ -2976,6 +2933,7 @@
   for (s = a->states_head; NULL != s; s = s->next)
     s->marked = GNUNET_NO;
 
-  iterate_initial_edges (a, INITIAL_BITS, iterator, iterator_cls);
+  iterate_initial_edge (0, INITIAL_BITS, NULL, a->start, iterator,
+                        iterator_cls);
   iterate_edge (a->start, iterator, iterator_cls);
 }

Modified: gnunet/src/regex/test_regex_iterate_api.c
===================================================================
--- gnunet/src/regex/test_regex_iterate_api.c   2012-09-24 18:49:45 UTC (rev 23981)
+++ gnunet/src/regex/test_regex_iterate_api.c   2012-09-24 19:11:42 UTC (rev 23982)
@@ -28,15 +28,27 @@
 #include "gnunet_regex_lib.h"
 #include "regex_internal.h"
 
+#define GNUNET_REGEX_ITERATE_SAVE_DEBUG_GRAPH GNUNET_NO
+
 static unsigned int transition_counter;
 
 struct IteratorContext
 {
   int error;
   int should_save_graph;
-  FILE *graph_file;
+  FILE *graph_filep;
+  unsigned int string_count;
+  char *const *strings;
+  unsigned int match_count;
 };
 
+struct RegexStringPair
+{
+  char *regex;
+  unsigned int string_count;
+  char *strings[20];
+};
+
 void
 key_iterator (void *cls, const struct GNUNET_HashCode *key, const char *proof,
               int accepting, unsigned int num_edges,
@@ -44,21 +56,41 @@
 {
   unsigned int i;
   struct IteratorContext *ctx = cls;
+  char *out_str;
+  char *state_id = GNUNET_strdup (GNUNET_h2s (key));
 
-  GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Iterating... (accepting: %i)\n",
-              accepting);
+  if (GNUNET_YES == ctx->should_save_graph)
+  {
+    if (GNUNET_YES == accepting)
+      GNUNET_asprintf (&out_str, "\"%s\" [shape=doublecircle]\n", state_id);
+    else
+      GNUNET_asprintf (&out_str, "\"%s\" [shape=circle]\n", state_id);
+    fwrite (out_str, strlen (out_str), 1, ctx->graph_filep);
+    GNUNET_free (out_str);
 
-  if (NULL != proof)
-    GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Proof: %s\n", proof);
+    for (i = 0; i < num_edges; i++)
+    {
+      transition_counter++;
+      GNUNET_asprintf (&out_str, "\"%s\" -> \"%s\" [label = \"%s (%s)\"]\n",
+                       state_id, GNUNET_h2s (&edges[i].destination),
+                       edges[i].label, proof);
+      fwrite (out_str, strlen (out_str), 1, ctx->graph_filep);
 
-  if (NULL != key)
-    GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Hash: %s\n", GNUNET_h2s (key));
+      GNUNET_free (out_str);
+    }
+  }
+  else
+  {
+    for (i = 0; i < num_edges; i++)
+      transition_counter++;
+  }
 
-  for (i = 0; i < num_edges; i++)
+  GNUNET_free (state_id);
+
+  for (i = 0; i < ctx->string_count; i++)
   {
-    transition_counter++;
-    GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Edge %i: Label: %s Destination: %s\n",
-                i, edges[i].label, GNUNET_h2s (&edges[i].destination));
+    if (0 == strcmp (proof, ctx->strings[i]))
+      ctx->match_count++;
   }
 
   ctx->error += (GNUNET_OK == GNUNET_REGEX_check_proof (proof, key)) ? 0 : 1;
@@ -80,49 +112,112 @@
   unsigned int i;
   unsigned int num_transitions;
   struct IteratorContext ctx = { 0, 0, NULL };
+  char *filename = NULL;
 
   error = 0;
 
-  const char *regex[17] = {
-    "ab(c|d)+c*(a(b|c)+d)+(bla)+",
-    "(bla)*",
-    "b(lab)*la",
-    "(ab)*",
-    "ab(c|d)+c*(a(b|c)+d)+(bla)(bla)*",
-    "z(abc|def)?xyz",
-    "1*0(0|1)*",
-    "a*b*",
-    "a+X*y+c|p|R|Z*K*y*R+w|Y*6+n+h*k*w+V*F|W*B*e*",
-    
"abcd:(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1):(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)",
-    "abc(1|0)*def",
-    "ab|ac",
-    "(ab)(ab)*",
-    "ab|cd|ef|gh",
-    "a|b|c|d|e|f|g",
-    "(ab)|(ac)",
-    "x*|(0|1|2)(a|b|c|d)"
+  const struct RegexStringPair rxstr[10] = {
+    {"ab(c|d)+c*(a(b|c)+d)+(bla)+", 2, {"abcdcdca", "abcabdbl"}},
+    {"abcdefghijklmnop*qst", 1, {"abcdefgh"}},
+    {"VPN-4-1(0|1)*", 2, {"VPN-4-10", "VPN-4-11"}},
+    {"a+X*y+c|p|R|Z*K*y*R+w|Y*6+n+h*k*w+V*F|W*B*e*", 4,
+     {"aaaaaaaa", "aaXXyyyc", "p", "Y"}},
+    {"a*", 8,
+     {"a", "aa", "aaa", "aaaa", "aaaaa", "aaaaaa", "aaaaaaa", "aaaaaaaa"}},
+    {"xzxzxzxzxz", 1, {"xzxzxzxz"}},
+    {"xyz*", 2, {"xy", "xyz"}},
+    {"ab", 1, {"a"}},
+    
{"abcd:(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1):(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)",
 2, {"abcd:000", "abcd:101"}},
+    {"x*|(0|1|2)(a|b|c|d)", 2, {"xxxxxxxx", "0a"}}
   };
 
-  for (i = 0; i < 17; i++)
+  const char *graph_start_str = "digraph G {\nrankdir=LR\n";
+  const char *graph_end_str = "\n}\n";
+
+  for (i = 0; i < 10; i++)
   {
+    // Create graph
+    if (GNUNET_YES == GNUNET_REGEX_ITERATE_SAVE_DEBUG_GRAPH)
+    {
+      GNUNET_asprintf (&filename, "iteration_graph_%u.dot", i);
+      ctx.graph_filep = fopen (filename, "w");
+      if (NULL == ctx.graph_filep)
+      {
+        GNUNET_log (GNUNET_ERROR_TYPE_WARNING,
+                    "Could not open file %s for saving iteration graph.\n",
+                    filename);
+        ctx.should_save_graph = GNUNET_NO;
+      }
+      else
+      {
+        ctx.should_save_graph = GNUNET_YES;
+        fwrite (graph_start_str, strlen (graph_start_str), 1, ctx.graph_filep);
+      }
+      GNUNET_free (filename);
+    }
+    else
+    {
+      ctx.should_save_graph = GNUNET_NO;
+    }
+
+    // Iterate over DFA edges
     transition_counter = 0;
-    dfa = GNUNET_REGEX_construct_dfa (regex[i], strlen (regex[i]));
+    ctx.string_count = rxstr[i].string_count;
+    ctx.strings = rxstr[i].strings;
+    ctx.match_count = 0;
+    dfa = GNUNET_REGEX_construct_dfa (rxstr[i].regex, strlen (rxstr[i].regex));
     GNUNET_REGEX_iterate_all_edges (dfa, key_iterator, &ctx);
     num_transitions = GNUNET_REGEX_get_transition_count (dfa);
-    if (transition_counter != num_transitions)
+
+    if (transition_counter < num_transitions)
     {
-      GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
+      GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
                   "Automaton has %d transitions, iterated over %d transitions\n",
                   num_transitions, transition_counter);
+      error += 1;
+      break;
     }
+
+    if (ctx.match_count < ctx.string_count)
+    {
+      GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
+                  "Missing initial states for regex %s\n", rxstr[i].regex);
+      error += (ctx.string_count - ctx.match_count);
+    }
+    else if (ctx.match_count > ctx.string_count)
+    {
+      GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
+                  "Doublicate initial transitions for regex %s\n",
+                  rxstr[i].regex);
+      error += (ctx.string_count - ctx.match_count);
+    }
+
     GNUNET_REGEX_automaton_destroy (dfa);
+
+    // Finish graph
+    if (GNUNET_YES == ctx.should_save_graph)
+    {
+      fwrite (graph_end_str, strlen (graph_end_str), 1, ctx.graph_filep);
+      fclose (ctx.graph_filep);
+      ctx.graph_filep = NULL;
+      ctx.should_save_graph = GNUNET_NO;
+    }
   }
 
-  for (i = 0; i < 17; i++)
+
+  for (i = 0; i < 10; i++)
   {
-    dfa = GNUNET_REGEX_construct_dfa (regex[i], strlen (regex[i]));
+    dfa = GNUNET_REGEX_construct_dfa (rxstr[i].regex, strlen (rxstr[i].regex));
     GNUNET_REGEX_dfa_add_multi_strides (NULL, dfa, 2);
     GNUNET_REGEX_iterate_all_edges (dfa, key_iterator, &ctx);
+
+    if (ctx.match_count < ctx.string_count)
+    {
+      GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
+                  "Missing initial states for regex %s\n", rxstr[i].regex);
+      error += (ctx.string_count - ctx.match_count);
+    }
+
     GNUNET_REGEX_automaton_destroy (dfa);
   }
 




reply via email to

[Prev in Thread] Current Thread [Next in Thread]