bug-gnu-utils
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: memory leak in gawk 3.1.0


From: Aharon Robbins
Subject: Re: memory leak in gawk 3.1.0
Date: Tue, 14 May 2002 17:24:44 +0300

This note is for the list and its archives. Tony received a
preliminary version of this fix.

> Date: Thu, 2 May 2002 01:56:14 -0700
> From: Tony Leneis <address@hidden>
> To: address@hidden
> Subject: memory leak in gawk 3.1.0
>
>       There's a rather nasty memory leak in gawk 3.1.0 when a next statement
> is called from within a for (index in array) loop.  For example, this gawk
> program eventually consumes over 100 megs after processing around 2 million
> lines:
>
> BEGIN {
>   x[1] = 0
> }
> {
>   for (i in x)
>     next
> }
>
> I assume the for (i in x) construct must allocate some memory that isn't
> freed when the loop is exited via the next statement.
>
> -Tony
>
> -- 
> e-mail: address@hidden                 Computerized Vehicle Registration
> phone : 503 402-3531                    2525 SW 1st Ave, Suite 450
> FAX   : 503 294-1526                    Portland, OR 97201-4760

This is indeed a bug. Below is a patch relative to the current
official version, 3.1.1, that fixes the leak.

Arnold
---------------------------------------------------------------------
*** ../gawk-3.1.1/eval.c        Tue Apr 16 14:41:14 2002
--- eval.c      Wed May  8 10:40:16 2002
***************
*** 33,38 ****
--- 33,42 ----
  static NODE *op_assign P((NODE *tree));
  static NODE *func_call P((NODE *name, NODE *arg_list));
  static NODE *match_op P((NODE *tree));
+ static int forloops_active P((void));
+ static void pop_forloop P((void));
+ static void pop_all_forloops P((void));
+ static void push_forloop P((char *vname, NODE **elems, size_t nelems));
  static void push_args P((int count, NODE *arglist, NODE **oldstack,
                        char *func_name, char **varnames));
  static void pop_fcall_stack P((void));
***************
*** 369,376 ****
                                }
                                break;
                        case TAG_CONTINUE:      /* NEXT statement */
                                return 1;
!                       case TAG_BREAK:
                                return 0;
                        default:
                                cant_happen();
--- 373,388 ----
                                }
                                break;
                        case TAG_CONTINUE:      /* NEXT statement */
+                               if (forloops_active())
+                                       pop_all_forloops();
+                               if (in_function())
+                                       pop_fcall_stack();
                                return 1;
!                       case TAG_BREAK:         /* EXIT statement */
!                               if (forloops_active())
!                                       pop_all_forloops();
!                               if (in_function())
!                                       pop_fcall_stack();
                                return 0;
                        default:
                                cant_happen();
***************
*** 509,514 ****
--- 521,527 ----
                        qsort(list, num_elems, sizeof(NODE *), comp_func); /* 
shazzam! */
  
                /* now we can run the loop */
+               push_forloop(array->vname, list, num_elems);
                PUSH_BINDING(loop_tag_stack, loop_tag, loop_tag_valid);
  
                lhs = get_lhs(tree->hakvar, &after_assign, FALSE);
***************
*** 536,551 ****
  
        done:
                RESTORE_BINDING(loop_tag_stack, loop_tag, loop_tag_valid);
  
                if (do_lint && num_elems != array->table_size)
                        lintwarn(_("for loop: array `%s' changed size from %d 
to %d during loop execution"),
                                        array->vname, num_elems, 
array->table_size);
                
-               for (i = 0; i < num_elems; i++)
-                       unref(list[i]);
- 
-               free(list);
- 
                if (retval == 1)
                        return 1;
                break;
--- 549,560 ----
  
        done:
                RESTORE_BINDING(loop_tag_stack, loop_tag, loop_tag_valid);
+               pop_forloop();
  
                if (do_lint && num_elems != array->table_size)
                        lintwarn(_("for loop: array `%s' changed size from %d 
to %d during loop execution"),
                                        array->vname, num_elems, 
array->table_size);
                
                if (retval == 1)
                        return 1;
                break;
***************
*** 567,574 ****
                        }
                        if (! do_traditional || do_posix)
                                fatal(_("`break' outside a loop is not 
allowed"));
-                       if (in_function())
-                               pop_fcall_stack();
                        longjmp(rule_tag, TAG_CONTINUE);
                } else
                        longjmp(loop_tag, TAG_BREAK);
--- 576,581 ----
***************
*** 590,597 ****
                        }
                        if (! do_traditional || do_posix)
                                fatal(_("`continue' outside a loop is not 
allowed"));
-                       if (in_function())
-                               pop_fcall_stack();
                        longjmp(rule_tag, TAG_CONTINUE);
                } else
                        longjmp(loop_tag, TAG_CONTINUE);
--- 597,602 ----
***************
*** 623,632 ****
                else if (in_end_rule)
                        fatal(_("`next' cannot be called from an END rule"));
  
!               /* could add a lint check here */
!               if (in_function())
!                       pop_fcall_stack();
! 
                longjmp(rule_tag, TAG_CONTINUE);
                break;
  
--- 628,634 ----
                else if (in_end_rule)
                        fatal(_("`next' cannot be called from an END rule"));
  
!               /* could add a lint check here for in a loop or function */
                longjmp(rule_tag, TAG_CONTINUE);
                break;
  
***************
*** 637,643 ****
                else if (in_end_rule)
                        fatal(_("`nextfile' cannot be called from an END 
rule"));
  
!               /* could add a lint check here */
                if (in_function())
                        pop_fcall_stack();
  
--- 639,652 ----
                else if (in_end_rule)
                        fatal(_("`nextfile' cannot be called from an END 
rule"));
  
!               /* could add a lint check here for in a loop or function */
!               /*
!                * Have to do this cleanup here, since we don't longjump
!                * back to the main awk rule loop (rule_tag).
!                */
!               if (forloops_active())
!                       pop_all_forloops();
! 
                if (in_function())
                        pop_fcall_stack();
  
***************
*** 1277,1288 ****
        return *lhs;
  }
  
  static struct fcall {
!       char *fname;
!       unsigned long count;
!       NODE *arglist;
!       NODE **prevstack;
!       NODE **stack;
  } *fcall_list = NULL;
  
  static long fcall_list_size = 0;
--- 1286,1391 ----
        return *lhs;
  }
  
+ /*
+  * Avoiding memory leaks is difficult.  In paticular, any of `next',
+  * `nextfile', `break' or `continue' (when not in a loop), can longjmp
+  * out to the outermost level.  This leaks memory if it happens in a
+  * called function. It also leaks memory if it happens in a
+  * `for (iggy in foo)' loop, since such loops malloc an array of the
+  * current array indices to loop over, which provides stability.
+  *
+  * The following code takes care of these problems.  First comes the
+  * array-loop management code.  This can be a stack of arrays being looped
+  * on at any one time.  This stack serves for both mainline code and
+  * function body code. As each loop starts and finishes, it pushes its
+  * info onto this stack and off of it; whether the loop is in a function
+  * body or not isn't relevant.
+  *
+  * Since the list of indices is created using dupnode(), when popping
+  * this stack it should be safe to unref() things, and then memory
+  * will get finally released when the function call stack is popped.
+  * This means that the loop_stack should be popped first upon a `next'.
+  */
+ 
+ static struct loop_info {
+       char *vname;            /* variable name, for debugging */
+       NODE **elems;           /* list of indices */
+       size_t nelems;          /* how many there are */
+ } *loop_stack = NULL;
+ size_t nloops = 0;            /* how many slots there are in the stack */
+ size_t nloops_active = 0;     /* how many loops are actively stacked */
+ 
+ 
+ /* forloops_active --- return true if there are loops that need popping */
+ 
+ static int
+ forloops_active()
+ {
+       return nloops > 0;
+ }
+ 
+ /* pop_forloop --- pop one for loop off the stack */
+ 
+ static void
+ pop_forloop()
+ {
+       int i, curloop;
+       struct loop_info *loop;
+ 
+       assert(nloops_active > 0);
+ 
+       curloop = --nloops_active;      /* 0-based indexing */
+       loop = & loop_stack[curloop];
+ 
+       for (i = 0; i < loop->nelems; i++)
+               unref(loop->elems[i]);
+ 
+       free(loop->elems);
+ 
+       loop->elems = NULL;
+       loop->vname = NULL;
+       loop->nelems = 0;
+ }
+ 
+ /* pop_forloops --- pop the for loops stack all the way */
+ 
+ static void
+ pop_all_forloops()
+ {
+       while (nloops_active > 0)
+               pop_forloop();  /* decrements nloops_active for us */
+ }
+ 
+ /* push_forloop --- add a single for loop to the stack */
+ 
+ static void
+ push_forloop(char *vname, NODE **elems, size_t nelems)
+ {
+ #define NLOOPS        4       /* seems like a good guess */
+       if (loop_stack == NULL) {
+               /* allocate stack, set vars */
+               nloops = NLOOPS;
+               emalloc(loop_stack, struct loop_info *, nloops * sizeof(struct 
loop_info),
+                               "push_forloop");
+       } else if (nloops_active == nloops) {
+               /* grow stack, set vars */
+               nloops *= 2;
+               erealloc(loop_stack, struct loop_info *, nloops * sizeof(struct 
loop_info),
+                               "push_forloop");
+       }
+ 
+       loop_stack[nloops_active].vname = vname;
+       loop_stack[nloops_active].elems = elems;
+       loop_stack[nloops_active].nelems = nelems;
+       nloops_active++;
+ }
+ 
  static struct fcall {
!       char *fname;            /* function name */
!       unsigned long count;    /* how many args */
!       NODE *arglist;          /* list thereof */
!       NODE **prevstack;       /* function stack frame of previous function */
!       NODE **stack;           /* function stack frame of current function */
  } *fcall_list = NULL;
  
  static long fcall_list_size = 0;



reply via email to

[Prev in Thread] Current Thread [Next in Thread]