lynx-dev
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

lynx-dev an optimization for large pages (patch)


From: Leonid Pauzner
Subject: lynx-dev an optimization for large pages (patch)
Date: Sun, 13 Oct 2002 19:02:17 +0400 (MSD)

Hi, everybody!

I now returns to lynx again. It renders some 800Kb documentation file too
slow. Profiling with this file shows a hot point in HTAnchor_findChild (11%)
and a patch attached below will decrease this number down to  0.1%

Now details:
-----------------------------------------------
                1.20    0.74    2687/6107        HTAnchor_findAddress [14]
                1.52    0.94    3420/6107        HTAnchor_findChildAndLink [7]
[9]     10.9    2.72    1.67    6107         HTAnchor_findChild [9]
                1.66    0.00 4989243/4998014     HTIdentical [16]
                0.01    0.00    3427/69137       HTSACopy [53]
                0.00    0.00    3427/3427        HTChildAnchor_new [209]
                0.00    0.00    3427/7204        HTList_addObject [201]
                0.00    0.00       7/528         HTList_new [225]
-----------------------------------------------

It turns out the page has 3168 anchors, and  HTAnchor_findChild() usage
shows a quadratic complexity on the number of anchors on the page.

Anchores are stored in a list which is traversed each time we call a
function. This is not a full story: anchors happens to be of two types:
"with a tag" and "without a tag", both are stored in the same list, but
the first force traversing list (=we add only unique tags to the list),
while the second added to the list blindly. Don't ask me why.

So I replace the list with a tree (anchors with a tag) and a list
(the rest, without tags). Search in a balanced tree is very effective.
I added a search method to HTBTree implementation, it wasn't there.

The resulting profile:
-----------------------------------------------
                0.00    0.01    2687/6107        HTAnchor_findAddress [36]
                0.00    0.01    3420/6107        HTAnchor_findChildAndLink [33]
[87]     0.1    0.00    0.01    6107         HTAnchor_findChild [87]
                0.00    0.01    3427/3427        HTChildAnchor_new [95]
                0.00    0.01    3168/6945        HTList_addObject [90]
                0.00    0.00     259/65969       HTSACopy [43]
                0.00    0.00     259/259         HTBTree_add [119]
                0.00    0.00       7/7           HTBTree_new [212]
                0.00    0.00       1/522         HTList_new [112]
                0.00    0.00    2932/2932        HTBTree_search [326]
-----------------------------------------------

Now a patch against the latest dev.9 -


* optimization for parsing large html - with thousands of anchors -
  remove quadratic complexity from HTAnchor_findChild() usage.
  HTParentAnchor::children list was traversed zillion of times,
  now replaced with a tree (anchors of one type, fast search)
  and a list (just a storage for the rest, no search required).
  This one gives 11% speedup for 3100 anchor html on my system. - LP
* add a search method to HTBTree implementation. - LP


diff -u -p -r LYNX2-8-.590/www/library/implemen/htanchor.h 
LYNX2-8-/www/library/implemen/htanchor.h
--- LYNX2-8-.590/www/library/implemen/htanchor.h        Thu Feb  8 18:50:00 2001
+++ LYNX2-8-/www/library/implemen/htanchor.h    Sun Oct 13 17:25:32 2002
@@ -15,6 +15,7 @@
 /* Version 1 of 24-Oct-1991 (JFG), written in C, browser-independent */

 #include <HTList.h>
+#include <HTBTree.h>
 #include <HTChunk.h>
 #include <HTAtom.h>
 #include <UCDefs.h>
@@ -52,7 +53,8 @@ struct _HTParentAnchor {
   HTParentAnchor * parent;     /* Parent of this anchor (self) */

   /* ParentAnchor-specific information */
-  HTList *     children;       /* Subanchors of this, if any */
+  HTList *     children_notag; /* Subanchors of this, if any. (tag is NULL) */
+  HTBTree *    children;       /* Subanchors ... sorted by nonNULL tag */
   HTList *     sources;        /* List of anchors pointing to this, if any */
   HyperDoc *   document;       /* The document within which this is an anchor 
*/
   char *       address;        /* Absolute address of this node */
@@ -194,15 +196,6 @@ extern BOOL HTAnchor_delete PARAMS((
 extern void HTAnchor_clearSourceCache PARAMS((
        HTParentAnchor *        me));
 #endif
-
-/*             Move an anchor to the head of the list of its siblings
-**             ------------------------------------------------------
-**
-**     This is to ensure that an anchor which might have already existed
-**     is put in the correct order as we load the document.
-*/
-extern void HTAnchor_makeLastChild PARAMS((
-       HTChildAnchor *         me));

 /*     Data access functions
 **     ---------------------
diff -u -p -r LYNX2-8-.590/www/library/implemen/htanchor.c 
LYNX2-8-/www/library/implemen/htanchor.c
--- LYNX2-8-.590/www/library/implemen/htanchor.c        Sun Oct  6 17:43:28 2002
+++ LYNX2-8-/www/library/implemen/htanchor.c    Sun Oct 13 17:25:30 2002
@@ -154,12 +154,6 @@ PRIVATE BOOL HTIdentical ARGS2(
        CONST char *,   t)
 {
     if (s && t) {  /* Make sure they point to something */
-#ifdef SH_EX   /* 1998/04/28 (Tue) 22:02:58 */
-       if (*s == 'P' || *t == 'P') {
-           if (strcmp(s + 1, "Name") == 0 || strcmp(t + 1, "Name") == 0)
-               return NO;
-       }
-#endif
        for (; *s && *t; s++, t++) {
            if (*s != *t) {
                return(NO);
@@ -172,6 +166,35 @@ PRIVATE BOOL HTIdentical ARGS2(
 }
 #endif /* CASE_INSENSITIVE_ANCHORS */

+/*
+ *  Three-way compare function
+ */
+PRIVATE int compare ARGS2(
+       HTChildAnchor *,        l,
+       HTChildAnchor *,        r)
+{
+    CONST char* a = l->tag;
+    CONST char* b = r->tag;
+
+    if (a && b) {  /* Make sure they point to something */
+#ifdef CASE_INSENSITIVE_ANCHORS
+       return strcasecomp(a, b); /* Case insensitive */
+#else
+       return strcmp(a, b);        /* Case sensitive - FM */
+#endif /* CASE_INSENSITIVE_ANCHORS */
+    } else {
+       /*  bring up an order when one or both strings are nulls */
+       if (a) {
+               return 1;
+       } else if (b) {
+               return -1;
+       } else {
+               /* both nulls - assume equivalent */
+               return 0;
+       }
+    }
+}
+

 /*     Create new or find old sub-anchor
 **     ---------------------------------
@@ -184,32 +207,33 @@ PUBLIC HTChildAnchor * HTAnchor_findChil
        CONST char *,           tag)
 {
     HTChildAnchor *child;
-    HTList *kids;

     if (!parent) {
        CTRACE((tfp, "HTAnchor_findChild called with NULL parent.\n"));
        return(NULL);
     }
-    if ((kids = parent->children) != 0) {
-       /*
-       **  Parent has children.  Search them.
-       */
-       if (tag && *tag) {              /* TBL */
-           while (NULL != (child=(HTChildAnchor *)HTList_nextObject(kids))) {
-#ifdef CASE_INSENSITIVE_ANCHORS
-               if (HTEquivalent(child->tag, tag)) /* Case insensitive */
-#else
-               if (HTIdentical(child->tag, tag)) /* Case sensitive - FM */
-#endif /* CASE_INSENSITIVE_ANCHORS */
-               {
-                   CTRACE((tfp, "Child anchor %p of parent %p with name `%s' 
already exists.\n",
-                               (void *)child, (void *)parent, tag));
-                   return(child);
-               }
+
+    if (tag && *tag) {         /* TBL */
+       if (parent->children) {
+           /*
+           **  Parent has children.  Search them.
+           */
+           HTChildAnchor sample;
+           sample.tag = (char*)tag;    /* for compare() only */
+
+           child = (HTChildAnchor *)HTBTree_search(parent->children, &sample);
+           if (child != NULL) {
+               CTRACE((tfp, "Child anchor %p of parent %p with name `%s' 
already exists.\n",
+                       (void *)child, (void *)parent, tag));
+               return(child);
            }
-       }  /*  end if tag is void */
-    } else {  /* parent doesn't have any children yet : create family */
-       parent->children = HTList_new();
+       } else {  /* parent doesn't have any children yet : create family */
+           parent->children = HTBTree_new((HTComparer)compare);
+       }
+    } else { /*  if tag is void */
+       if (!parent->children_notag)
+           /* create a family of this kind */
+           parent->children_notag = HTList_new();
     }

     child = HTChildAnchor_new();
@@ -217,9 +241,16 @@ PUBLIC HTChildAnchor * HTAnchor_findChil
                (void *)child,
                NonNull(tag),
                (void *)parent)); /* int for apollo */
-    HTList_addObject (parent->children, child);
     child->parent = parent;
-    StrAllocCopy(child->tag, tag);
+
+    if (tag && *tag) {
+       StrAllocCopy(child->tag, tag);   /* should be set before HTBTree_add */
+       HTBTree_add(parent->children, child);
+    } else {
+       child->tag = 0;
+       HTList_addObject(parent->children_notag, child);
+    }
+
     return(child);
 }

@@ -628,6 +659,7 @@ PUBLIC BOOL HTAnchor_delete ARGS1(
      * 05-27-94 Lynx 2-3-1 Garrett Arch Blythe
      */
     HTList *cur;
+    HTBTElement *ele;
     HTChildAnchor *child;

     /*
@@ -668,12 +700,18 @@ PUBLIC BOOL HTAnchor_delete ARGS1(
         *  Delete all outgoing links from children, do not
         *  delete the children, though.
         */
-       if (!HTList_isEmpty(me->children)) {
-           cur = me->children;
+       if (!HTList_isEmpty(me->children_notag)) {
+           cur = me->children_notag;
            while ((child = (HTChildAnchor *)HTList_nextObject(cur)) != 0) {
-               if (child != NULL) {
-                   deleteLinks((HTAnchor *)child);
-               }
+               deleteLinks((HTAnchor *)child);
+           }
+       }
+       if (me->children) {
+           ele = me->children->top;
+           while (ele != NULL) {
+               child = (HTChildAnchor *)HTBTree_object(ele);
+               deleteLinks((HTAnchor *)child);
+               ele = HTBTree_next(me->children, ele);
            }
        }
        me->underway = FALSE;
@@ -688,26 +726,33 @@ PUBLIC BOOL HTAnchor_delete ARGS1(
      * No more incoming links : kill everything
      * First, recursively delete children and their links.
      */
-    if (!HTList_isEmpty(me->children)) {
+    if (me->children) {
+       ele = me->children->top;
+       while (ele != NULL) {
+           child = (HTChildAnchor *)HTBTree_object(ele);
+           deleteLinks((HTAnchor *)child);
+           FREE(child->tag);
+           ele = HTBTree_next(me->children, ele);
+       }
+       HTBTreeAndObject_free(me->children);
+       me->children = NULL;
+    }
+    /* ...and this kind of children */
+    if (!HTList_isEmpty(me->children_notag)) {
        while ((child = (HTChildAnchor *)HTList_removeLastObject(
-                                                       me->children)) != 0) {
-           if (child) {
-               deleteLinks((HTAnchor *)child);
-               if (child->tag) {
-                   FREE(child->tag);
-               }
-               FREE(child);
-           }
+                                                       me->children_notag)) != 
0) {
+           deleteLinks((HTAnchor *)child);
+           FREE(child);
        }
     }
     me->underway = FALSE;

     /*
-     * Delete our empty list of children.
+     * Delete our empty list of children_notag.
      */
-    if (me->children) {
-       HTList_delete(me->children);
-       me->children = NULL;
+    if (me->children_notag) {
+       HTList_delete(me->children_notag);
+       me->children_notag = NULL;
     }

     /*
@@ -811,22 +856,6 @@ PUBLIC BOOL HTAnchor_delete ARGS1(
 }


-/*             Move an anchor to the head of the list of its siblings
-**             ------------------------------------------------------
-**
-**     This is to ensure that an anchor which might have already existed
-**     is put in the correct order as we load the document.
-*/
-PUBLIC void HTAnchor_makeLastChild ARGS1(
-       HTChildAnchor *,        me)
-{
-    if (me->parent != (HTParentAnchor *)me) {  /* Make sure it's a child */
-       HTList * siblings = me->parent->children;
-       HTList_removeObject (siblings, me);
-       HTList_addObject (siblings, me);
-    }
-}
-
 /*     Data access functions
 **     ---------------------
 */
@@ -919,7 +948,7 @@ PUBLIC BOOL HTAnchor_isISMAPScript ARGS1
 PUBLIC BOOL HTAnchor_hasChildren ARGS1(
        HTParentAnchor *,       me)
 {
-    return (BOOL) ( me ? ! HTList_isEmpty(me->children) : NO);
+    return (BOOL) ( me ? (me->children || !HTList_isEmpty(me->children_notag)) 
: NO);
 }

 #if defined(USE_COLOR_STYLE)
diff -u -p -r LYNX2-8-.590/www/library/implemen/htbtree.c 
LYNX2-8-/www/library/implemen/htbtree.c
--- LYNX2-8-.590/www/library/implemen/htbtree.c Thu Dec 21 18:44:12 2000
+++ LYNX2-8-/www/library/implemen/htbtree.c     Sun Oct 13 17:25:38 2002
@@ -83,6 +83,30 @@ PUBLIC void HTBTreeAndObject_free ARGS1(
 }


+PUBLIC void * HTBTree_search ARGS2(
+                   HTBTree*,  tree,
+                   void*,     object)
+    /**********************************************************************
+    ** Returns a pointer to equivalent object in a tree or NULL if none.
+    */
+{
+    HTBTElement * cur = tree->top;
+    int res;
+
+    while (cur != NULL)
+    {
+        res = tree->compare(object, cur->object);
+
+        if (res == 0)
+            return cur->object;
+        else if (res < 0)
+            cur = cur->left;
+        else if (res > 0)
+            cur = cur->right;
+    }
+    return NULL;
+}
+


 PUBLIC void HTBTree_add ARGS2(
@@ -147,7 +171,8 @@ PUBLIC void HTBTree_add ARGS2(
         forefather_of_element = NULL;
         while (father_found)
         {
-            if (tree->compare(object,father_of_element->object)<0)
+            int res = tree->compare(object,father_of_element->object);
+            if (res < 0)
            {
                 if (father_of_element->left != NULL)
                     father_of_element = father_of_element->left;
@@ -167,8 +192,8 @@ PUBLIC void HTBTree_add ARGS2(
                     added_element->right_depth = 0;
                 }
            }
-            if (tree->compare(object,father_of_element->object)>=0)
-            {
+            else /* res >= 0 */
+           {
                 if (father_of_element->right != NULL)
                     father_of_element = father_of_element->right;
                 else
@@ -188,6 +213,7 @@ PUBLIC void HTBTree_add ARGS2(
                }
             }
        }
+
             /*
             ** Changing of all depths that need to be changed
             */
diff -u -p -r LYNX2-8-.590/www/library/implemen/htbtree.h 
LYNX2-8-/www/library/implemen/htbtree.h
--- LYNX2-8-.590/www/library/implemen/htbtree.h Wed May 12 19:06:06 1999
+++ LYNX2-8-/www/library/implemen/htbtree.h     Sun Oct 13 17:25:40 2002
@@ -76,6 +76,16 @@ extern void HTBTree_add PARAMS((HTBTree*

 /*

+Search an object in a binary tree
+
+  returns          Pointer to equivalent object in a tree or NULL if none.
+ */
+
+extern void * HTBTree_search PARAMS((HTBTree* tree, void * object));
+
+
+/*
+
 Find user object for element

  */
@@ -91,7 +101,7 @@ Find next element in depth-first order
   ele                    if NULL, start with leftmost element. if != 0 give 
next object to
                          the right.

-  returns                Pointer to element ot NULL if none left.
+  returns                Pointer to element or NULL if none left.

  */
 extern HTBTElement * HTBTree_next PARAMS((HTBTree* tree, HTBTElement * ele));


; To UNSUBSCRIBE: Send "unsubscribe lynx-dev" to address@hidden

reply via email to

[Prev in Thread] Current Thread [Next in Thread]