[Help-smalltalk] faster startup 8/n, copy-on-write


From: Paolo Bonzini
Subject: [Help-smalltalk] faster startup 8/n, copy-on-write
Date: Fri, 22 Dec 2006 11:33:32 +0100
User-agent: Thunderbird 1.5.0.9 (Macintosh/20061207)

This patch enables GNU Smalltalk to use copy-on-write for the image file whenever possible. It is damn small compared to what it achieves!

Note that most of the previous speedup work was also a prerequisite for this patch, since copy-on-write requires, for example, that the OOP table's address not change between save and load. The speedup achieved is 16ms on my machine, or about 30%.

An additional benefit is that the parts of the image file that are not modified can be shared by all running instances of GNU Smalltalk.
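
To make the mechanism concrete, the whole trick boils down to something like the sketch below. This is a minimal illustration with a made-up helper name (map_image_cow), not the code in the patch; the real logic is in buffer_read_init and load_normal_oops further down, which map the file PROT_READ only:

#include <stdlib.h>
#include <sys/mman.h>
#include <sys/stat.h>

/* Map IMAGE_FD privately.  Pages that are never written stay shared
   with the page cache (and hence with every other gst process mapping
   the same image); pages that are written are copied lazily by the
   kernel, and the writes never reach the file.  */
static void *
map_image_cow (int image_fd, size_t *size)
{
  struct stat st;
  void *base;

  if (fstat (image_fd, &st) < 0)
    return NULL;

  base = mmap (NULL, st.st_size, PROT_READ | PROT_WRITE,
               MAP_PRIVATE, image_fd, 0);
  if (base == MAP_FAILED)
    return NULL;

  *size = st.st_size;
  return base;
}

Using MAP_PRIVATE rather than MAP_SHARED is what makes this safe: stores go to private copies of the pages, so a running image can never corrupt the snapshot on disk.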

The profile now has a new leader: _gst_intern_string. Further speedups can therefore be obtained by doing less work to reset the VM's global variables. For example, we need _gst_hashed_collection_class when creating the classes, because it is a superclass of _gst_dictionary_class, but not when reloading the image. The same goes for the primitive numbers; it should somehow be possible to avoid looking them up. In fact, only ~30 symbols, plus the symbols in sym.c, should need to be looked up (right now we are at ~600 overall).
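
To see why interning is expensive, below is roughly what an intern routine has to do per symbol: hash the string, walk a bucket chain comparing names, and allocate a new symbol on a miss. (This is an illustrative sketch with invented names and bucket count, not _gst_intern_string itself.) Doing that ~600 times at every startup, mostly for answers that already sit in the loaded image, is pure overhead.

#include <stdlib.h>
#include <string.h>

#define SYM_BUCKETS 512

typedef struct sym { const char *name; struct sym *next; } sym;
static sym *buckets[SYM_BUCKETS];

static unsigned
hash_str (const char *s)
{
  unsigned h = 5381;
  while (*s)
    h = h * 33 + (unsigned char) *s++;
  return h % SYM_BUCKETS;
}

static sym *
intern (const char *name)
{
  unsigned h = hash_str (name);
  sym *p;

  for (p = buckets[h]; p; p = p->next)  /* probe the chain */
    if (!strcmp (p->name, name))
      return p;

  p = malloc (sizeof *p);               /* miss: create a new symbol */
  p->name = strdup (name);
  p->next = buckets[h];
  buckets[h] = p;
  return p;
}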

Paolo
2006-12-22  Paolo Bonzini  <address@hidden>
 
        * libgst/gstpriv.h: Reserve three fewer bits for runtime flags, add
        F_LOADED.
        * libgst/oop.c: The address of loaded objects never changes, and
        they're always old.  Never compact loaded objects, and never free them.
        * libgst/print.c: Print loaded OOPs correctly.
        * libgst/save.c: Define and use buffer_advance.  Try reusing the
        mmap-ed area in load_normal_oops; free the buffer in load_normal_oops
        only if copy-on-write is not used.  Mmap the whole file in a single
        step using MAP_PRIVATE, and only keep the "read" path in
        buffer_fill.  Rename use_mmap to buf_used_mmap.  Save objects with
        the old bit set and without the F_RUNTIME flags.


--- orig/libgst/gstpriv.h
+++ mod/libgst/gstpriv.h
@@ -256,7 +256,7 @@ enum {
 
   /* The grouping of all the flags which are not valid across image
      saves and loads.  */
-  F_RUNTIME = 0xFFF000U,
+  F_RUNTIME = 0xFF8000U,
 
   /* Set if the OOP is currently unused.  */
   F_FREE = 0x10U,
@@ -286,6 +286,10 @@ enum {
      and contexts whose receiver is untrusted.  */
   F_UNTRUSTED = 0x800U,
 
+  /* Set for objects that were loaded from the image.  We never
+     garbage collect their contents, only the OOPs.  */
+  F_LOADED = 0x1000U,
+
   /* Set to the number of bytes unused in an object with byte-sized
      instance variables.  Note that this field and the following one
      should be initialized only by INIT_UNALIGNED_OBJECT (not really 


--- orig/libgst/oop.c
+++ mod/libgst/oop.c
@@ -747,18 +747,22 @@ _gst_make_oop_fixed (OOP oop)
   if (oop->flags & F_FIXED)
     return;
 
-  size = SIZE_TO_BYTES (TO_INT(oop->object->objSize));
-  newObj = (gst_object) _gst_mem_alloc (_gst_mem.fixed, size);
-  if (!newObj)
-    abort ();
+  if ((oop->flags & F_LOADED) == 0)
+    {
+      size = SIZE_TO_BYTES (TO_INT(oop->object->objSize));
+      newObj = (gst_object) _gst_mem_alloc (_gst_mem.fixed, size);
+      if (!newObj)
+        abort ();
 
-  memcpy (newObj, oop->object, size);
-  if (oop->flags & F_OLD)
-    _gst_mem_free (_gst_mem.old, oop->object);
-  else
-    _gst_mem.numOldOOPs++;
+      memcpy (newObj, oop->object, size);
+      if ((oop->flags & F_OLD) == 0)
+       _gst_mem.numOldOOPs++;
+      else
+        _gst_mem_free (_gst_mem.old, oop->object);
+
+      oop->object = newObj;
+    }
 
-  oop->object = newObj;
   oop->flags &= ~(F_SPACES | F_POOLED);
   oop->flags |= F_OLD | F_FIXED;
 }
@@ -778,7 +782,6 @@ _gst_tenure_oop (OOP oop)
         abort ();
 
       memcpy (newObj, oop->object, size);
-
       _gst_mem.numOldOOPs++;
 
       oop->object = newObj;
@@ -1051,7 +1054,7 @@ _gst_compact (size_t new_heap_limit)
        oop < &_gst_mem.ot[_gst_mem.ot_size]; oop++)
     {
       PREFETCH_LOOP (oop, PREF_READ | PREF_NTA);
-      if ((oop->flags & (F_OLD | F_FIXED)) == F_OLD)
+      if ((oop->flags & (F_OLD | F_FIXED | F_LOADED)) == F_OLD)
         {
           gst_object new;
           size_t size = SIZE_TO_BYTES (TO_INT (oop->object->objSize));
@@ -1423,14 +1426,16 @@ _gst_sweep_oop (OOP oop)
       _gst_mem.numOldOOPs--;
       stats.reclaimedOldSpaceBytesSinceLastGlobalGC +=
        SIZE_TO_BYTES (TO_INT (OOP_TO_OBJ (oop)->objSize));
-      _gst_mem_free (_gst_mem.fixed, oop->object);
+      if ((oop->flags & F_LOADED) == 0)
+        _gst_mem_free (_gst_mem.fixed, oop->object);
     }
   else if UNCOMMON (oop->flags & F_OLD)
     {
       _gst_mem.numOldOOPs--;
       stats.reclaimedOldSpaceBytesSinceLastGlobalGC +=
        SIZE_TO_BYTES (TO_INT (OOP_TO_OBJ (oop)->objSize));
-      _gst_mem_free (_gst_mem.old, oop->object);
+      if ((oop->flags & F_LOADED) == 0)
+        _gst_mem_free (_gst_mem.old, oop->object);
     }
 
   oop->flags = F_FREE;


--- orig/libgst/print.c
+++ mod/libgst/print.c
@@ -329,9 +329,10 @@ _gst_display_oop_short (OOP oop)
             oop->flags & F_WEAK ? "Weak" :
             oop->flags & F_EPHEMERON ? "Ephemeron" : "",
 
-            oop->flags & _gst_mem.active_flag ? "To-space" :
             oop->flags & F_FIXED ? "Fixed" :
-            oop->flags & F_OLD ? "Old" : "From-space",
+            oop->flags & F_LOADED ? "Permanent" :
+            oop->flags & F_OLD ? "Old" :
+            oop->flags & _gst_mem.active_flag ? "To-space" : "From-space",
 
            IS_EDEN_ADDR (oop->object) ? "Eden" :
            IS_SURVIVOR_ADDR (oop->object, 0) ? "Surv (Even)" :
@@ -359,9 +360,10 @@ _gst_display_oop (OOP oop)
             oop->flags & F_WEAK ? "Weak" :
             oop->flags & F_EPHEMERON ? "Ephemeron" : "",
 
-            oop->flags & _gst_mem.active_flag ? "To-space" :
             oop->flags & F_FIXED ? "Fixed" :
-            oop->flags & F_OLD ? "Old" : "From-space",
+            oop->flags & F_LOADED ? "Permanent" :
+            oop->flags & F_OLD ? "Old" :
+            oop->flags & _gst_mem.active_flag ? "To-space" : "From-space",
 
            IS_EDEN_ADDR (oop->object) ? "Eden" :
            IS_SURVIVOR_ADDR (oop->object, 0) ? "Surv (Even)" :


--- orig/libgst/save.c
+++ mod/libgst/save.c
@@ -132,7 +132,7 @@ static off_t file_size;
 static off_t file_pos;
 
 /* Whether we are using mmap to read the file.  */
-static mst_Boolean use_mmap;
+static mst_Boolean buf_used_mmap;
 
 
 /* This function establishes a buffer of size NUMBYTES for writes.  */
@@ -155,6 +155,12 @@ static void buffer_read_init (int imageF
 /* This function frees the buffer used for reads.  */
 static void buffer_read_free (int imageFd);
 
+/* This function, which only works if memory-mapped I/O is used, advances
+   the buffer pointer by NUMBYTES and returns the pointer to the previous
+   value of the buffer pointer.  */
+static inline PTR buffer_advance (int imageFd,
+                                 int numBytes);
+
 /* This function buffers reads from the image file whose descriptor
    is IMAGEFD.  Memory-mapped I/O is used is possible.  */
 static void buffer_read (int imageFd,
@@ -333,9 +339,9 @@ make_oop_table_to_be_saved (struct save_
     {
       if (IS_OOP_VALID_GC (oop))
        {
-         int numPointers;
-          myOOPTable[i].flags = oop->flags;
-         numPointers = NUM_OOPS (oop->object);
+         int numPointers = NUM_OOPS (oop->object);
+
+          myOOPTable[i].flags = (oop->flags & ~F_RUNTIME) | F_OLD;
 
          /* Cache the number of indexed instance variables.  We prefer
             to do more work upon saving (done once) than upon loading
@@ -437,8 +443,6 @@ _gst_load_from_file (const char *fileNam
   imageFd = _gst_open_file (fileName, "r");
   loaded = (imageFd >= 0) && load_snapshot (imageFd);
 
-  buffer_read_free (imageFd);
-
   close (imageFd);
   return (loaded);
 }
@@ -545,8 +549,11 @@ load_normal_oops (int imageFd)
   OOP oop;
   gst_object object;
   int i;
+  mst_Boolean use_copy_on_write
+    = buf_used_mmap && !wrong_endianness && ot_delta == 0;
 
-  /* Now walk the oop table.  Start fixing the byte order.  */
+  /* Now walk the oop table.  Load the data (or get the addresses from the
+     mmap-ed area) and fix the byte order.  */
 
   _gst_mem.last_allocated_oop = &_gst_mem.ot[num_used_oops - 1];
   PREFETCH_START (_gst_mem.ot, PREF_WRITE | PREF_NTA);
@@ -570,32 +577,40 @@ load_normal_oops (int imageFd)
         to create new-space objects.  The solution is not however as neat
         as possible.  */
 
-      flags &= ~(F_SPACES | F_POOLED | F_RUNTIME);
-      flags |= F_OLD;
-
       _gst_mem.numOldOOPs++;
       size = sizeof (PTR) * (size_t) oop->object;
-      if (flags & F_FIXED)
+      if (use_copy_on_write)
        {
-         _gst_mem.numFixedOOPs++;
-          object = (gst_object) _gst_mem_alloc (_gst_mem.fixed, size);
+         oop->flags |= F_LOADED;
+         object = buffer_advance (imageFd, size);
        }
-      else
-        object = (gst_object) _gst_mem_alloc (_gst_mem.old, size);
 
-      buffer_read (imageFd, object, size);
-      if UNCOMMON (wrong_endianness)
-       fixup_byte_order (object, 
-                         (oop->flags & F_BYTE)
-                         ? OBJ_HEADER_SIZE_WORDS
-                         : size / sizeof (PTR));
+      else
+       {
+         if (flags & F_FIXED)
+           {
+             _gst_mem.numFixedOOPs++;
+              object = (gst_object) _gst_mem_alloc (_gst_mem.fixed, size);
+           }
+          else
+            object = (gst_object) _gst_mem_alloc (_gst_mem.old, size);
 
-      if (object->objSize != FROM_INT ((size_t) oop->object))
-       abort ();
+          buffer_read (imageFd, object, size);
+          if UNCOMMON (wrong_endianness)
+           fixup_byte_order (object, 
+                             (oop->flags & F_BYTE)
+                             ? OBJ_HEADER_SIZE_WORDS
+                             : size / sizeof (PTR));
+
+         /* Would be nice, but causes us to touch every page and lose most
+            of the startup-time benefits of copy-on-write.  So we only
+            do it in the slow case, anyway.  */
+         if (object->objSize != FROM_INT ((size_t) oop->object))
+           abort ();
+        }
 
       /* Remove flags that are invalid after an image has been loaded.  */
       oop->object = object;
-      oop->flags = flags;
 
       if (flags & F_WEAK)
        _gst_make_oop_weak (oop);
@@ -615,6 +630,9 @@ load_normal_oops (int imageFd)
           classOOP = OOP_ABSOLUTE (object->objClass);
          fixup_byte_order (object->data, CLASS_FIXED_FIELDS (classOOP));
        }
+
+  if (!use_copy_on_write)
+    buffer_read_free (imageFd);
 }
 
 
@@ -775,67 +793,53 @@ void
 buffer_fill (int imageFd)
 {
   buf_pos = 0;
-  if (use_mmap)
-    {
-#ifndef WIN32
-      if (buf)
-       _gst_osmem_free (buf, buf_size);
-
-      buf = mmap (NULL, buf_size, PROT_READ, MAP_SHARED, imageFd, file_pos);
-
-      if (buf != (PTR) -1)
-       {
-#ifdef HAVE_MADVISE
-#ifdef MADV_WILLNEED
-         madvise (buf, buf_size, MADV_WILLNEED);
-#endif
-
-         /* Ahem... this madvise causes a kernel OOPS on my machine!  */
-#if 0
-#ifdef MADV_SEQUENTIAL
-         madvise (buf, buf_size, MADV_SEQUENTIAL);
-#endif
-#endif
-
-#endif /* HAVE_MADVISE */
-         return;
-       }
-#endif /* !WIN32 */
-
-      /* First non-mmaped input operation.  Allocate the buffer.  */
-      buf = xmalloc (buf_size);
-      use_mmap = false;
-    }
-
-  /* Cannot mmap the file, use read(2).  */
   read (imageFd, buf, buf_size);
 }
 
 void
 buffer_read_init (int imageFd, int numBytes)
 {
-  if (numBytes)
+  struct stat st;
+  fstat (imageFd, &st);
+  file_size = st.st_size;
+  file_pos = 0;
+
+#ifndef WIN32
+  buf = mmap (NULL, file_size, PROT_READ, MAP_PRIVATE, imageFd, 0);
+
+  if (buf != (PTR) -1)
     {
-      struct stat st;
-      fstat (imageFd, &st);
-      file_size = st.st_size;
-      file_pos = 0;
-      buf_size = numBytes;
-      use_mmap = true;
-      buf = NULL;
-      buffer_fill (imageFd);
+      buf_size = file_size;
+      buf_used_mmap = true;
+      return;
     }
+#endif /* !WIN32 */
+
+  /* Non-mmaped input.  */
+  buf_used_mmap = false;
+  buf_size = numBytes;
+  buf = xmalloc (buf_size);
+  buffer_fill (imageFd);
 }
 
 void
 buffer_read_free (int imageFd)
 {
-  if (use_mmap)
+  if (buf_used_mmap)
     _gst_osmem_free (buf, buf_size);
   else
     xfree (buf);
 }
 
+PTR
+buffer_advance (int imageFd,
+               int numBytes)
+{
+  PTR current_pos = buf + buf_pos;
+  buf_pos += numBytes;
+  return current_pos;
+}
+
 void
 buffer_read (int imageFd,
             PTR pdata,
@@ -843,15 +847,6 @@ buffer_read (int imageFd,
 {
   char *data = (char *) pdata;
 
-#if 0
-  /* Avoid triggering a SIGBUS.  Unnecessary, and wastes time.  */
-  if UNCOMMON (numBytes > file_size - file_pos)
-    {
-      memzero (data + file_size - file_pos, numBytes - (file_size - file_pos));
-      numBytes = file_size - file_pos;
-    }
-#endif
-
   if UNCOMMON (numBytes > buf_size - buf_pos)
     {
       memcpy (data, buf + buf_pos, buf_size - buf_pos);



