[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
scratch/hash-table-perf e69035c6ef5 17/37: Leaner hash table dumping and
From: |
Mattias Engdegård |
Subject: |
scratch/hash-table-perf e69035c6ef5 17/37: Leaner hash table dumping and thawing |
Date: |
Sun, 7 Jan 2024 12:41:13 -0500 (EST) |
branch: scratch/hash-table-perf
commit e69035c6ef5399709a7f13b7bf3b3e3152d211ad
Author: Mattias Engdegård <mattiase@acm.org>
Commit: Mattias Engdegård <mattiase@acm.org>
Leaner hash table dumping and thawing
Only dump the actual data, and the test encoded as an enum. This
simplifies dumping, makes dump files smaller and saves space at run
time.
* src/lisp.h (hash_table_std_test_t): New enum.
(struct Lisp_Hash_Table): Add frozen_test member, consuming no extra space.
* src/fns.c (hashfn_user_defined): Now static.
(hash_table_test_from_std): New.
(hash_table_rehash): Rename to...
(hash_table_thaw): ...this and rewrite.
* src/pdumper.c (hash_table_contents): Only include actual data, not
unused space.
(hash_table_std_test): New.
(hash_table_freeze): Set frozen_test from test.
(dump_hash_table): Dump frozen_test, not the whole test struct.
Don't bother other dumping fields that can be derived.
---
src/fns.c | 53 ++++++++++++++++++++++++++++++-------------------
src/lisp.h | 12 ++++++++++--
src/pdumper.c | 63 +++++++++++++++++++++++++----------------------------------
3 files changed, 70 insertions(+), 58 deletions(-)
diff --git a/src/fns.c b/src/fns.c
index f9f41d1ead1..db2b5fdb5cc 100644
--- a/src/fns.c
+++ b/src/fns.c
@@ -4474,7 +4474,7 @@ hashfn_eql (Lisp_Object key, struct Lisp_Hash_Table *h)
/* Given H, return a hash code for KEY which uses a user-defined
function to compare keys. */
-Lisp_Object
+static Lisp_Object
hashfn_user_defined (Lisp_Object key, struct Lisp_Hash_Table *h)
{
Lisp_Object args[] = { h->test.user_hash_function, key };
@@ -4638,11 +4638,10 @@ maybe_resize_hash_table (struct Lisp_Hash_Table *h)
if (h->next_free < 0)
{
ptrdiff_t old_size = HASH_TABLE_SIZE (h);
- EMACS_INT new_size;
-
- double float_new_size = old_size * std_rehash_size;
- if (float_new_size < EMACS_INT_MAX)
- new_size = float_new_size;
+ /* FIXME: better growth management, ditch std_rehash_size */
+ EMACS_INT new_size = old_size * std_rehash_size;
+ if (new_size < EMACS_INT_MAX)
+ new_size = max (new_size, 32); /* avoid slow initial growth */
else
new_size = EMACS_INT_MAX;
if (PTRDIFF_MAX < new_size)
@@ -4691,20 +4690,39 @@ maybe_resize_hash_table (struct Lisp_Hash_Table *h)
}
}
-/* Recompute the hashes (and hence also the "next" pointers).
- Normally there's never a need to recompute hashes.
- This is done only on first access to a hash-table loaded from
- the "pdump", because the objects' addresses may have changed, thus
- affecting their hashes. */
+static const struct hash_table_test *
+hash_table_test_from_std (hash_table_std_test_t test)
+{
+ switch (test)
+ {
+ case Test_eq: return &hashtest_eq;
+ case Test_eql: return &hashtest_eql;
+ case Test_equal: return &hashtest_equal;
+ }
+ emacs_abort();
+}
+
+/* Rebuild a hash table from its frozen (dumped) form. */
void
-hash_table_rehash (Lisp_Object hash)
+hash_table_thaw (Lisp_Object hash_table)
{
- struct Lisp_Hash_Table *h = XHASH_TABLE (hash);
- ptrdiff_t i, count = h->count;
+ struct Lisp_Hash_Table *h = XHASH_TABLE (hash_table);
+
+ /* Freezing discarded most non-essential information; recompute it.
+ The allocation is minimal with no room for growth. */
+ h->test = *hash_table_test_from_std (h->frozen_test);
+ ptrdiff_t size = ASIZE (h->key_and_value) / 2;
+ h->count = size;
+ ptrdiff_t index_size = hash_index_size (size);
+ h->next_free = -1;
+
+ h->hash = make_nil_vector (size);
+ h->next = make_vector (size, make_fixnum (-1));
+ h->index = make_vector (index_size, make_fixnum (-1));
/* Recompute the actual hash codes for each entry in the table.
Order is still invalid. */
- for (i = 0; i < count; i++)
+ for (ptrdiff_t i = 0; i < size; i++)
{
Lisp_Object key = HASH_KEY (h, i);
Lisp_Object hash_code = hash_from_key (h, key);
@@ -4712,12 +4730,7 @@ hash_table_rehash (Lisp_Object hash)
set_hash_hash_slot (h, i, hash_code);
set_hash_next_slot (h, i, HASH_INDEX (h, start_of_bucket));
set_hash_index_slot (h, start_of_bucket, i);
- eassert (HASH_NEXT (h, i) != i); /* Stop loops. */
}
-
- ptrdiff_t size = ASIZE (h->next);
- for (; i + 1 < size; i++)
- set_hash_next_slot (h, i, i + 1);
}
/* Lookup KEY in hash table H. If HASH is non-null, return in *HASH
diff --git a/src/lisp.h b/src/lisp.h
index ee87b9567b0..1f3220c0179 100644
--- a/src/lisp.h
+++ b/src/lisp.h
@@ -2385,6 +2385,12 @@ INLINE int
struct Lisp_Hash_Table;
+typedef enum {
+ Test_eql,
+ Test_eq,
+ Test_equal,
+} hash_table_std_test_t;
+
struct hash_table_test
{
/* Function used to compare keys; always a bare symbol. */
@@ -2451,6 +2457,9 @@ struct Lisp_Hash_Table
/* Weakness of the table. */
hash_table_weakness_t weakness : 8;
+ /* Hash table test (only used when frozen in dump) */
+ hash_table_std_test_t frozen_test : 8;
+
/* True if the table can be purecopied. The table cannot be
changed afterwards. */
bool purecopy;
@@ -2541,7 +2550,7 @@ hash_from_key (struct Lisp_Hash_Table *h, Lisp_Object key)
return h->test.hashfn (key, h);
}
-void hash_table_rehash (Lisp_Object);
+void hash_table_thaw (Lisp_Object hash_table);
/* Default size for hash tables if not specified. */
@@ -3983,7 +3992,6 @@ extern void hexbuf_digest (char *, void const *, int);
extern char *extract_data_from_object (Lisp_Object, ptrdiff_t *, ptrdiff_t *);
EMACS_UINT hash_string (char const *, ptrdiff_t);
EMACS_UINT sxhash (Lisp_Object);
-Lisp_Object hashfn_user_defined (Lisp_Object, struct Lisp_Hash_Table *);
Lisp_Object make_hash_table (struct hash_table_test, EMACS_INT,
hash_table_weakness_t, bool);
Lisp_Object hash_table_weakness_symbol (hash_table_weakness_t weak);
diff --git a/src/pdumper.c b/src/pdumper.c
index 93ecd19a6ca..d9f24ce1a9e 100644
--- a/src/pdumper.c
+++ b/src/pdumper.c
@@ -2646,34 +2646,26 @@ dump_vectorlike_generic (struct dump_context *ctx,
return offset;
}
-/* Return a vector of KEY, VALUE pairs in the given hash table H. The
- first H->count pairs are valid, and the rest are unbound. */
+/* Return a vector of KEY, VALUE pairs in the given hash table H.
+ No room for growth is included. */
static Lisp_Object
hash_table_contents (struct Lisp_Hash_Table *h)
{
- if (h->test.hashfn == hashfn_user_defined)
- error ("cannot dump hash tables with user-defined tests"); /* Bug#36769 */
-
- ptrdiff_t size = HASH_TABLE_SIZE (h);
+ ptrdiff_t old_size = HASH_TABLE_SIZE (h);
+ ptrdiff_t size = h->count;
Lisp_Object key_and_value = make_uninit_vector (2 * size);
ptrdiff_t n = 0;
/* Make sure key_and_value ends up in the same order; charset.c
relies on it by expecting hash table indices to stay constant
across the dump. */
- for (ptrdiff_t i = 0; i < size; i++)
+ for (ptrdiff_t i = 0; i < old_size; i++)
if (!NILP (HASH_HASH (h, i)))
{
ASET (key_and_value, n++, HASH_KEY (h, i));
ASET (key_and_value, n++, HASH_VALUE (h, i));
}
- while (n < 2 * size)
- {
- ASET (key_and_value, n++, Qunbound);
- ASET (key_and_value, n++, Qnil);
- }
-
return key_and_value;
}
@@ -2686,25 +2678,32 @@ dump_hash_table_list (struct dump_context *ctx)
return 0;
}
-static void
-hash_table_freeze (struct Lisp_Hash_Table *h)
+static hash_table_std_test_t
+hash_table_std_test (const struct hash_table_test *t)
{
- ptrdiff_t npairs = ASIZE (h->key_and_value) / 2;
- h->key_and_value = hash_table_contents (h);
- h->next = h->hash = make_fixnum (npairs);
- h->index = make_fixnum (ASIZE (h->index));
- h->next_free = (npairs == h->count ? -1 : h->count);
+ if (BASE_EQ (t->name, Qeq))
+ return Test_eq;
+ if (BASE_EQ (t->name, Qeql))
+ return Test_eql;
+ if (BASE_EQ (t->name, Qequal))
+ return Test_equal;
+ error ("cannot dump hash tables with user-defined tests"); /* Bug#36769 */
}
+/* Compact contents and discard inessential information from a hash table,
+ preparing it for dumping.
+ See `hash_table_thaw' for the code that restores the object to a usable
+ state. */
static void
-hash_table_thaw (Lisp_Object hash)
+hash_table_freeze (struct Lisp_Hash_Table *h)
{
- struct Lisp_Hash_Table *h = XHASH_TABLE (hash);
- h->hash = make_nil_vector (XFIXNUM (h->hash));
- h->next = Fmake_vector (h->next, make_fixnum (-1));
- h->index = Fmake_vector (h->index, make_fixnum (-1));
-
- hash_table_rehash (hash);
+ h->key_and_value = hash_table_contents (h);
+ eassert (ASIZE (h->key_and_value) == h->count * 2);
+ h->next = Qnil;
+ h->hash = Qnil;
+ h->index = Qnil;
+ h->count = 0;
+ h->frozen_test = hash_table_std_test (&h->test);
}
static dump_off
@@ -2724,19 +2723,11 @@ dump_hash_table (struct dump_context *ctx, Lisp_Object
object)
dump_pseudovector_lisp_fields (ctx, &out->header, &hash->header);
/* TODO: dump the hash bucket vectors synchronously here to keep
them as close to the hash table as possible. */
- DUMP_FIELD_COPY (out, hash, count);
- DUMP_FIELD_COPY (out, hash, next_free);
DUMP_FIELD_COPY (out, hash, weakness);
DUMP_FIELD_COPY (out, hash, purecopy);
DUMP_FIELD_COPY (out, hash, mutable);
+ DUMP_FIELD_COPY (out, hash, frozen_test);
dump_field_lv (ctx, out, hash, &hash->key_and_value, WEIGHT_STRONG);
- dump_field_lv (ctx, out, hash, &hash->test.name, WEIGHT_STRONG);
- dump_field_lv (ctx, out, hash, &hash->test.user_hash_function,
- WEIGHT_STRONG);
- dump_field_lv (ctx, out, hash, &hash->test.user_cmp_function,
- WEIGHT_STRONG);
- dump_field_emacs_ptr (ctx, out, hash, &hash->test.cmpfn);
- dump_field_emacs_ptr (ctx, out, hash, &hash->test.hashfn);
eassert (hash->next_weak == NULL);
return finish_dump_pvec (ctx, &out->header);
}
- scratch/hash-table-perf 594152bf667 01/37: Add internal hash-table debug functions, (continued)
- scratch/hash-table-perf 594152bf667 01/37: Add internal hash-table debug functions, Mattias Engdegård, 2024/01/07
- scratch/hash-table-perf 3d2042c48a6 07/37: Refactor: extract hash index computation to a function, Mattias Engdegård, 2024/01/07
- scratch/hash-table-perf 9141966be51 09/37: ; * src/alloc.c (purecopy_hash_table): Simplify, Mattias Engdegård, 2024/01/07
- scratch/hash-table-perf 31950946290 04/37: Refactor: less egregious layering violation in composite.h, Mattias Engdegård, 2024/01/07
- scratch/hash-table-perf 4e8d7725fd4 11/37: ; * src/fns.c (Fmake_hash_table): ensure `test` is a bare symbol, Mattias Engdegård, 2024/01/07
- scratch/hash-table-perf 4b5d9f92abe 13/37: * src/print.c (print_object): Don't print empty hash-table data, Mattias Engdegård, 2024/01/07
- scratch/hash-table-perf 3e9e68333ae 16/37: Remove rehash-threshold and rehash-size struct members, Mattias Engdegård, 2024/01/07
- scratch/hash-table-perf c188b9f2bf5 08/37: Refactor hash table vector reallocation, Mattias Engdegård, 2024/01/07
- scratch/hash-table-perf fdc390f8dc0 10/37: Abstract predicate and constant for unused hash keys, Mattias Engdegård, 2024/01/07
- scratch/hash-table-perf 6ffbccbf1dd 15/37: Represent hash table weakness as an enum internally, Mattias Engdegård, 2024/01/07
- scratch/hash-table-perf e69035c6ef5 17/37: Leaner hash table dumping and thawing,
Mattias Engdegård <=
- scratch/hash-table-perf f3e985a16ba 14/37: Don't print or read the hash table size parameter, Mattias Engdegård, 2024/01/07
- scratch/hash-table-perf e2a6ce36d83 03/37: Decouple profiler from Lisp hash table internals, Mattias Engdegård, 2024/01/07
- scratch/hash-table-perf c4df6041de8 12/37: * src/print.c (print_object): Don't print hash table test if `eql`., Mattias Engdegård, 2024/01/07
- scratch/hash-table-perf 2d28042f56a 19/37: Use non-Lisp allocation for internal hash-table vectors, Mattias Engdegård, 2024/01/07
- scratch/hash-table-perf 54807fee4d0 23/37: Use hash_idx_t for storing hash indices, Mattias Engdegård, 2024/01/07
- scratch/hash-table-perf fc68176120f 24/37: Use hash_hash_t for storing hash values, Mattias Engdegård, 2024/01/07
- scratch/hash-table-perf 310f6584ccb 18/37: Allow zero hash table size, Mattias Engdegård, 2024/01/07
- scratch/hash-table-perf 8e80d1930e3 20/37: Store hash values as EMACS_UINT instead of Lisp_Object, Mattias Engdegård, 2024/01/07
- scratch/hash-table-perf 1ebd00f6d0a 21/37: Retype hash interfaces to use EMACS_UINT instead of Lisp fixnum, Mattias Engdegård, 2024/01/07
- scratch/hash-table-perf 8335891387a 22/37: Inlined and specialised hash table look-up, Mattias Engdegård, 2024/01/07