guix-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

02/02: Re-write insert-derivation-inputs in a more memory efficient mano


From: Christopher Baines
Subject: 02/02: Re-write insert-derivation-inputs in a more memory efficient manor
Date: Wed, 12 Jan 2022 19:22:52 -0500 (EST)

cbaines pushed a commit to branch master
in repository data-service.

commit 21cb33a859a25ac6ba82f32e014ea642e2e62afc
Author: Christopher Baines <mail@cbaines.net>
AuthorDate: Wed Jan 12 18:18:15 2022 +0000

    Re-write insert-derivation-inputs in a more memory efficient manor
    
    Previously it would compute a long list of strings, potentially more than
    100,000 elements long, then split this string up and insert it in chunks. 
Only
    then could memory be freed.
    
    This new approach builds the strings in batches for the insertion query, 
then
    moves on to the next batch. This should mean that more memory can be freed 
and
    reused along the way.
---
 guix-data-service/model/derivation.scm | 73 ++++++++++++++++++----------------
 guix-data-service/utils.scm            | 28 ++++++++++++-
 2 files changed, 65 insertions(+), 36 deletions(-)

diff --git a/guix-data-service/model/derivation.scm 
b/guix-data-service/model/derivation.scm
index 208bee6..9b88fc1 100644
--- a/guix-data-service/model/derivation.scm
+++ b/guix-data-service/model/derivation.scm
@@ -1329,48 +1329,51 @@ WHERE derivation_source_files.store_path = $1"
         #f)))
 
 (define (insert-derivation-inputs conn derivation-ids derivations)
-  (let ((data
-         (append-map
-          (lambda (derivation-id derivation)
-            (append-map
-             (match-lambda
-               (($ <derivation-input> derivation-or-path sub-derivations)
-                (let ((path
-                       (match derivation-or-path
-                         ((? derivation? d)
-                          ;; The first field changed to a derivation (from the 
file
-                          ;; name) in 5cf4b26d52bcea382d98fb4becce89be9ee37b55
-                          (derivation-file-name d))
-                         ((? string? s)
-                          s))))
-                  (map (lambda (sub-derivation)
-                         (string-append "("
-                                        (number->string derivation-id)
-                                        ", '" path
-                                        "', '" sub-derivation "')"))
-                       sub-derivations))))
-             (derivation-inputs derivation)))
-          derivation-ids
-          derivations)))
-
-    (unless (null? data)
-      (for-each
-       (lambda (chunk)
-         (exec-query
-          conn
-          (string-append
-           "
+  (define (process-chunk derivation-ids derivations)
+    (let ((query-parts
+           (append-map!
+            (lambda (derivation-id derivation)
+              (append-map!
+               (match-lambda
+                 (($ <derivation-input> derivation-or-path sub-derivations)
+                  (let ((path
+                         (match derivation-or-path
+                           ((? derivation? d)
+                            ;; The first field changed to a derivation (from 
the file
+                            ;; name) in 
5cf4b26d52bcea382d98fb4becce89be9ee37b55
+                            (derivation-file-name d))
+                           ((? string? s)
+                            s))))
+                    (map (lambda (sub-derivation)
+                           (string-append "("
+                                          (number->string derivation-id)
+                                          ", '" path
+                                          "', '" sub-derivation "')"))
+                         sub-derivations))))
+               (derivation-inputs derivation)))
+            derivation-ids
+            derivations)))
+
+      (unless (null? query-parts)
+        (exec-query
+         conn
+         (string-append
+          "
 INSERT INTO derivation_inputs (derivation_id, derivation_output_id)
 SELECT vals.derivation_id, derivation_outputs.id
 FROM (VALUES "
-           (string-join chunk ", ")
-           ") AS vals (derivation_id, file_name, output_name)
+          (string-join query-parts ", ")
+          ") AS vals (derivation_id, file_name, output_name)
 INNER JOIN derivations
   ON derivations.file_name = vals.file_name
 INNER JOIN derivation_outputs
   ON derivation_outputs.derivation_id = derivations.id
- AND vals.output_name = derivation_outputs.name")))
-       (chunk! data 1000)))))
+ AND vals.output_name = derivation_outputs.name")))))
+
+  (chunk-map! process-chunk
+              1000
+              (list-copy derivation-ids)
+              (list-copy derivations)))
 
 (define (select-from-derivation-source-files store-paths)
   (string-append
diff --git a/guix-data-service/utils.scm b/guix-data-service/utils.scm
index 4f66c9c..483f3ee 100644
--- a/guix-data-service/utils.scm
+++ b/guix-data-service/utils.scm
@@ -32,7 +32,8 @@
             letpar&
 
             chunk
-            chunk!))
+            chunk!
+            chunk-map!))
 
 (define (call-with-time-logging action thunk)
   (simple-format #t "debug: Starting ~A\n" action)
@@ -175,3 +176,28 @@
           (cons first-lst
                 (chunk! rest max-length))))
       (list lst)))
+
+(define* (chunk-map! proc chunk-size #:rest lsts)
+  (define (do-one-iteration lsts)
+    (if (> (length (car lsts))
+           chunk-size)
+        (let ((chunks-and-rest
+               (map (lambda (lst)
+                      (call-with-values (lambda ()
+                                          (split-at! lst chunk-size))
+                        (lambda (first-lst rest)
+                          (cons first-lst
+                                rest))))
+                    lsts)))
+          (apply proc
+                 (map car chunks-and-rest))
+          (do-one-iteration
+           (map cdr chunks-and-rest)))
+        (apply proc lsts)))
+
+  (unless (eq? 1
+               (length (delete-duplicates
+                        (map length lsts))))
+    (error "lists not equal length"))
+
+  (do-one-iteration lsts))



reply via email to

[Prev in Thread] Current Thread [Next in Thread]