guix-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[shepherd] 02/04: shepherd: In the non-signalfd case, do not run handler


From: Ludovic Courtès
Subject: [shepherd] 02/04: shepherd: In the non-signalfd case, do not run handlers from asyncs.
Date: Fri, 29 Dec 2023 18:30:25 -0500 (EST)

civodul pushed a commit to branch main
in repository shepherd.

commit 489cbd9be6708f656527b23314d85b96259ca5d8
Author: Ludovic Courtès <ludo@gnu.org>
AuthorDate: Sat Dec 30 00:02:46 2023 +0100

    shepherd: In the non-signalfd case, do not run handlers from asyncs.
    
    Previously, signal handlers would run as asyncs, which could deadlock.
    
    * modules/shepherd.scm (get-u8/timeout): New procedure.
    (run-daemon): When SIGNAL-PORT is false, call ‘pipe2’ and ‘sigaction’
    from here; return a thunk that reads from the pipe.
    (capture-dynamic-state): Remove.
    (main): Remove calls to ‘sigaction’ when SIGNAL-PORT is false.
---
 modules/shepherd.scm | 77 ++++++++++++++++++++++++++++++----------------------
 1 file changed, 45 insertions(+), 32 deletions(-)

diff --git a/modules/shepherd.scm b/modules/shepherd.scm
index c99d133..424c906 100644
--- a/modules/shepherd.scm
+++ b/modules/shepherd.scm
@@ -37,6 +37,12 @@
   #:use-module (shepherd system)
   #:use-module (shepherd args)
   #:use-module (shepherd comm)
+  #:autoload   (ice-9 binary-ports) (put-u8 get-u8)
+  #:autoload   (fibers operations) (choice-operation
+                                    perform-operation
+                                    wrap-operation)
+  #:autoload   (fibers io-wakeup) (wait-until-port-readable-operation)
+  #:autoload   (fibers timers) (sleep-operation)
   #:export (main))
 
 
@@ -197,6 +203,16 @@ using fallback mechanism."))
         (lambda (key . args)                      ;for Guile 2.2
           (handle-key-and-args-exception key args))))))
 
+(define (get-u8/timeout port timeout)
+  "Read a byte from @var{port}; return that byte, or the EOF object, or
+@code{'timeout} if @var{timeout} has expired and nothing was read."
+  (perform-operation
+   (choice-operation (wrap-operation (wait-until-port-readable-operation port)
+                                     (lambda ()
+                                       (get-u8 port)))
+                     (wrap-operation (sleep-operation timeout)
+                                     (const 'timeout)))))
+
 (define* (run-daemon #:key (config-file (default-config-file))
                      socket-file pid-file signal-port poll-services?)
   (define (signal-thunk signal-port)
@@ -206,15 +222,35 @@ using fallback mechanism."))
           (let loop ()
             (handle-signal-port signal-port)
             (loop)))
-        (lambda ()
-          ;; When not using signalfd(2), there's always a time window before
-          ;; 'select' during which a handler async can be queued but not
-          ;; executed.  Work around it by exiting 'select' every few seconds.
-          (let loop ()
-            (sleep (if poll-services? 0.5 30))
-            (when poll-services?
-              (check-for-dead-services))
-            (loop)))))
+        ;; When not using signalfd(2), register a signal handler.  The handler
+        ;; cannot safely send a message to, say, the process monitor, because
+        ;; handlers run as "asyncs", which may be called anytime, including
+        ;; possibly when the message recipient's fiber was active, leading to
+        ;; a deadlock.  To address that, the handler writes to a "self pipe";
+        ;; a fiber reads from the pipe and invokes the actual handler from
+        ;; there.
+        (match (pipe2 (logior O_NONBLOCK O_CLOEXEC))
+          ((input . output)
+           (let ((handler (lambda (signal)
+                            (put-u8 output signal))))
+             (for-each (lambda (signal)
+                         (sigaction signal handler SA_NOCLDSTOP))
+                       %precious-signals)
+             (lambda ()
+               ;; There's a time window before blocking on 'epoll_wait' during
+               ;; which a handler async can be queued but not executed.  Work
+               ;; around it by exiting Fibers' 'epoll_wait' call periodically.
+               ;;
+               ;; TODO: Avoid this race condition by having Fibers use
+               ;; 'epoll_waitp' or 'pselect' and run asyncs before blocking.
+               (let loop ()
+                 (match (get-u8/timeout input (if poll-services? 0.5 30))
+                   ('timeout
+                    (when poll-services?
+                      (check-for-dead-services)))
+                   (signal
+                    ((signal-handler signal))))
+                 (loop))))))))
 
   ;; We might have file descriptors inherited from our parent, as well as file
   ;; descriptors wrongfully opened by Guile or Fibers (see
@@ -280,16 +316,6 @@ using fallback mechanism."))
 
            (next-command))))))
 
-(define (capture-dynamic-state proc)
-  (let ((dynamic-state (current-dynamic-state)))
-    ;; (pk 'capturing ((@@ (shepherd service) current-process-monitor)))
-    (lambda args
-      (with-dynamic-state dynamic-state
-                          (lambda ()
-                            ;; (pk 'captured
-                            ;;     ((@@ (shepherd-service) 
current-process-monitor)))
-                            (apply proc args))))))
-
 (define-syntax replace-core-bindings!
   (syntax-rules (<>)
     "Replace the given core bindings in the current process, restoring them 
upon
@@ -472,19 +498,6 @@ fork in the child process."
                (register-services (list root-service))
                (start-service root-service)
 
-               ;; Install signal handlers on systems that don't support
-               ;; signalfd(2).  Capture the current dynamic state so that
-               ;; 'current-process-monitor' & co. have the right value when
-               ;; the handler is invoked.
-               (unless signal-port
-                 (for-each (lambda (signal)
-                             (sigaction signal
-                               (capture-dynamic-state (signal-handler 
signal))))
-                           (delete SIGCHLD %precious-signals))
-
-                 (sigaction SIGCHLD (capture-dynamic-state handle-SIGCHLD)
-                            SA_NOCLDSTOP))
-
                ;; Replace the default 'system*' binding with one that
                ;; cooperates instead of blocking on 'waitpid'.  Replace
                ;; 'primitive-load' (in C as of 3.0.9) with one that does



reply via email to

[Prev in Thread] Current Thread [Next in Thread]