[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[shepherd] 02/04: shepherd: In the non-signalfd case, do not run handler
From: |
Ludovic Courtès |
Subject: |
[shepherd] 02/04: shepherd: In the non-signalfd case, do not run handlers from asyncs. |
Date: |
Fri, 29 Dec 2023 18:30:25 -0500 (EST) |
civodul pushed a commit to branch main
in repository shepherd.
commit 489cbd9be6708f656527b23314d85b96259ca5d8
Author: Ludovic Courtès <ludo@gnu.org>
AuthorDate: Sat Dec 30 00:02:46 2023 +0100
shepherd: In the non-signalfd case, do not run handlers from asyncs.
Previously, signal handlers would run as asyncs, which could deadlock.
* modules/shepherd.scm (get-u8/timeout): New procedure.
(run-daemon): When SIGNAL-PORT is false, call ‘pipe2’ and ‘sigaction’
from here; return a thunk that reads from the pipe.
(capture-dynamic-state): Remove.
(main): Remove calls to ‘sigaction’ when SIGNAL-PORT is false.
---
modules/shepherd.scm | 77 ++++++++++++++++++++++++++++++----------------------
1 file changed, 45 insertions(+), 32 deletions(-)
diff --git a/modules/shepherd.scm b/modules/shepherd.scm
index c99d133..424c906 100644
--- a/modules/shepherd.scm
+++ b/modules/shepherd.scm
@@ -37,6 +37,12 @@
#:use-module (shepherd system)
#:use-module (shepherd args)
#:use-module (shepherd comm)
+ #:autoload (ice-9 binary-ports) (put-u8 get-u8)
+ #:autoload (fibers operations) (choice-operation
+ perform-operation
+ wrap-operation)
+ #:autoload (fibers io-wakeup) (wait-until-port-readable-operation)
+ #:autoload (fibers timers) (sleep-operation)
#:export (main))
@@ -197,6 +203,16 @@ using fallback mechanism."))
(lambda (key . args) ;for Guile 2.2
(handle-key-and-args-exception key args))))))
+(define (get-u8/timeout port timeout)
+ "Read a byte from @var{port}; return that byte, or the EOF object, or
+@code{'timeout} if @var{timeout} has expired and nothing was read."
+ (perform-operation
+ (choice-operation (wrap-operation (wait-until-port-readable-operation port)
+ (lambda ()
+ (get-u8 port)))
+ (wrap-operation (sleep-operation timeout)
+ (const 'timeout)))))
+
(define* (run-daemon #:key (config-file (default-config-file))
socket-file pid-file signal-port poll-services?)
(define (signal-thunk signal-port)
@@ -206,15 +222,35 @@ using fallback mechanism."))
(let loop ()
(handle-signal-port signal-port)
(loop)))
- (lambda ()
- ;; When not using signalfd(2), there's always a time window before
- ;; 'select' during which a handler async can be queued but not
- ;; executed. Work around it by exiting 'select' every few seconds.
- (let loop ()
- (sleep (if poll-services? 0.5 30))
- (when poll-services?
- (check-for-dead-services))
- (loop)))))
+ ;; When not using signalfd(2), register a signal handler. The handler
+ ;; cannot safely send a message to, say, the process monitor, because
+ ;; handlers run as "asyncs", which may be called anytime, including
+ ;; possibly when the message recipient's fiber was active, leading to
+ ;; a deadlock. To address that, the handler writes to a "self pipe";
+ ;; a fiber reads from the pipe and invokes the actual handler from
+ ;; there.
+ (match (pipe2 (logior O_NONBLOCK O_CLOEXEC))
+ ((input . output)
+ (let ((handler (lambda (signal)
+ (put-u8 output signal))))
+ (for-each (lambda (signal)
+ (sigaction signal handler SA_NOCLDSTOP))
+ %precious-signals)
+ (lambda ()
+ ;; There's a time window before blocking on 'epoll_wait' during
+ ;; which a handler async can be queued but not executed. Work
+ ;; around it by exiting Fibers' 'epoll_wait' call periodically.
+ ;;
+ ;; TODO: Avoid this race condition by having Fibers use
+ ;; 'epoll_waitp' or 'pselect' and run asyncs before blocking.
+ (let loop ()
+ (match (get-u8/timeout input (if poll-services? 0.5 30))
+ ('timeout
+ (when poll-services?
+ (check-for-dead-services)))
+ (signal
+ ((signal-handler signal))))
+ (loop))))))))
;; We might have file descriptors inherited from our parent, as well as file
;; descriptors wrongfully opened by Guile or Fibers (see
@@ -280,16 +316,6 @@ using fallback mechanism."))
(next-command))))))
-(define (capture-dynamic-state proc)
- (let ((dynamic-state (current-dynamic-state)))
- ;; (pk 'capturing ((@@ (shepherd service) current-process-monitor)))
- (lambda args
- (with-dynamic-state dynamic-state
- (lambda ()
- ;; (pk 'captured
- ;; ((@@ (shepherd-service)
current-process-monitor)))
- (apply proc args))))))
-
(define-syntax replace-core-bindings!
(syntax-rules (<>)
"Replace the given core bindings in the current process, restoring them
upon
@@ -472,19 +498,6 @@ fork in the child process."
(register-services (list root-service))
(start-service root-service)
- ;; Install signal handlers on systems that don't support
- ;; signalfd(2). Capture the current dynamic state so that
- ;; 'current-process-monitor' & co. have the right value when
- ;; the handler is invoked.
- (unless signal-port
- (for-each (lambda (signal)
- (sigaction signal
- (capture-dynamic-state (signal-handler
signal))))
- (delete SIGCHLD %precious-signals))
-
- (sigaction SIGCHLD (capture-dynamic-state handle-SIGCHLD)
- SA_NOCLDSTOP))
-
;; Replace the default 'system*' binding with one that
;; cooperates instead of blocking on 'waitpid'. Replace
;; 'primitive-load' (in C as of 3.0.9) with one that does