[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[shepherd] 11/13: service: Record time of last startup failures.
From: |
Ludovic Courtès |
Subject: |
[shepherd] 11/13: service: Record time of last startup failures. |
Date: |
Sun, 16 Apr 2023 17:38:37 -0400 (EDT) |
civodul pushed a commit to branch master
in repository shepherd.
commit 1cda4c3bc0829d8bf2d46e7bb69e0432a3b89cac
Author: Ludovic Courtès <ludo@gnu.org>
AuthorDate: Sun Apr 16 22:26:23 2023 +0200
service: Record time of last startup failures.
* modules/shepherd/service.scm (%max-recorded-startup-failures): New
variable.
(service-controller): Add 'failures' variable. Handle
'startup-failures' messages. In 'started-message?' and in
'stopped-message?' clauses, updates FAILURES.
(service-startup-failures): New procedure.
(service->sexp): Add 'startup-failures' field.
* modules/shepherd/support.scm (at-most): New procedure.
* tests/status-sexp.sh: Adjust accordingly.
---
modules/shepherd/service.scm | 21 +++++++++++++++++++--
modules/shepherd/support.scm | 15 +++++++++++++++
tests/status-sexp.sh | 5 +++--
3 files changed, 37 insertions(+), 4 deletions(-)
diff --git a/modules/shepherd/service.scm b/modules/shepherd/service.scm
index c24d404..96d2a71 100644
--- a/modules/shepherd/service.scm
+++ b/modules/shepherd/service.scm
@@ -350,6 +350,10 @@ denoting what the service provides."
(service-controller service channel))))
channel))
+(define %max-recorded-startup-failures
+ ;; Maximum number of service startup failures that are recorded.
+ 10)
+
(define (service-controller service channel)
"Encapsulate @var{service} state and serve requests arriving on
@var{channel}."
@@ -368,6 +372,7 @@ denoting what the service provides."
(value #f)
(condition #f)
(enabled? #t)
+ (failures '())
(respawns '())
(replacement #f))
(match (get-message channel)
@@ -383,6 +388,9 @@ denoting what the service provides."
(('respawn-times reply)
(put-message reply respawns)
(loop))
+ (('startup-failures reply)
+ (put-message reply failures)
+ (loop))
('enable ;no reply
(loop (enabled? #t)))
@@ -444,7 +452,11 @@ denoting what the service provides."
'running
'stopped))
(value (and (not (one-shot-service? service)) new-value))
- (condition #f))))
+ (condition #f)
+ (failures (if new-value
+ failures
+ (at-most %max-recorded-startup-failures
+ (cons (current-time) failures)))))))
(((? change-value-message?) new-value)
(local-output (l10n "Running value of service ~a changed to ~s.")
@@ -497,7 +509,7 @@ denoting what the service provides."
(service-canonical-name service))
(signal-condition! condition)
(loop (status 'stopped) (value #f) (condition #f)
- (respawns '())))
+ (respawns '()) (failures '())))
('notify-termination ;no reply
(loop (status 'stopped) (value #f)))
@@ -628,6 +640,10 @@ channel and wait for its reply."
;; Return the list of respawn times of @var{service}.
(service-control-message 'respawn-times))
+(define service-startup-failures
+ ;; Return the list of recent startup failure times for @var{service}.
+ (service-control-message 'startup-failures))
+
(define service-enabled?
;; Return true if @var{service} is enabled, false otherwise.
(service-control-message 'enabled?))
@@ -965,6 +981,7 @@ clients."
(running ,(result->sexp (service-running-value service)))
(conflicts ()) ;deprecated
(last-respawns ,(service-respawn-times service))
+ (startup-failures ,(service-startup-failures service))
(status ,(service-status service))
,@(if (one-shot-service? service)
'((one-shot? #t))
diff --git a/modules/shepherd/support.scm b/modules/shepherd/support.scm
index a298c36..2cf6aae 100644
--- a/modules/shepherd/support.scm
+++ b/modules/shepherd/support.scm
@@ -27,6 +27,7 @@
#:export (caught-error
assert
let-loop
+ at-most
buffering
catch-system-error
@@ -124,6 +125,20 @@ That reduces the amount of boilerplate for loops with many
variables."
...)))))
body ...)))
+(define (at-most max-length lst)
+ "If @var{lst} is shorter than @var{max-length}, return it and the empty list;
+otherwise return its @var{max-length} first elements and its tail."
+ (let loop ((len 0)
+ (lst lst)
+ (result '()))
+ (match lst
+ (()
+ (values (reverse result) '()))
+ ((head . tail)
+ (if (>= len max-length)
+ (values (reverse result) lst)
+ (loop (+ 1 len) tail (cons head result)))))))
+
(define (buffering port type . args)
"Return PORT after changing its buffering to TYPE and ARGS."
(apply setvbuf port type args)
diff --git a/tests/status-sexp.sh b/tests/status-sexp.sh
index c82e10a..fd019b3 100644
--- a/tests/status-sexp.sh
+++ b/tests/status-sexp.sh
@@ -72,6 +72,7 @@ root_service_sexp="
(respawn? #f)
(docstring \"The root service is used to operate on shepherd itself.\")
(enabled? #t) (running #t) (conflicts ()) (last-respawns ())
+ (startup-failures ())
(status running))"
"$GUILE" -c "
@@ -87,13 +88,13 @@ root_service_sexp="
(provides (foo)) (requires ())
(respawn? #t) (docstring \"Foo!\")
(enabled? #t) (running abc) (conflicts ())
- (last-respawns ())
+ (last-respawns ()) (startup-failures ())
(status running))
(service (version 0)
(provides (bar)) (requires (foo))
(respawn? #f) (docstring \"Bar!\")
(enabled? #t) (running #f) (conflicts ())
- (last-respawns ())
+ (last-respawns ()) (startup-failures ())
(status stopped)))))))
"
- [shepherd] branch master updated (353a91b -> fbca4e2), Ludovic Courtès, 2023/04/16
- [shepherd] 08/13: shepherd: Factorize command message gathering., Ludovic Courtès, 2023/04/16
- [shepherd] 02/13: monitoring: Log registered service names., Ludovic Courtès, 2023/04/16
- [shepherd] 03/13: service: Turn 'doc' method into a procedure., Ludovic Courtès, 2023/04/16
- [shepherd] 04/13: service: Turn 'action' method into a procedure., Ludovic Courtès, 2023/04/16
- [shepherd] 11/13: service: Record time of last startup failures.,
Ludovic Courtès <=
- [shepherd] 09/13: comm: Capture the client command protocol version., Ludovic Courtès, 2023/04/16
- [shepherd] 07/13: service: 'stop-service' returns the list of stopped services, not names., Ludovic Courtès, 2023/04/16
- [shepherd] 12/13: tests: Remove reference to non-existent file., Ludovic Courtès, 2023/04/16
- [shepherd] 01/13: service: Use 'lookup-service' instead of 'lookup-services'., Ludovic Courtès, 2023/04/16
- [shepherd] 13/13: herd: Report startup failure., Ludovic Courtès, 2023/04/16
- [shepherd] 06/13: service: Turn 'stop' method into a procedure., Ludovic Courtès, 2023/04/16
- [shepherd] 05/13: service: Turn 'start' method into a procedure., Ludovic Courtès, 2023/04/16
- [shepherd] 10/13: Add missing 'l10n' calls., Ludovic Courtès, 2023/04/16