From 18284f5a1beccae9cce357a1eed7cdf77348cddb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Fri, 20 Dec 2024 14:31:49 +0100 Subject: [PATCH 1/9] core: drop duplicated check in manager_{set,override}_watchdog Those functions call watchdog_setup() and watchdog_setup_pretimeout(), which internally do a similar check against the static variables watchdog_timeout and watchdog_pretimeout. The second check is not useful. --- src/core/manager.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/core/manager.c b/src/core/manager.c index c6c9779976..12c4e65e52 100644 --- a/src/core/manager.c +++ b/src/core/manager.c @@ -3582,9 +3582,6 @@ void manager_set_watchdog(Manager *m, WatchdogType t, usec_t timeout) { if (MANAGER_IS_USER(m)) return; - if (m->watchdog[t] == timeout) - return; - if (m->watchdog_overridden[t] == USEC_INFINITY) { if (t == WATCHDOG_RUNTIME) (void) watchdog_setup(timeout); @@ -3603,9 +3600,6 @@ void manager_override_watchdog(Manager *m, WatchdogType t, usec_t timeout) { if (MANAGER_IS_USER(m)) return; - if (m->watchdog_overridden[t] == timeout) - return; - usec = timeout == USEC_INFINITY ? m->watchdog[t] : timeout; if (t == WATCHDOG_RUNTIME) (void) watchdog_setup(usec); From ab37e9cdb2224e905500f91688c4b37a1de08e08 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Fri, 20 Dec 2024 14:51:34 +0100 Subject: [PATCH 2/9] core/watchdog: initialize variables only if they'll be used This is not a hot path, but let's move the variables to the minimal scope in which they are needed. Also, all functions which call watchdog_open() check for fd < 0 first, so change the conditional return into an assert. --- src/shared/watchdog.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/src/shared/watchdog.c b/src/shared/watchdog.c index edf20edcf9..7ac689c150 100644 --- a/src/shared/watchdog.c +++ b/src/shared/watchdog.c @@ -312,8 +312,7 @@ static int watchdog_open(void) { char **try_order; int r; - if (watchdog_fd >= 0) - return 0; + assert(watchdog_fd < 0); /* Let's prefer new-style /dev/watchdog0 (i.e. kernel 3.5+) over classic /dev/watchdog. The former * has the benefit that we can easily find the matching directory in sysfs from it, as the relevant @@ -446,15 +445,13 @@ usec_t watchdog_runtime_wait(void) { usec_t ntime = now(CLOCK_BOOTTIME); assert(ntime >= watchdog_last_ping); - return usec_sub_unsigned(watchdog_last_ping + (timeout / 2), ntime); + return usec_sub_unsigned(watchdog_last_ping + timeout/2, ntime); } return timeout / 2; } int watchdog_ping(void) { - usec_t ntime, timeout; - if (watchdog_timeout == 0) return 0; @@ -462,14 +459,15 @@ int watchdog_ping(void) { /* open_watchdog() will automatically ping the device for us if necessary */ return watchdog_open(); - ntime = now(CLOCK_BOOTTIME); - timeout = watchdog_calc_timeout(); - /* Never ping earlier than watchdog_timeout/4 and try to ping * by watchdog_timeout/2 plus scheduling latencies at the latest */ if (timestamp_is_set(watchdog_last_ping)) { + usec_t ntime = now(CLOCK_BOOTTIME), + timeout = watchdog_calc_timeout(); + assert(ntime >= watchdog_last_ping); - if ((ntime - watchdog_last_ping) < (timeout / 4)) + + if (ntime - watchdog_last_ping < timeout/4) return 0; } From 839db33687e4751f3dd0b210d8b82339c17e7dd7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Fri, 20 Dec 2024 15:02:16 +0100 Subject: [PATCH 3/9] test-watchdog: modernize --- src/test/test-watchdog.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/src/test/test-watchdog.c b/src/test/test-watchdog.c index 70d069c08b..1eb284cd29 100644 --- a/src/test/test-watchdog.c +++ b/src/test/test-watchdog.c @@ -8,26 +8,24 @@ #include "watchdog.h" int main(int argc, char *argv[]) { - usec_t t; - unsigned i, count; int r; - bool slow; test_setup_logging(LOG_DEBUG); - slow = slow_tests_enabled(); + bool slow = slow_tests_enabled(); - t = slow ? 10 * USEC_PER_SEC : 2 * USEC_PER_SEC; - count = slow ? 5 : 3; + usec_t timeout = slow ? 10 * USEC_PER_SEC : 2 * USEC_PER_SEC; + unsigned count = slow ? 5 : 3; - r = watchdog_setup(t); + log_info("Initializing watchdog with timeout of %s", FORMAT_TIMESPAN(timeout, USEC_PER_SEC)); + r = watchdog_setup(timeout); if (r < 0) log_warning_errno(r, "Failed to open watchdog: %m"); - for (i = 0; i < count; i++) { - t = watchdog_runtime_wait(); - log_info("Sleeping " USEC_FMT " microseconds...", t); - usleep_safe(t); + for (unsigned i = 0; i < count; i++) { + timeout = watchdog_runtime_wait(); + log_info("Sleeping %s…", FORMAT_TIMESPAN(timeout, USEC_PER_SEC)); + usleep_safe(timeout); log_info("Pinging..."); r = watchdog_ping(); if (r < 0) From 0b291130e8e21375559727b7e0dd9c3f2ed20463 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Fri, 20 Dec 2024 17:27:57 +0100 Subject: [PATCH 4/9] shared/watchdog: raise log levels for watchdog errors If we failed to open the watchdog device for any reason, we'd only log at debug level. This seems iffy: if the user configured a timeout for the watchdog, we should report when we can't set it up. ENOENT is still logged at debug level only, since it's somewhat expected to have a watchdog timeout set up, even for systems which don't have a watchdog, or the device might appear later. If the device doesn't support WDIOC_GETSUPPORT, still log that we opened a device. No change in behaviour, except for the log level threshold. As a side effect, the reason why we failed to open the device is now stored in watchdog_fd (previously it was -1 always), but this isn't used for anything. --- src/shared/watchdog.c | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/src/shared/watchdog.c b/src/shared/watchdog.c index 7ac689c150..dc94125379 100644 --- a/src/shared/watchdog.c +++ b/src/shared/watchdog.c @@ -195,10 +195,13 @@ usec_t watchdog_get_last_ping(clockid_t clock) { } static int watchdog_ping_now(void) { + int r; + assert(watchdog_fd >= 0); - if (ioctl(watchdog_fd, WDIOC_KEEPALIVE, 0) < 0) - return log_warning_errno(errno, "Failed to ping hardware watchdog, ignoring: %m"); + r = RET_NERRNO(ioctl(watchdog_fd, WDIOC_KEEPALIVE, 0)); + if (r < 0) + return log_warning_errno(r, "Failed to ping hardware watchdog, ignoring: %m"); watchdog_last_ping = now(CLOCK_BOOTTIME); @@ -322,7 +325,7 @@ static int watchdog_open(void) { STRV_MAKE("/dev/watchdog0", "/dev/watchdog") : STRV_MAKE(watchdog_device); STRV_FOREACH(wd, try_order) { - watchdog_fd = open(*wd, O_WRONLY|O_CLOEXEC); + watchdog_fd = RET_NERRNO(open(*wd, O_WRONLY|O_CLOEXEC)); if (watchdog_fd >= 0) { if (free_and_strdup(&watchdog_device, *wd) < 0) { r = log_oom_debug(); @@ -332,22 +335,27 @@ static int watchdog_open(void) { break; } - if (errno != ENOENT) - return log_debug_errno(errno, "Failed to open watchdog device %s: %m", *wd); + if (watchdog_fd != -ENOENT) + return log_warning_errno(watchdog_fd, "Failed to open watchdog device %s: %m", *wd); } if (watchdog_fd < 0) - return log_debug_errno(SYNTHETIC_ERRNO(ENOENT), "Failed to open watchdog device %s.", watchdog_device ?: "auto"); + return log_debug_errno(watchdog_fd, "Failed to open %swatchdog device%s%s.", + watchdog_device ? "" : "any ", + watchdog_device ? " " : "", + strempty(watchdog_device)); watchdog_last_ping = USEC_INFINITY; - if (ioctl(watchdog_fd, WDIOC_GETSUPPORT, &ident) < 0) - log_debug_errno(errno, "Hardware watchdog %s does not support WDIOC_GETSUPPORT ioctl, ignoring: %m", watchdog_device); + r = RET_NERRNO(ioctl(watchdog_fd, WDIOC_GETSUPPORT, &ident)); + if (r < 0) + log_info_errno(r, "Using hardware watchdog %s, no support for WDIOC_GETSUPPORT ioctl: %m", + watchdog_device); else - log_info("Using hardware watchdog '%s', version %x, device %s", + log_info("Using hardware watchdog %s: '%s', version %x.", + watchdog_device, ident.identity, - ident.firmware_version, - watchdog_device); + ident.firmware_version), r = watchdog_update_timeout(); if (r < 0) From 2c6397d1c25f45fadedf1be4c5d77dfb1e944844 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Fri, 20 Dec 2024 17:56:07 +0100 Subject: [PATCH 5/9] pid1: log if we failed to find a watchdog device I think we need to log at some point if the user configured a watchdog device, but no devices were found. We can't log ENOENT immediately, because the device may likely appear during boot. So wait until the end of the initial transaction and log then. --- src/core/manager.c | 3 +++ src/shared/watchdog.c | 17 +++++++++++++++++ src/shared/watchdog.h | 1 + 3 files changed, 21 insertions(+) diff --git a/src/core/manager.c b/src/core/manager.c index 12c4e65e52..6d20cf9423 100644 --- a/src/core/manager.c +++ b/src/core/manager.c @@ -3871,6 +3871,9 @@ static void manager_notify_finished(Manager *m) { bus_manager_send_finished(m, firmware_usec, loader_usec, kernel_usec, initrd_usec, userspace_usec, total_usec); + if (MANAGER_IS_SYSTEM(m) && detect_container() <= 0) + watchdog_report_if_missing(); + log_taint_string(m); } diff --git a/src/shared/watchdog.c b/src/shared/watchdog.c index dc94125379..22c28f0c37 100644 --- a/src/shared/watchdog.c +++ b/src/shared/watchdog.c @@ -482,6 +482,23 @@ int watchdog_ping(void) { return watchdog_ping_now(); } +void watchdog_report_if_missing(void) { + /* If we failed to open the watchdog because the device doesn't exist, report why. If we successfully + * opened a device, or opening failed for a different reason, we logged then. But ENOENT failure may + * be transient, for example because modules being loaded or the hardware being detected. This + * function can be called to log after things have settled down. + * + * If a device was specified explicitly, raise level. */ + + if (watchdog_fd == -ENOENT) + log_full_errno(watchdog_device ? LOG_WARNING : LOG_NOTICE, + watchdog_fd, + "Failed to open %swatchdog device%s%s before the initial transaction completed: %m", + watchdog_device ? "" : "any ", + watchdog_device ? " " : "", + strempty(watchdog_device)); +} + void watchdog_close(bool disarm) { /* Once closed, pinging the device becomes a NOP and we request a new diff --git a/src/shared/watchdog.h b/src/shared/watchdog.h index bf9212a865..da63730a5c 100644 --- a/src/shared/watchdog.h +++ b/src/shared/watchdog.h @@ -13,6 +13,7 @@ int watchdog_setup(usec_t timeout); int watchdog_setup_pretimeout(usec_t usec); int watchdog_setup_pretimeout_governor(const char *governor); int watchdog_ping(void); +void watchdog_report_if_missing(void); void watchdog_close(bool disarm); usec_t watchdog_runtime_wait(void); From b27c9279f6cbed793615bcc6fcdeeac6a8c5ec54 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Fri, 20 Dec 2024 19:14:51 +0100 Subject: [PATCH 6/9] shared/watchdog: ratelimit the number of attempts to open watchdog We need to retry the open attempts for the watchdog, because the device becomes available asynchronously. The watchdog is opened in two places: - in pid1 in the main loop. The loop has a ratelimit, but during a boot we iterate in it fairly quickly. On my test VM with 'iTCO_wdt', version 2: $ journalctl -b --grep 'Failed to open any watchdog' | wc -l 3398 After the device has been processed by udev, it is initialized successfully. - in shutdown. In that case, we most likely don't need to try more than once, because we mostly care about the case where the watchdog device was present and configured previously. But in principle it is possible that we might attempt shutdown while the machine was initializing, so we don't want to disable retries. Nevertheless, watchdog_ping() is called from a loop that might be fairly tight, so we could end up trying to reopen the device fairly often. This probably doesn't matter *too* much, but it's still ugly to try to open the device without any ratelimit. Usually the watchdog timeout would be set to something like 30 s or a few minutes. OTOH, on my VM, the device becomes avaiable at 4.35 s after boot. So let's use 5 s or half the watchdog timeout, whatever is smaller, as the interval. --- src/shared/watchdog.c | 34 ++++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/src/shared/watchdog.c b/src/shared/watchdog.c index 22c28f0c37..449c070f8f 100644 --- a/src/shared/watchdog.c +++ b/src/shared/watchdog.c @@ -310,12 +310,20 @@ static int watchdog_update_timeout(void) { return watchdog_ping_now(); } -static int watchdog_open(void) { +static int watchdog_open(bool ignore_ratelimit) { + static RateLimit watchdog_open_ratelimit = { 0, 1 }; /* The interval is initialized dynamically below. */ struct watchdog_info ident; char **try_order; int r; assert(watchdog_fd < 0); + assert(watchdog_timeout > 0); + + watchdog_open_ratelimit.interval = MIN(watchdog_timeout / 2, 5 * USEC_PER_SEC); + + if (!ratelimit_below(&watchdog_open_ratelimit)) + if (!ignore_ratelimit) /* If ignore_ratelimit, we update the timestamp, but continue. */ + return -EWOULDBLOCK; /* Let's prefer new-style /dev/watchdog0 (i.e. kernel 3.5+) over classic /dev/watchdog. The former * has the benefit that we can easily find the matching directory in sysfs from it, as the relevant @@ -405,7 +413,7 @@ int watchdog_setup(usec_t timeout) { watchdog_timeout = timeout; if (watchdog_fd < 0) - return watchdog_open(); + return watchdog_open(/* ignore_ratelimit= */ false); r = watchdog_update_timeout(); if (r < 0) @@ -465,7 +473,7 @@ int watchdog_ping(void) { if (watchdog_fd < 0) /* open_watchdog() will automatically ping the device for us if necessary */ - return watchdog_open(); + return watchdog_open(/* ignore_ratelimit= */ false); /* Never ping earlier than watchdog_timeout/4 and try to ping * by watchdog_timeout/2 plus scheduling latencies at the latest */ @@ -490,13 +498,19 @@ void watchdog_report_if_missing(void) { * * If a device was specified explicitly, raise level. */ - if (watchdog_fd == -ENOENT) - log_full_errno(watchdog_device ? LOG_WARNING : LOG_NOTICE, - watchdog_fd, - "Failed to open %swatchdog device%s%s before the initial transaction completed: %m", - watchdog_device ? "" : "any ", - watchdog_device ? " " : "", - strempty(watchdog_device)); + if (watchdog_fd != -ENOENT) + return; + + /* We attempt to open the watchdog one last time here, so that if we log, we log accurately. */ + if (watchdog_open(/* ignore_ratelimit= */ true) >= 0) + return; + + log_full_errno(watchdog_device ? LOG_WARNING : LOG_NOTICE, + watchdog_fd, + "Failed to open %swatchdog device%s%s before the initial transaction completed: %m", + watchdog_device ? "" : "any ", + watchdog_device ? " " : "", + strempty(watchdog_device)); } void watchdog_close(bool disarm) { From 236ac39b65b0a741827c30dcad8a95d8b66276e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Fri, 21 Mar 2025 12:10:35 +0100 Subject: [PATCH 7/9] shared/watchdog: add MESSAGE_IDs to logs about watchdog opening One ID for the success case, with WATCHDOG_DEVICE= showing the device, and one ID for the failure case, with WATCHDOG_DEVICE= if configured, and ERRNO= set automatically. --- src/shared/watchdog.c | 41 ++++++++++++++++++++++++++------------- src/systemd/sd-messages.h | 6 ++++++ 2 files changed, 34 insertions(+), 13 deletions(-) diff --git a/src/shared/watchdog.c b/src/shared/watchdog.c index 449c070f8f..6828a22b42 100644 --- a/src/shared/watchdog.c +++ b/src/shared/watchdog.c @@ -7,6 +7,8 @@ #include #include +#include "sd-messages.h" + #include "devnum-util.h" #include "errno-util.h" #include "fd-util.h" @@ -344,7 +346,10 @@ static int watchdog_open(bool ignore_ratelimit) { } if (watchdog_fd != -ENOENT) - return log_warning_errno(watchdog_fd, "Failed to open watchdog device %s: %m", *wd); + return log_struct_errno(LOG_WARNING, watchdog_fd, + LOG_MESSAGE("Failed to open watchdog device %s: %m", *wd), + "MESSAGE_ID=" SD_MESSAGE_WATCHDOG_OPEN_FAILED_STR, + "WATCHDOG_DEVICE=%s", *wd); } if (watchdog_fd < 0) @@ -357,13 +362,19 @@ static int watchdog_open(bool ignore_ratelimit) { r = RET_NERRNO(ioctl(watchdog_fd, WDIOC_GETSUPPORT, &ident)); if (r < 0) - log_info_errno(r, "Using hardware watchdog %s, no support for WDIOC_GETSUPPORT ioctl: %m", - watchdog_device); + log_struct_errno(LOG_INFO, r, + LOG_MESSAGE("Using hardware watchdog %s, no support for WDIOC_GETSUPPORT ioctl: %m", + watchdog_device), + "MESSAGE_ID=" SD_MESSAGE_WATCHDOG_OPENED_STR, + "WATCHDOG_DEVICE=%s", watchdog_device); else - log_info("Using hardware watchdog %s: '%s', version %x.", - watchdog_device, - ident.identity, - ident.firmware_version), + log_struct(LOG_INFO, + LOG_MESSAGE("Using hardware watchdog %s: '%s', version %x.", + watchdog_device, + ident.identity, + ident.firmware_version), + "MESSAGE_ID=" SD_MESSAGE_WATCHDOG_OPENED_STR, + "WATCHDOG_DEVICE=%s", watchdog_device); r = watchdog_update_timeout(); if (r < 0) @@ -505,12 +516,16 @@ void watchdog_report_if_missing(void) { if (watchdog_open(/* ignore_ratelimit= */ true) >= 0) return; - log_full_errno(watchdog_device ? LOG_WARNING : LOG_NOTICE, - watchdog_fd, - "Failed to open %swatchdog device%s%s before the initial transaction completed: %m", - watchdog_device ? "" : "any ", - watchdog_device ? " " : "", - strempty(watchdog_device)); + if (watchdog_device) + log_struct_errno(LOG_WARNING, watchdog_fd, + LOG_MESSAGE("Failed to open watchdog device %s before the initial transaction completed: %m", + watchdog_device), + "MESSAGE_ID=" SD_MESSAGE_WATCHDOG_OPEN_FAILED_STR, + "WATCHDOG_DEVICE=%s", watchdog_device); + else + log_struct_errno(LOG_WARNING, watchdog_fd, + LOG_MESSAGE("Failed to open any watchdog device before the initial transaction completed: %m"), + "MESSAGE_ID=" SD_MESSAGE_WATCHDOG_OPEN_FAILED_STR); } void watchdog_close(bool disarm) { diff --git a/src/systemd/sd-messages.h b/src/systemd/sd-messages.h index 60b28087f6..c35b1bfdec 100644 --- a/src/systemd/sd-messages.h +++ b/src/systemd/sd-messages.h @@ -256,6 +256,12 @@ _SD_BEGIN_DECLARATIONS; #define SD_MESSAGE_TIME_BUMP SD_ID128_MAKE(7d,b7,3c,8a,f0,d9,4e,eb,82,2a,e0,43,23,fe,6a,b6) #define SD_MESSAGE_TIME_BUMP_STR SD_ID128_MAKE_STR(7d,b7,3c,8a,f0,d9,4e,eb,82,2a,e0,43,23,fe,6a,b6) +#define SD_MESSAGE_WATCHDOG_OPENED SD_ID128_MAKE(21,66,8d,bd,3d,7a,4a,32,a2,67,6d,53,da,da,b0,22) +#define SD_MESSAGE_WATCHDOG_OPENED_STR SD_ID128_MAKE_STR(21,66,8d,bd,3d,7a,4a,32,a2,67,6d,53,da,da,b0,22) + +#define SD_MESSAGE_WATCHDOG_OPEN_FAILED SD_ID128_MAKE(37,5a,c1,51,ef,9d,4d,e3,90,68,b3,ef,bf,ed,0c,ee) +#define SD_MESSAGE_WATCHDOG_OPEN_FAILED_STR SD_ID128_MAKE_STR(37,5a,c1,51,ef,9d,4d,e3,90,68,b3,ef,bf,ed,0c,ee) + #define SD_MESSAGE_SHUTDOWN_SCHEDULED SD_ID128_MAKE(9e,70,66,27,9d,c8,40,3d,a7,9c,e4,b1,a6,90,64,b2) #define SD_MESSAGE_SHUTDOWN_SCHEDULED_STR SD_ID128_MAKE_STR(9e,70,66,27,9d,c8,40,3d,a7,9c,e4,b1,a6,90,64,b2) From ab596e4cde5d4b55be5182a8d5554e3e97366131 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Fri, 21 Mar 2025 14:39:50 +0100 Subject: [PATCH 8/9] shared/watchdog: give up after a few failed pings Closes https://github.com/systemd/systemd/issues/35405. Apparently some watchdog devices can be opened, but then the pings start failing after some time. Since the timestamp of the last successful ping is not updated, we try to ping again immediately, causing a busy loop and excessive logging. After trying a few different approaches to fit this into the existing framework without changing the logic too much, I settled on an approach with a second timestamp. In particular, the timestamp of the last successful ping is public, exposed as WatchdogLastPingTimestamp over dbus. It'd be wrong to redefine this to mean the last ping *attempt*. So we need a second timestamp in some form. Also, if we give up on pinging, we probably should attempt to disarm the watchdog. It's possible that the pinging fails, but the watchdog would still fire. I don't think we want that, since it seems that our internal loop is working, it's just the watchdog that is broken. Structured message with SD_MESSAGE_WATCHDOG_PING_FAILED is logged if we fail to ping. I tested this by attaching gdb to pid 1 and calling close(watchdog_fd). We get a bunch of warning messages and then an attempt to close the watchdog: Mar 21 15:46:17 fedora systemd[1]: Failed to ping hardware watchdog /dev/watchdog0: Bad file descriptor Mar 21 15:46:20 fedora systemd[1]: Failed to ping hardware watchdog /dev/watchdog0: Bad file descriptor Mar 21 15:46:23 fedora systemd[1]: Failed to ping hardware watchdog /dev/watchdog0: Bad file descriptor Mar 21 15:46:26 fedora systemd[1]: Failed to ping hardware watchdog /dev/watchdog0: Bad file descriptor Mar 21 15:46:29 fedora systemd[1]: Failed to ping hardware watchdog /dev/watchdog0: Bad file descriptor Mar 21 15:46:32 fedora systemd[1]: Failed to ping hardware watchdog /dev/watchdog0: Bad file descriptor Mar 21 15:46:35 fedora systemd[1]: Failed to ping hardware watchdog /dev/watchdog0: Bad file descriptor Mar 21 15:46:37 fedora systemd[1]: Failed to ping hardware watchdog /dev/watchdog0: Bad file descriptor Mar 21 15:46:40 fedora systemd[1]: Failed to ping hardware watchdog /dev/watchdog0: Bad file descriptor Mar 21 15:46:43 fedora systemd[1]: Failed to ping hardware watchdog /dev/watchdog0: Bad file descriptor Mar 21 15:46:46 fedora systemd[1]: Failed to ping hardware watchdog /dev/watchdog0: Bad file descriptor Mar 21 15:46:49 fedora systemd[1]: Failed to ping hardware watchdog /dev/watchdog0: Bad file descriptor Mar 21 15:46:52 fedora systemd[1]: Failed to ping hardware watchdog /dev/watchdog0: Bad file descriptor Mar 21 15:46:55 fedora systemd[1]: Failed to ping hardware watchdog /dev/watchdog0: Bad file descriptor Mar 21 15:46:58 fedora systemd[1]: Failed to ping hardware watchdog /dev/watchdog0, closing watchdog after 15 attempts: Bad file descriptor Mar 21 15:46:58 fedora systemd[1]: Failed to disable hardware watchdog, ignoring: Bad file descriptor Mar 21 15:46:58 fedora systemd[1]: Failed to disarm watchdog timer, ignoring: Bad file descriptor --- src/core/main.c | 6 +-- src/core/manager.c | 2 +- src/shared/watchdog.c | 88 +++++++++++++++++++++++++++------------ src/shared/watchdog.h | 2 +- src/systemd/sd-messages.h | 3 ++ src/test/test-watchdog.c | 4 +- 6 files changed, 71 insertions(+), 34 deletions(-) diff --git a/src/core/main.c b/src/core/main.c index bca6c96b4e..23ce9aa92c 100644 --- a/src/core/main.c +++ b/src/core/main.c @@ -1985,9 +1985,9 @@ static int do_reexecute( assert(saved_rlimit_memlock); assert(ret_error_message); - /* Close and disarm the watchdog, so that the new instance can reinitialize it, but doesn't get - * rebooted while we do that */ - watchdog_close(true); + /* Close and disarm the watchdog, so that the new instance can reinitialize it, but the machine + * doesn't get rebooted while we do that. */ + watchdog_close(/* disarm= */ true); if (!switch_root_dir && objective == MANAGER_SOFT_REBOOT) { /* If no switch root dir is specified, then check if /run/nextroot/ qualifies and use that */ diff --git a/src/core/manager.c b/src/core/manager.c index 6d20cf9423..45f3464dd4 100644 --- a/src/core/manager.c +++ b/src/core/manager.c @@ -3391,7 +3391,7 @@ int manager_loop(Manager *m) { continue; /* Sleep for watchdog runtime wait time */ - r = sd_event_run(m->event, watchdog_runtime_wait()); + r = sd_event_run(m->event, watchdog_runtime_wait(/* divisor= */ 2)); if (r < 0) return log_error_errno(r, "Failed to run event loop: %m"); } diff --git a/src/shared/watchdog.c b/src/shared/watchdog.c index 6828a22b42..5b3f950b80 100644 --- a/src/shared/watchdog.c +++ b/src/shared/watchdog.c @@ -23,7 +23,9 @@ static int watchdog_fd = -EBADF; static char *watchdog_device = NULL; static usec_t watchdog_timeout = 0; /* 0 → close device and USEC_INFINITY → don't change timeout */ static usec_t watchdog_pretimeout = 0; /* 0 → disable pretimeout and USEC_INFINITY → don't change pretimeout */ -static usec_t watchdog_last_ping = USEC_INFINITY; +static usec_t watchdog_last_good_ping = USEC_INFINITY; +static usec_t watchdog_last_bad_ping = USEC_INFINITY; +static unsigned watchdog_bad_pings = 0; static bool watchdog_supports_pretimeout = false; /* Depends on kernel state that might change at runtime */ static char *watchdog_pretimeout_governor = NULL; @@ -36,6 +38,11 @@ static char *watchdog_pretimeout_governor = NULL; */ #define WATCHDOG_TIMEOUT_MAX_SEC (CONST_MIN(UINT_MAX/1000U, (unsigned)INT_MAX)) +/* How many times to ping the watchdog in a single burst. */ +#define WATCHDOG_PING_BURST 3 +/* How many times to ping the watchdog in total before giving up on it. */ +#define WATCHDOG_MAX_FAILED_PINGS 15 + #define WATCHDOG_GOV_NAME_MAXLEN 20 /* From the kernel watchdog driver */ static int saturated_usec_to_sec(usec_t val) { @@ -193,7 +200,7 @@ static int watchdog_set_pretimeout(void) { } usec_t watchdog_get_last_ping(clockid_t clock) { - return map_clock_usec(watchdog_last_ping, CLOCK_BOOTTIME, clock); + return map_clock_usec(watchdog_last_good_ping, CLOCK_BOOTTIME, clock); } static int watchdog_ping_now(void) { @@ -202,12 +209,35 @@ static int watchdog_ping_now(void) { assert(watchdog_fd >= 0); r = RET_NERRNO(ioctl(watchdog_fd, WDIOC_KEEPALIVE, 0)); - if (r < 0) - return log_warning_errno(r, "Failed to ping hardware watchdog, ignoring: %m"); + if (r < 0) { + watchdog_last_bad_ping = now(CLOCK_BOOTTIME); + if (++ watchdog_bad_pings >= WATCHDOG_MAX_FAILED_PINGS) { + log_struct_errno(LOG_ERR, r, + LOG_MESSAGE("Failed to ping hardware watchdog %s, closing watchdog after %u attempts: %m", + watchdog_device, watchdog_bad_pings), + "MESSAGE_ID=" SD_MESSAGE_WATCHDOG_PING_FAILED_STR, + "WATCHDOG_DEVICE=%s", watchdog_device); + watchdog_timeout = USEC_INFINITY; + watchdog_close(/* disarm= */ true); + } else + log_struct_errno(LOG_WARNING, r, + LOG_MESSAGE("Failed to ping hardware watchdog %s: %m", + watchdog_device), + "MESSAGE_ID=" SD_MESSAGE_WATCHDOG_PING_FAILED_STR, + "WATCHDOG_DEVICE=%s", watchdog_device); + } else { + watchdog_last_good_ping = now(CLOCK_BOOTTIME); + if (watchdog_bad_pings > 0) { + log_struct(LOG_INFO, + LOG_MESSAGE("Succesfullly pinged hardware watchdog %s after %u attempts: %m", + watchdog_device, watchdog_bad_pings + 1), + "WATCHDOG_DEVICE=%s", watchdog_device); + watchdog_bad_pings = 0; + watchdog_last_bad_ping = USEC_INFINITY; + } + } - watchdog_last_ping = now(CLOCK_BOOTTIME); - - return 0; + return r; } static int watchdog_update_pretimeout(void) { @@ -358,7 +388,8 @@ static int watchdog_open(bool ignore_ratelimit) { watchdog_device ? " " : "", strempty(watchdog_device)); - watchdog_last_ping = USEC_INFINITY; + watchdog_last_good_ping = watchdog_last_bad_ping = USEC_INFINITY; + watchdog_bad_pings = 0; r = RET_NERRNO(ioctl(watchdog_fd, WDIOC_GETSUPPORT, &ident)); if (r < 0) @@ -410,7 +441,7 @@ int watchdog_setup(usec_t timeout) { * it's a nop if the device is already opened). */ if (timeout == 0) { - watchdog_close(true); + watchdog_close(/* disarm= */ true); return 0; } @@ -454,6 +485,10 @@ int watchdog_setup_pretimeout_governor(const char *governor) { } static usec_t watchdog_calc_timeout(void) { + /* If we failed to many times, don't schedule the next attempt at all. */ + if (watchdog_bad_pings >= WATCHDOG_MAX_FAILED_PINGS) + return 0; + /* Calculate the effective timeout which accounts for the watchdog * pretimeout if configured and supported. */ if (watchdog_supports_pretimeout && timestamp_is_set(watchdog_pretimeout) && watchdog_timeout >= watchdog_pretimeout) @@ -462,20 +497,26 @@ static usec_t watchdog_calc_timeout(void) { return watchdog_timeout; } -usec_t watchdog_runtime_wait(void) { +usec_t watchdog_runtime_wait(unsigned divisor) { usec_t timeout = watchdog_calc_timeout(); if (!timestamp_is_set(timeout)) return USEC_INFINITY; - /* Sleep half the watchdog timeout since the last successful ping at most */ - if (timestamp_is_set(watchdog_last_ping)) { - usec_t ntime = now(CLOCK_BOOTTIME); + /* Sleep watchdog timeout / divisor since the last successful ping attempt at most, + * or the fifth of that if the ping failed. */ - assert(ntime >= watchdog_last_ping); - return usec_sub_unsigned(watchdog_last_ping + timeout/2, ntime); + usec_t ts = MAX(timestamp_is_set(watchdog_last_good_ping) ? watchdog_last_good_ping : 0u, + timestamp_is_set(watchdog_last_bad_ping) ? watchdog_last_bad_ping : 0u); + if (ts > 0) { + usec_t ntime = now(CLOCK_BOOTTIME); + if (ts == watchdog_last_bad_ping) + divisor *= 5; + + assert(ntime >= ts); + return usec_sub_unsigned(ts + timeout / divisor, ntime); } - return timeout / 2; + return timeout / divisor; } int watchdog_ping(void) { @@ -486,17 +527,10 @@ int watchdog_ping(void) { /* open_watchdog() will automatically ping the device for us if necessary */ return watchdog_open(/* ignore_ratelimit= */ false); - /* Never ping earlier than watchdog_timeout/4 and try to ping - * by watchdog_timeout/2 plus scheduling latencies at the latest */ - if (timestamp_is_set(watchdog_last_ping)) { - usec_t ntime = now(CLOCK_BOOTTIME), - timeout = watchdog_calc_timeout(); - - assert(ntime >= watchdog_last_ping); - - if (ntime - watchdog_last_ping < timeout/4) - return 0; - } + /* Ping approximately watchdog_timeout/4 after a successful ping, or even less than that + * after an unsuccessful ping. */ + if (watchdog_runtime_wait(/* divisor= */ 4) > 0) + return 0; return watchdog_ping_now(); } diff --git a/src/shared/watchdog.h b/src/shared/watchdog.h index da63730a5c..c0fcccbd07 100644 --- a/src/shared/watchdog.h +++ b/src/shared/watchdog.h @@ -15,7 +15,7 @@ int watchdog_setup_pretimeout_governor(const char *governor); int watchdog_ping(void); void watchdog_report_if_missing(void); void watchdog_close(bool disarm); -usec_t watchdog_runtime_wait(void); +usec_t watchdog_runtime_wait(unsigned divisor); static inline void watchdog_free_device(void) { (void) watchdog_set_device(NULL); diff --git a/src/systemd/sd-messages.h b/src/systemd/sd-messages.h index c35b1bfdec..502a2e5d5a 100644 --- a/src/systemd/sd-messages.h +++ b/src/systemd/sd-messages.h @@ -262,6 +262,9 @@ _SD_BEGIN_DECLARATIONS; #define SD_MESSAGE_WATCHDOG_OPEN_FAILED SD_ID128_MAKE(37,5a,c1,51,ef,9d,4d,e3,90,68,b3,ef,bf,ed,0c,ee) #define SD_MESSAGE_WATCHDOG_OPEN_FAILED_STR SD_ID128_MAKE_STR(37,5a,c1,51,ef,9d,4d,e3,90,68,b3,ef,bf,ed,0c,ee) +#define SD_MESSAGE_WATCHDOG_PING_FAILED SD_ID128_MAKE(87,39,78,9e,ca,06,43,25,af,15,a8,ed,0e,cf,c5,56) +#define SD_MESSAGE_WATCHDOG_PING_FAILED_STR SD_ID128_MAKE_STR(87,39,78,9e,ca,06,43,25,af,15,a8,ed,0e,cf,c5,56) + #define SD_MESSAGE_SHUTDOWN_SCHEDULED SD_ID128_MAKE(9e,70,66,27,9d,c8,40,3d,a7,9c,e4,b1,a6,90,64,b2) #define SD_MESSAGE_SHUTDOWN_SCHEDULED_STR SD_ID128_MAKE_STR(9e,70,66,27,9d,c8,40,3d,a7,9c,e4,b1,a6,90,64,b2) diff --git a/src/test/test-watchdog.c b/src/test/test-watchdog.c index 1eb284cd29..67e6581cfb 100644 --- a/src/test/test-watchdog.c +++ b/src/test/test-watchdog.c @@ -23,7 +23,7 @@ int main(int argc, char *argv[]) { log_warning_errno(r, "Failed to open watchdog: %m"); for (unsigned i = 0; i < count; i++) { - timeout = watchdog_runtime_wait(); + timeout = watchdog_runtime_wait(/* divisor= */ 2); log_info("Sleeping %s…", FORMAT_TIMESPAN(timeout, USEC_PER_SEC)); usleep_safe(timeout); log_info("Pinging..."); @@ -32,6 +32,6 @@ int main(int argc, char *argv[]) { log_warning_errno(r, "Failed to ping watchdog: %m"); } - watchdog_close(true); + watchdog_close(/* disarm= */ true); return 0; } From a9cee8f4de5588166a4d4a9ca2f31ee995e25aab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Fri, 21 Mar 2025 15:06:28 +0100 Subject: [PATCH 9/9] core/manager: do not exclude watchdog logic from busy-loop protection As reported in https://github.com/systemd/systemd/issues/35405, if the watchdog ping failed, we effectively started a busy loop here. The previous commits should fix this, but in general, the protection here is intended as a safety net in case the logic is broken somewhere else. We shouldn't exclude the watchdog stuff from this. --- src/core/manager.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core/manager.c b/src/core/manager.c index 45f3464dd4..afccb48d7e 100644 --- a/src/core/manager.c +++ b/src/core/manager.c @@ -3352,14 +3352,14 @@ int manager_loop(Manager *m) { while (m->objective == MANAGER_OK) { - (void) watchdog_ping(); - if (!ratelimit_below(&rl)) { /* Yay, something is going seriously wrong, pause a little */ log_warning("Looping too fast. Throttling execution a little."); sleep(1); } + (void) watchdog_ping(); + if (manager_dispatch_load_queue(m) > 0) continue;