From e568fea9fcd2189d4366df254a8a4031dc433762 Mon Sep 17 00:00:00 2001 From: Richard Phibel Date: Thu, 6 Jul 2023 14:33:52 +0200 Subject: [PATCH 1/2] service: add new RestartMode option When this option is set to direct, the service restarts without entering a failed state. Dependent units are not notified of transitory failure. This is useful for the following use case: We have a target with Requires=my-service, After=my-service. my-service.service is a oneshot service and has Restart=on-failure in its definition. my-service.service can get stuck for various reasons and time out, in which case it is restarted. Currently, when it fails the first time, the target fails, even though my-service is restarted. The behavior we're looking for is that until my-service is not restarted anymore, the target stays pending waiting for my-service.service to start successfully or fail without being restarted anymore. --- man/org.freedesktop.systemd1.xml | 6 +++++ man/systemd.service.xml | 22 +++++++++++++++++++ src/core/dbus-service.c | 6 +++++ src/core/load-fragment-gperf.gperf.in | 1 + src/core/load-fragment.c | 2 ++ src/core/load-fragment.h | 1 + src/core/service.c | 10 ++++++++- src/core/service.h | 11 ++++++++++ src/shared/bus-unit-util.c | 1 + src/test/test-tables.c | 1 + .../fails-on-restart-restartdirect.service | 11 ++++++++++ .../fails-on-restart-restartdirect.target | 3 +++ .../fails-on-restart.service | 11 ++++++++++ .../fails-on-restart.target | 3 +++ .../succeeds-on-restart-restartdirect.service | 6 +++++ .../succeeds-on-restart-restartdirect.target | 3 +++ .../succeeds-on-restart.service | 6 +++++ .../testsuite-03.units/succeeds-on-restart.sh | 10 +++++++++ .../succeeds-on-restart.target | 3 +++ test/units/testsuite-03.sh | 13 +++++++++++ 20 files changed, 129 insertions(+), 1 deletion(-) create mode 100644 test/testsuite-03.units/fails-on-restart-restartdirect.service create mode 100755 test/testsuite-03.units/fails-on-restart-restartdirect.target create mode 100644 test/testsuite-03.units/fails-on-restart.service create mode 100755 test/testsuite-03.units/fails-on-restart.target create mode 100755 test/testsuite-03.units/succeeds-on-restart-restartdirect.service create mode 100755 test/testsuite-03.units/succeeds-on-restart-restartdirect.target create mode 100755 test/testsuite-03.units/succeeds-on-restart.service create mode 100755 test/testsuite-03.units/succeeds-on-restart.sh create mode 100755 test/testsuite-03.units/succeeds-on-restart.target diff --git a/man/org.freedesktop.systemd1.xml b/man/org.freedesktop.systemd1.xml index 560ae252e3..5d3a51f40f 100644 --- a/man/org.freedesktop.systemd1.xml +++ b/man/org.freedesktop.systemd1.xml @@ -2588,6 +2588,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice { @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly s Restart = '...'; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") + readonly s RestartMode = '...'; + @org.freedesktop.DBus.Property.EmitsChangedSignal("const") readonly s PIDFile = '...'; readonly s NotifyAccess = '...'; @org.freedesktop.DBus.Property.EmitsChangedSignal("const") @@ -3233,6 +3235,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice { + + @@ -3813,6 +3817,8 @@ node /org/freedesktop/systemd1/unit/avahi_2ddaemon_2eservice { + + diff --git a/man/systemd.service.xml b/man/systemd.service.xml index a183a9eedf..d5674aeffd 100644 --- a/man/systemd.service.xml +++ b/man/systemd.service.xml @@ -897,6 +897,28 @@ + + RestartMode= + + + Takes a string value that specifies how a service should restart: + + If set to (the default), the service restarts by + going through a failed/inactive state. + + If set to , the service transitions to the activating + state directly during auto-restart, skipping failed/inactive state. + ExecStopPost= is invoked. + OnSuccess= and OnFailure= are skipped. + + + + This option is useful in cases where a dependency can fail temporarily + but we don't want these temporary failures to make the dependent units fail. + When this option is set to , dependent units are not notified of these temporary failures. + + + SuccessExitStatus= diff --git a/src/core/dbus-service.c b/src/core/dbus-service.c index ea82a665a6..6068c742ca 100644 --- a/src/core/dbus-service.c +++ b/src/core/dbus-service.c @@ -33,6 +33,7 @@ static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_type, service_type, ServiceType static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_exit_type, service_exit_type, ServiceExitType); static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_result, service_result, ServiceResult); static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_restart, service_restart, ServiceRestart); +static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_restart_mode, service_restart_mode, ServiceRestartMode); static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_emergency_action, emergency_action, EmergencyAction); static BUS_DEFINE_PROPERTY_GET2(property_get_notify_access, "s", Service, service_get_notify_access, notify_access_to_string); static BUS_DEFINE_PROPERTY_GET(property_get_restart_usec_next, "t", Service, service_restart_usec_next); @@ -322,6 +323,7 @@ const sd_bus_vtable bus_service_vtable[] = { SD_BUS_PROPERTY("Type", "s", property_get_type, offsetof(Service, type), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("ExitType", "s", property_get_exit_type, offsetof(Service, exit_type), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("Restart", "s", property_get_restart, offsetof(Service, restart), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("RestartMode", "s", property_get_restart_mode, offsetof(Service, restart_mode), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("PIDFile", "s", NULL, offsetof(Service, pid_file), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("NotifyAccess", "s", property_get_notify_access, 0, SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE), SD_BUS_PROPERTY("RestartUSec", "t", bus_property_get_usec, offsetof(Service, restart_usec), SD_BUS_VTABLE_PROPERTY_CONST), @@ -512,6 +514,7 @@ static BUS_DEFINE_SET_TRANSIENT_PARSE(notify_access, NotifyAccess, notify_access static BUS_DEFINE_SET_TRANSIENT_PARSE(service_type, ServiceType, service_type_from_string); static BUS_DEFINE_SET_TRANSIENT_PARSE(service_exit_type, ServiceExitType, service_exit_type_from_string); static BUS_DEFINE_SET_TRANSIENT_PARSE(service_restart, ServiceRestart, service_restart_from_string); +static BUS_DEFINE_SET_TRANSIENT_PARSE(service_restart_mode, ServiceRestartMode, service_restart_mode_from_string); static BUS_DEFINE_SET_TRANSIENT_PARSE(oom_policy, OOMPolicy, oom_policy_from_string); static BUS_DEFINE_SET_TRANSIENT_STRING_WITH_CHECK(bus_name, sd_bus_service_name_is_valid); static BUS_DEFINE_SET_TRANSIENT_PARSE(timeout_failure_mode, ServiceTimeoutFailureMode, service_timeout_failure_mode_from_string); @@ -660,6 +663,9 @@ static int bus_service_set_transient_property( if (streq(name, "Restart")) return bus_set_transient_service_restart(u, name, &s->restart, message, flags, error); + if (streq(name, "RestartMode")) + return bus_set_transient_service_restart_mode(u, name, &s->restart_mode, message, flags, error); + if (streq(name, "RestartPreventExitStatus")) return bus_set_transient_exit_status(u, name, &s->restart_prevent_status, message, flags, error); diff --git a/src/core/load-fragment-gperf.gperf.in b/src/core/load-fragment-gperf.gperf.in index 382b60ea90..b66adf2811 100644 --- a/src/core/load-fragment-gperf.gperf.in +++ b/src/core/load-fragment-gperf.gperf.in @@ -427,6 +427,7 @@ Service.RebootArgument, config_parse_unit_string_printf, Service.Type, config_parse_service_type, 0, offsetof(Service, type) Service.ExitType, config_parse_service_exit_type, 0, offsetof(Service, exit_type) Service.Restart, config_parse_service_restart, 0, offsetof(Service, restart) +Service.RestartMode, config_parse_service_restart_mode, 0, offsetof(Service, restart_mode) Service.PermissionsStartOnly, config_parse_bool, 0, offsetof(Service, permissions_start_only) Service.RootDirectoryStartOnly, config_parse_bool, 0, offsetof(Service, root_directory_start_only) Service.RemainAfterExit, config_parse_bool, 0, offsetof(Service, remain_after_exit) diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c index f0f60cf2ab..c69d47c228 100644 --- a/src/core/load-fragment.c +++ b/src/core/load-fragment.c @@ -142,6 +142,7 @@ DEFINE_CONFIG_PARSE_ENUM(config_parse_exec_preserve_mode, exec_preserve_mode, Ex DEFINE_CONFIG_PARSE_ENUM(config_parse_service_type, service_type, ServiceType, "Failed to parse service type"); DEFINE_CONFIG_PARSE_ENUM(config_parse_service_exit_type, service_exit_type, ServiceExitType, "Failed to parse service exit type"); DEFINE_CONFIG_PARSE_ENUM(config_parse_service_restart, service_restart, ServiceRestart, "Failed to parse service restart specifier"); +DEFINE_CONFIG_PARSE_ENUM(config_parse_service_restart_mode, service_restart_mode, ServiceRestartMode, "Failed to parse service restart mode"); DEFINE_CONFIG_PARSE_ENUM(config_parse_service_timeout_failure_mode, service_timeout_failure_mode, ServiceTimeoutFailureMode, "Failed to parse timeout failure mode"); DEFINE_CONFIG_PARSE_ENUM(config_parse_socket_bind, socket_address_bind_ipv6_only_or_bool, SocketAddressBindIPv6Only, "Failed to parse bind IPv6 only value"); DEFINE_CONFIG_PARSE_ENUM(config_parse_oom_policy, oom_policy, OOMPolicy, "Failed to parse OOM policy"); @@ -6299,6 +6300,7 @@ void unit_dump_config_items(FILE *f) { { config_parse_service_type, "SERVICETYPE" }, { config_parse_service_exit_type, "SERVICEEXITTYPE" }, { config_parse_service_restart, "SERVICERESTART" }, + { config_parse_service_restart_mode, "SERVICERESTARTMODE" }, { config_parse_service_timeout_failure_mode, "TIMEOUTMODE" }, { config_parse_kill_mode, "KILLMODE" }, { config_parse_signal, "SIGNAL" }, diff --git a/src/core/load-fragment.h b/src/core/load-fragment.h index 3cfb50969a..39378b3a3c 100644 --- a/src/core/load-fragment.h +++ b/src/core/load-fragment.h @@ -41,6 +41,7 @@ CONFIG_PARSER_PROTOTYPE(config_parse_service_timeout_failure_mode); CONFIG_PARSER_PROTOTYPE(config_parse_service_type); CONFIG_PARSER_PROTOTYPE(config_parse_service_exit_type); CONFIG_PARSER_PROTOTYPE(config_parse_service_restart); +CONFIG_PARSER_PROTOTYPE(config_parse_service_restart_mode); CONFIG_PARSER_PROTOTYPE(config_parse_socket_bindtodevice); CONFIG_PARSER_PROTOTYPE(config_parse_exec_output); CONFIG_PARSER_PROTOTYPE(config_parse_exec_input); diff --git a/src/core/service.c b/src/core/service.c index a90b4de536..c6178cf2cd 100644 --- a/src/core/service.c +++ b/src/core/service.c @@ -2011,7 +2011,8 @@ static void service_enter_dead(Service *s, ServiceResult f, bool allow_restart) * are only transitionary and followed by an automatic restart. We have fine-grained * low-level states for this though so that software can distinguish the permanent UNIT_INACTIVE * state from this transitionary UNIT_INACTIVE state by looking at the low-level states. */ - service_set_state(s, restart_state); + if (s->restart_mode != SERVICE_RESTART_MODE_DIRECT) + service_set_state(s, restart_state); r = service_arm_timer(s, /* relative= */ true, service_restart_usec_next(s)); if (r < 0) @@ -5009,6 +5010,13 @@ static const char* const service_restart_table[_SERVICE_RESTART_MAX] = { DEFINE_STRING_TABLE_LOOKUP(service_restart, ServiceRestart); +static const char* const service_restart_mode_table[_SERVICE_RESTART_MODE_MAX] = { + [SERVICE_RESTART_MODE_NORMAL] = "normal", + [SERVICE_RESTART_MODE_DIRECT] = "direct", +}; + +DEFINE_STRING_TABLE_LOOKUP(service_restart_mode, ServiceRestartMode); + static const char* const service_type_table[_SERVICE_TYPE_MAX] = { [SERVICE_SIMPLE] = "simple", [SERVICE_FORKING] = "forking", diff --git a/src/core/service.h b/src/core/service.h index 0e578c9280..3bf5fb1e14 100644 --- a/src/core/service.h +++ b/src/core/service.h @@ -91,6 +91,13 @@ typedef enum ServiceTimeoutFailureMode { _SERVICE_TIMEOUT_FAILURE_MODE_INVALID = -EINVAL, } ServiceTimeoutFailureMode; +typedef enum ServiceRestartMode { + SERVICE_RESTART_MODE_NORMAL, + SERVICE_RESTART_MODE_DIRECT, + _SERVICE_RESTART_MODE_MAX, + _SERVICE_RESTART_MODE_INVALID = -EINVAL, +} ServiceRestartMode; + struct ServiceFDStore { Service *service; @@ -108,6 +115,7 @@ struct Service { ServiceType type; ServiceExitType exit_type; ServiceRestart restart; + ServiceRestartMode restart_mode; ExitStatusSet restart_prevent_status; ExitStatusSet restart_force_status; ExitStatusSet success_status; @@ -249,6 +257,9 @@ usec_t service_restart_usec_next(Service *s); const char* service_restart_to_string(ServiceRestart i) _const_; ServiceRestart service_restart_from_string(const char *s) _pure_; +const char* service_restart_mode_to_string(ServiceRestartMode i) _const_; +ServiceRestartMode service_restart_mode_from_string(const char *s) _pure_; + const char* service_type_to_string(ServiceType i) _const_; ServiceType service_type_from_string(const char *s) _pure_; diff --git a/src/shared/bus-unit-util.c b/src/shared/bus-unit-util.c index cc287feb8e..494484f0bf 100644 --- a/src/shared/bus-unit-util.c +++ b/src/shared/bus-unit-util.c @@ -2206,6 +2206,7 @@ static int bus_append_service_property(sd_bus_message *m, const char *field, con "Type", "ExitType", "Restart", + "RestartMode", "BusName", "NotifyAccess", "USBFunctionDescriptors", diff --git a/src/test/test-tables.c b/src/test/test-tables.c index 6301dedb09..db41c69df5 100644 --- a/src/test/test-tables.c +++ b/src/test/test-tables.c @@ -95,6 +95,7 @@ int main(int argc, char **argv) { test_table(scope_state, SCOPE_STATE); test_table(service_exec_command, SERVICE_EXEC_COMMAND); test_table(service_restart, SERVICE_RESTART); + test_table(service_restart_mode, SERVICE_RESTART_MODE); test_table(service_result, SERVICE_RESULT); test_table(service_state, SERVICE_STATE); test_table(service_type, SERVICE_TYPE); diff --git a/test/testsuite-03.units/fails-on-restart-restartdirect.service b/test/testsuite-03.units/fails-on-restart-restartdirect.service new file mode 100644 index 0000000000..60ffd7a600 --- /dev/null +++ b/test/testsuite-03.units/fails-on-restart-restartdirect.service @@ -0,0 +1,11 @@ +# SPDX-License-Identifier: LGPL-2.1-or-later +[Unit] +Description=Fail on restart +StartLimitIntervalSec=1m +StartLimitBurst=3 + +[Service] +Type=oneshot +ExecStart=false +Restart=on-failure +RestartMode=direct diff --git a/test/testsuite-03.units/fails-on-restart-restartdirect.target b/test/testsuite-03.units/fails-on-restart-restartdirect.target new file mode 100755 index 0000000000..58e2561039 --- /dev/null +++ b/test/testsuite-03.units/fails-on-restart-restartdirect.target @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: LGPL-2.1-or-later +[Unit] +Requires=fails-on-restart-restartdirect.service diff --git a/test/testsuite-03.units/fails-on-restart.service b/test/testsuite-03.units/fails-on-restart.service new file mode 100644 index 0000000000..fb7e7aeb4c --- /dev/null +++ b/test/testsuite-03.units/fails-on-restart.service @@ -0,0 +1,11 @@ +# SPDX-License-Identifier: LGPL-2.1-or-later +[Unit] +Description=Fail on restart +StartLimitIntervalSec=1m +StartLimitBurst=3 + +[Service] +Type=oneshot +ExecStart=false +Restart=on-failure +RestartMode=normal diff --git a/test/testsuite-03.units/fails-on-restart.target b/test/testsuite-03.units/fails-on-restart.target new file mode 100755 index 0000000000..865fb2af44 --- /dev/null +++ b/test/testsuite-03.units/fails-on-restart.target @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: LGPL-2.1-or-later +[Unit] +Requires=fails-on-restart.service diff --git a/test/testsuite-03.units/succeeds-on-restart-restartdirect.service b/test/testsuite-03.units/succeeds-on-restart-restartdirect.service new file mode 100755 index 0000000000..b05f2f8dcf --- /dev/null +++ b/test/testsuite-03.units/succeeds-on-restart-restartdirect.service @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: LGPL-2.1-or-later +[Service] +Type=oneshot +ExecStart=/usr/lib/systemd/tests/testdata/testsuite-03.units/succeeds-on-restart.sh +Restart=on-failure +RestartMode=direct diff --git a/test/testsuite-03.units/succeeds-on-restart-restartdirect.target b/test/testsuite-03.units/succeeds-on-restart-restartdirect.target new file mode 100755 index 0000000000..2cf3c60d2a --- /dev/null +++ b/test/testsuite-03.units/succeeds-on-restart-restartdirect.target @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: LGPL-2.1-or-later +[Unit] +Requires=succeeds-on-restart-restartdirect.service diff --git a/test/testsuite-03.units/succeeds-on-restart.service b/test/testsuite-03.units/succeeds-on-restart.service new file mode 100755 index 0000000000..d7b3c7a210 --- /dev/null +++ b/test/testsuite-03.units/succeeds-on-restart.service @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: LGPL-2.1-or-later +[Service] +Type=oneshot +ExecStart=/usr/lib/systemd/tests/testdata/testsuite-03.units/succeeds-on-restart.sh +Restart=on-failure +RestartMode=normal diff --git a/test/testsuite-03.units/succeeds-on-restart.sh b/test/testsuite-03.units/succeeds-on-restart.sh new file mode 100755 index 0000000000..1428b186e5 --- /dev/null +++ b/test/testsuite-03.units/succeeds-on-restart.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: LGPL-2.1-or-later +if [[ ! -f "/succeeds-on-restart.ko" ]] +then + touch "/succeeds-on-restart.ko" + exit 1 +else + rm "/succeeds-on-restart.ko" + exit 0 +fi diff --git a/test/testsuite-03.units/succeeds-on-restart.target b/test/testsuite-03.units/succeeds-on-restart.target new file mode 100755 index 0000000000..eb82f47aa3 --- /dev/null +++ b/test/testsuite-03.units/succeeds-on-restart.target @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: LGPL-2.1-or-later +[Unit] +Requires=succeeds-on-restart.service diff --git a/test/units/testsuite-03.sh b/test/units/testsuite-03.sh index aa1d92a691..e9ef31d946 100755 --- a/test/units/testsuite-03.sh +++ b/test/units/testsuite-03.sh @@ -153,4 +153,17 @@ systemctl restart propagatestopto-indirect.target assert_rc 3 systemctl --quiet is-active propagatestopto-and-pullin.target assert_rc 3 systemctl --quiet is-active sleep-infinity-simple.service +# Test restart mode direct +systemctl start succeeds-on-restart-restartdirect.target +assert_rc 0 systemctl --quiet is-active succeeds-on-restart-restartdirect.target + +systemctl start fails-on-restart-restartdirect.target || : +assert_rc 3 systemctl --quiet is-active fails-on-restart-restartdirect.target + +systemctl start succeeds-on-restart.target || : +assert_rc 3 systemctl --quiet is-active succeeds-on-restart.target + +systemctl start fails-on-restart.target || : +assert_rc 3 systemctl --quiet is-active fails-on-restart.target + touch /testok From 2a39b91459a4c27985d9a58309c0fda25f3cd397 Mon Sep 17 00:00:00 2001 From: Richard Phibel Date: Thu, 6 Jul 2023 14:03:35 +0200 Subject: [PATCH 2/2] service: fix for RestartMode=direct option With the fix done in PR28215, the unit restart job is created with type JOB_START. Because of that, it is not properly merged anymore with the old one: the merged job has state JOB_RUNNING. It should have state JOB_WAITING. I think that the old job is not cleaned up because we don't go through the failed state. With this fix, the merged job is properly created with state JOB_WAITING. --- src/core/job.c | 4 ++-- src/core/job.h | 2 +- src/core/transaction.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/core/job.c b/src/core/job.c index 3d5e4e42d1..95e71c48e2 100644 --- a/src/core/job.c +++ b/src/core/job.c @@ -217,7 +217,7 @@ static void job_merge_into_installed(Job *j, Job *other) { j->ignore_order = j->ignore_order || other->ignore_order; } -Job* job_install(Job *j) { +Job* job_install(Job *j, JobMode mode) { Job **pj; Job *uj; @@ -235,7 +235,7 @@ Job* job_install(Job *j) { /* not conflicting, i.e. mergeable */ if (uj->state == JOB_WAITING || - (job_type_allows_late_merge(j->type) && job_type_is_superset(uj->type, j->type))) { + (job_type_allows_late_merge(j->type) && mode != JOB_RESTART_DEPENDENCIES && job_type_is_superset(uj->type, j->type))) { job_merge_into_installed(uj, j); log_unit_debug(uj->unit, "Merged %s/%s into installed job %s/%s as %"PRIu32, diff --git a/src/core/job.h b/src/core/job.h index d3b98d98b6..2c4fbdf4ed 100644 --- a/src/core/job.h +++ b/src/core/job.h @@ -169,7 +169,7 @@ Job* job_new(Unit *unit, JobType type); Job* job_new_raw(Unit *unit); void job_unlink(Job *job); Job* job_free(Job *job); -Job* job_install(Job *j); +Job* job_install(Job *j, JobMode mode); int job_install_deserialized(Job *j); void job_uninstall(Job *j); void job_dump(Job *j, FILE *f, const char *prefix); diff --git a/src/core/transaction.c b/src/core/transaction.c index c85bc667ce..8b8e02f1c7 100644 --- a/src/core/transaction.c +++ b/src/core/transaction.c @@ -657,7 +657,7 @@ static int transaction_apply( /* Clean the job dependencies */ transaction_unlink_job(tr, j, false); - installed_job = job_install(j); + installed_job = job_install(j, mode); if (installed_job != j) { /* j has been merged into a previously installed job */ if (tr->anchor_job == j)