mirror of
https://github.com/morgan9e/systemd
synced 2026-04-15 00:47:10 +09:00
Merge pull request #27244 from bluca/uphold_retry
Uphold/StopWhenUnneeded/BindsTo: add retry timer on rate limit
This commit is contained in:
@@ -38,3 +38,12 @@ unsigned ratelimit_num_dropped(RateLimit *r) {
|
||||
|
||||
return r->num > r->burst ? r->num - r->burst : 0;
|
||||
}
|
||||
|
||||
usec_t ratelimit_end(const RateLimit *rl) {
|
||||
assert(rl);
|
||||
|
||||
if (rl->begin == 0)
|
||||
return 0;
|
||||
|
||||
return usec_add(rl->begin, rl->interval);
|
||||
}
|
||||
|
||||
@@ -23,3 +23,5 @@ static inline bool ratelimit_configured(RateLimit *rl) {
|
||||
bool ratelimit_below(RateLimit *r);
|
||||
|
||||
unsigned ratelimit_num_dropped(RateLimit *r);
|
||||
|
||||
usec_t ratelimit_end(const RateLimit *rl);
|
||||
|
||||
@@ -1051,6 +1051,12 @@ finish:
|
||||
job_add_to_gc_queue(other->job);
|
||||
}
|
||||
|
||||
/* Ensure that when an upheld/unneeded/bound unit activation job fails we requeue it, if it is still
|
||||
* necessary. If there are no state changes in the triggerer, it would not be retried otherwise. */
|
||||
unit_submit_to_start_when_upheld_queue(u);
|
||||
unit_submit_to_stop_when_bound_queue(u);
|
||||
unit_submit_to_stop_when_unneeded_queue(u);
|
||||
|
||||
manager_check_finished(u->manager);
|
||||
|
||||
return 0;
|
||||
|
||||
@@ -1409,6 +1409,48 @@ static unsigned manager_dispatch_gc_job_queue(Manager *m) {
|
||||
return n;
|
||||
}
|
||||
|
||||
/* Timer callback installed by manager_ratelimit_check_and_queue(). 'userdata' is the Unit whose automatic
 * start/stop handling was throttled; 's' must be the timer stored in that unit. The 'usec' argument is
 * not used. Always returns 0. */
static int manager_ratelimit_requeue(sd_event_source *s, uint64_t usec, void *userdata) {
        Unit *unit = userdata;

        assert(unit);
        assert(s == unit->auto_start_stop_event_source);

        /* Release the timer and reset the field, so that manager_ratelimit_check_and_queue() no longer
         * sees a pending retry and can arm a new one if needed. */
        unit->auto_start_stop_event_source = sd_event_source_unref(unit->auto_start_stop_event_source);

        /* We cannot tell which of the queues the unit was throttled on when the rate limit was hit,
         * hence submit it to all of them again. */
        unit_submit_to_stop_when_unneeded_queue(unit);
        unit_submit_to_start_when_upheld_queue(unit);
        unit_submit_to_stop_when_bound_queue(unit);

        return 0;
}
|
||||
|
||||
/* Checks the unit's auto start/stop rate limit and, if it has been exceeded, makes sure a retry timer is
 * pending on the manager's event loop.
 *
 * Returns:
 *   1   if ratelimit_below() reports we are still within the limit, i.e. the caller may act right away
 *   0   if the limit was hit and a retry timer is pending (either armed earlier or just now)
 *   <0  if adding the timer failed; the result of log_unit_error_errno() is passed through */
static int manager_ratelimit_check_and_queue(Unit *u) {
        int r;

        assert(u);

        if (ratelimit_below(&u->auto_start_stop_ratelimit))
                return 1;

        /* Only arm a timer if none is pending yet. It is scheduled for the end of the current rate limit
         * window, and manager_ratelimit_requeue() re-submits the unit to the queues when it fires. */
        if (!u->auto_start_stop_event_source) {
                r = sd_event_add_time(
                                u->manager->event,
                                &u->auto_start_stop_event_source,
                                CLOCK_MONOTONIC,
                                ratelimit_end(&u->auto_start_stop_ratelimit),
                                0,
                                manager_ratelimit_requeue,
                                u);
                if (r < 0)
                        return log_unit_error_errno(u, r, "Failed to queue timer on event loop: %m");
        }

        return 0;
}
|
||||
|
||||
static unsigned manager_dispatch_stop_when_unneeded_queue(Manager *m) {
|
||||
unsigned n = 0;
|
||||
Unit *u;
|
||||
@@ -1433,8 +1475,11 @@ static unsigned manager_dispatch_stop_when_unneeded_queue(Manager *m) {
|
||||
/* If stopping a unit fails continuously we might enter a stop loop here, hence stop acting on the
|
||||
* service being unnecessary after a while. */
|
||||
|
||||
if (!ratelimit_below(&u->auto_start_stop_ratelimit)) {
|
||||
log_unit_warning(u, "Unit not needed anymore, but not stopping since we tried this too often recently.");
|
||||
r = manager_ratelimit_check_and_queue(u);
|
||||
if (r <= 0) {
|
||||
log_unit_warning(u,
|
||||
"Unit not needed anymore, but not stopping since we tried this too often recently.%s",
|
||||
r == 0 ? " Will retry later." : "");
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -1472,8 +1517,12 @@ static unsigned manager_dispatch_start_when_upheld_queue(Manager *m) {
|
||||
/* If starting a unit fails continuously we might enter a start loop here, hence stop acting on the
|
||||
* unit being upheld after a while. */
|
||||
|
||||
if (!ratelimit_below(&u->auto_start_stop_ratelimit)) {
|
||||
log_unit_warning(u, "Unit needs to be started because active unit %s upholds it, but not starting since we tried this too often recently.", culprit->id);
|
||||
r = manager_ratelimit_check_and_queue(u);
|
||||
if (r <= 0) {
|
||||
log_unit_warning(u,
|
||||
"Unit needs to be started because active unit %s upholds it, but not starting since we tried this too often recently.%s",
|
||||
culprit->id,
|
||||
r == 0 ? " Will retry later." : "");
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -1510,8 +1559,12 @@ static unsigned manager_dispatch_stop_when_bound_queue(Manager *m) {
|
||||
/* If stopping a unit fails continuously we might enter a stop loop here, hence stop acting on the
|
||||
* service being unnecessary after a while. */
|
||||
|
||||
if (!ratelimit_below(&u->auto_start_stop_ratelimit)) {
|
||||
log_unit_warning(u, "Unit needs to be stopped because it is bound to inactive unit %s it, but not stopping since we tried this too often recently.", culprit->id);
|
||||
r = manager_ratelimit_check_and_queue(u);
|
||||
if (r <= 0) {
|
||||
log_unit_warning(u,
|
||||
"Unit needs to be stopped because it is bound to inactive unit %s it, but not stopping since we tried this too often recently.%s",
|
||||
culprit->id,
|
||||
r == 0 ? " Will retry later." : "");
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
@@ -732,6 +732,8 @@ Unit* unit_free(Unit *u) {
|
||||
if (!u)
|
||||
return NULL;
|
||||
|
||||
sd_event_source_disable_unref(u->auto_start_stop_event_source);
|
||||
|
||||
u->transient_file = safe_fclose(u->transient_file);
|
||||
|
||||
if (!MANAGER_IS_RELOADING(u->manager))
|
||||
|
||||
@@ -350,6 +350,7 @@ typedef struct Unit {
|
||||
|
||||
/* Make sure we never enter endless loops with the StopWhenUnneeded=, BindsTo=, Uphold= logic */
|
||||
RateLimit auto_start_stop_ratelimit;
|
||||
sd_event_source *auto_start_stop_event_source;
|
||||
|
||||
/* Reference to a specific UID/GID */
|
||||
uid_t ref_uid;
|
||||
|
||||
9
test/units/testsuite-57-retry-fail.service
Normal file
9
test/units/testsuite-57-retry-fail.service
Normal file
@@ -0,0 +1,9 @@
|
||||
# SPDX-License-Identifier: LGPL-2.1-or-later
|
||||
[Unit]
|
||||
Description=Failed Dependency Unit
|
||||
|
||||
[Service]
|
||||
Type=oneshot
|
||||
RemainAfterExit=yes
|
||||
ExecStart=/bin/sh -c "if [ -f /tmp/testsuite-57-retry-fail ]; then exit 0; else exit 1; fi"
|
||||
Restart=no
|
||||
10
test/units/testsuite-57-retry-upheld.service
Normal file
10
test/units/testsuite-57-retry-upheld.service
Normal file
@@ -0,0 +1,10 @@
|
||||
# SPDX-License-Identifier: LGPL-2.1-or-later
|
||||
[Unit]
|
||||
Description=Upheld Unit
|
||||
Requires=testsuite-57-retry-fail.service
|
||||
After=testsuite-57-retry-fail.service
|
||||
|
||||
[Service]
|
||||
Type=oneshot
|
||||
RemainAfterExit=yes
|
||||
ExecStart=/bin/echo ok
|
||||
7
test/units/testsuite-57-retry-uphold.service
Normal file
7
test/units/testsuite-57-retry-uphold.service
Normal file
@@ -0,0 +1,7 @@
|
||||
# SPDX-License-Identifier: LGPL-2.1-or-later
|
||||
[Unit]
|
||||
Description=Upholding Unit
|
||||
Upholds=testsuite-57-retry-upheld.service
|
||||
|
||||
[Service]
|
||||
ExecStart=/bin/sleep infinity
|
||||
@@ -26,6 +26,33 @@ done
|
||||
|
||||
systemctl stop testsuite-57-uphold.service
|
||||
|
||||
# Idea is this:
|
||||
# 1. we start testsuite-57-retry-uphold.service
|
||||
# 2. which through Upholds= starts testsuite-57-retry-upheld.service
|
||||
# 3. which through Requires= starts testsuite-57-retry-fail.service
|
||||
# 4. which fails as /tmp/testsuite-57-retry-fail does not exist, so testsuite-57-retry-upheld.service
|
||||
# is no longer restarted
|
||||
# 5. we create /tmp/testsuite-57-retry-fail
|
||||
# 6. now testsuite-57-retry-upheld.service will be restarted since upheld, and its dependency will
|
||||
# be satisfied
|
||||
|
||||
rm -f /tmp/testsuite-57-retry-fail
|
||||
systemctl start testsuite-57-retry-uphold.service
|
||||
|
||||
while ! systemctl is-failed testsuite-57-retry-fail.service ; do
|
||||
sleep .5
|
||||
done
|
||||
|
||||
systemctl is-active testsuite-57-retry-upheld.service && { echo 'unexpected success'; exit 1; }
|
||||
|
||||
touch /tmp/testsuite-57-retry-fail
|
||||
|
||||
while ! systemctl is-active testsuite-57-retry-upheld.service ; do
|
||||
sleep .5
|
||||
done
|
||||
|
||||
systemctl stop testsuite-57-retry-uphold.service testsuite-57-retry-fail.service testsuite-57-retry-upheld.service
|
||||
|
||||
# Idea is this:
|
||||
# 1. we start testsuite-57-prop-stop-one.service
|
||||
# 2. which through Wants=/After= pulls in testsuite-57-prop-stop-two.service as well
|
||||
|
||||
Reference in New Issue
Block a user