core: increment start limit counter only when we can start the unit

Otherwise, e.g. requesting to start a unit that is currently stopping may
push the unit into the failed state.

This makes
- rename .can_start() -> .test_startable(), allow it to return a boolean,
  and refuse to start units when it returns false,
- refuse earlier to start units that are in the deactivating state, so
  several redundant conditions in .start() can be dropped,
- move checks for unit states mapped to UNIT_ACTIVATING from .start() to
  .test_startable().

Fixes #39247.
This commit is contained in:
Yu Watanabe
2025-10-20 19:40:28 +09:00
committed by Mike Yuan
parent c05758663b
commit 8eefd0f4de
10 changed files with 94 additions and 85 deletions

View File

@@ -1039,7 +1039,7 @@ static bool automount_supported(void) {
return supported;
}
static int automount_can_start(Unit *u) {
static int automount_test_startable(Unit *u) {
Automount *a = ASSERT_PTR(AUTOMOUNT(u));
int r;
@@ -1049,7 +1049,7 @@ static int automount_can_start(Unit *u) {
return r;
}
return 1;
return true;
}
static const char* const automount_result_table[_AUTOMOUNT_RESULT_MAX] = {
@@ -1115,5 +1115,5 @@ const UnitVTable automount_vtable = {
},
},
.can_start = automount_can_start,
.test_startable = automount_test_startable,
};

View File

@@ -1363,19 +1363,6 @@ static int mount_start(Unit *u) {
Mount *m = ASSERT_PTR(MOUNT(u));
int r;
/* We cannot fulfill this request right now, try again later
* please! */
if (IN_SET(m->state,
MOUNT_UNMOUNTING,
MOUNT_UNMOUNTING_SIGTERM,
MOUNT_UNMOUNTING_SIGKILL,
MOUNT_CLEANING))
return -EAGAIN;
/* Already on it! */
if (IN_SET(m->state, MOUNT_MOUNTING, MOUNT_MOUNTING_DONE))
return 0;
assert(IN_SET(m->state, MOUNT_DEAD, MOUNT_FAILED));
r = unit_acquire_invocation_id(u);
@@ -2382,10 +2369,14 @@ static int mount_can_clean(Unit *u, ExecCleanMask *ret) {
return exec_context_get_clean_mask(&m->exec_context, ret);
}
static int mount_can_start(Unit *u) {
static int mount_test_startable(Unit *u) {
Mount *m = ASSERT_PTR(MOUNT(u));
int r;
/* It is already being started. */
if (IN_SET(m->state, MOUNT_MOUNTING, MOUNT_MOUNTING_DONE))
return false;
r = unit_test_start_limit(u);
if (r < 0) {
mount_enter_dead(m, MOUNT_FAILURE_START_LIMIT_HIT, /* flush_result = */ false);
@@ -2395,7 +2386,7 @@ static int mount_can_start(Unit *u) {
if (!get_mount_parameters_fragment(m))
return -ENOENT;
return 1;
return true;
}
static int mount_subsystem_ratelimited(Manager *m) {
@@ -2561,7 +2552,7 @@ const UnitVTable mount_vtable = {
},
},
.can_start = mount_can_start,
.test_startable = mount_test_startable,
.notify_plymouth = true,
};

View File

@@ -898,7 +898,7 @@ static void path_reset_failed(Unit *u) {
p->result = PATH_SUCCESS;
}
static int path_can_start(Unit *u) {
static int path_test_startable(Unit *u) {
Path *p = ASSERT_PTR(PATH(u));
int r;
@@ -908,7 +908,7 @@ static int path_can_start(Unit *u) {
return r;
}
return 1;
return true;
}
static void activation_details_path_done(ActivationDetails *details) {
@@ -1042,7 +1042,7 @@ const UnitVTable path_vtable = {
.bus_set_property = bus_path_set_property,
.can_start = path_can_start,
.test_startable = path_test_startable,
};
const ActivationDetailsVTable activation_details_path_vtable = {

View File

@@ -2951,17 +2951,6 @@ static int service_start(Unit *u) {
Service *s = ASSERT_PTR(SERVICE(u));
int r;
/* We cannot fulfill this request right now, try again later
* please! */
if (IN_SET(s->state,
SERVICE_STOP, SERVICE_STOP_WATCHDOG, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST,
SERVICE_FINAL_WATCHDOG, SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL, SERVICE_CLEANING))
return -EAGAIN;
/* Already on it! */
if (IN_SET(s->state, SERVICE_CONDITION, SERVICE_START_PRE, SERVICE_START, SERVICE_START_POST))
return 0;
if (s->state == SERVICE_AUTO_RESTART) {
/* As mentioned in unit_start(), we allow manual starts to act as "hurry up" signals
* for auto restart. We need to re-enqueue the job though, as the job type has changed
@@ -5558,10 +5547,17 @@ static const char* service_finished_job(Unit *u, JobType t, JobResult result) {
return NULL;
}
static int service_can_start(Unit *u) {
static int service_test_startable(Unit *u) {
Service *s = ASSERT_PTR(SERVICE(u));
int r;
/* First check the state, and do not increment start limit counter if the service cannot start due to
* that e.g. it is already being started. Note, the service states mapped to UNIT_ACTIVE,
* UNIT_RELOADING, UNIT_DEACTIVATING, UNIT_MAINTENANCE, and UNIT_REFRESHING are already filtered in
* unit_start(). Hence, here we only need to check states that mapped to UNIT_ACTIVATING. */
if (IN_SET(s->state, SERVICE_CONDITION, SERVICE_START_PRE, SERVICE_START, SERVICE_START_POST))
return false;
/* Make sure we don't enter a busy loop of some kind. */
r = unit_test_start_limit(u);
if (r < 0) {
@@ -5569,7 +5565,7 @@ static int service_can_start(Unit *u) {
return r;
}
return 1;
return true;
}
static void service_release_resources(Unit *u) {
@@ -5851,7 +5847,7 @@ const UnitVTable service_vtable = {
.finished_job = service_finished_job,
},
.can_start = service_can_start,
.test_startable = service_test_startable,
.notify_plymouth = true,

View File

@@ -2623,26 +2623,6 @@ static int socket_start(Unit *u) {
Socket *s = ASSERT_PTR(SOCKET(u));
int r;
/* We cannot fulfill this request right now, try again later
* please! */
if (IN_SET(s->state,
SOCKET_STOP_PRE,
SOCKET_STOP_PRE_SIGKILL,
SOCKET_STOP_PRE_SIGTERM,
SOCKET_STOP_POST,
SOCKET_FINAL_SIGTERM,
SOCKET_FINAL_SIGKILL,
SOCKET_CLEANING))
return -EAGAIN;
/* Already on it! */
if (IN_SET(s->state,
SOCKET_START_PRE,
SOCKET_START_OPEN,
SOCKET_START_CHOWN,
SOCKET_START_POST))
return 0;
/* Cannot run this without the service being around */
if (UNIT_ISSET(s->service)) {
Service *service = ASSERT_PTR(SERVICE(UNIT_DEREF(s->service)));
@@ -3650,17 +3630,25 @@ static int socket_can_clean(Unit *u, ExecCleanMask *ret) {
return exec_context_get_clean_mask(&s->exec_context, ret);
}
static int socket_can_start(Unit *u) {
static int socket_test_startable(Unit *u) {
Socket *s = ASSERT_PTR(SOCKET(u));
int r;
/* It is already being started. */
if (IN_SET(s->state,
SOCKET_START_PRE,
SOCKET_START_OPEN,
SOCKET_START_CHOWN,
SOCKET_START_POST))
return false;
r = unit_test_start_limit(u);
if (r < 0) {
socket_enter_dead(s, SOCKET_FAILURE_START_LIMIT_HIT);
return r;
}
return 1;
return true;
}
static const char* const socket_exec_command_table[_SOCKET_EXEC_COMMAND_MAX] = {
@@ -3801,5 +3789,5 @@ const UnitVTable socket_vtable = {
},
},
.can_start = socket_can_start,
.test_startable = socket_test_startable,
};

View File

@@ -864,19 +864,6 @@ static int swap_start(Unit *u) {
int r;
assert(s);
/* We cannot fulfill this request right now, try again later please! */
if (IN_SET(s->state,
SWAP_DEACTIVATING,
SWAP_DEACTIVATING_SIGTERM,
SWAP_DEACTIVATING_SIGKILL,
SWAP_CLEANING))
return -EAGAIN;
/* Already on it! */
if (s->state == SWAP_ACTIVATING)
return 0;
assert(IN_SET(s->state, SWAP_DEAD, SWAP_FAILED));
if (detect_container() > 0)
@@ -1521,17 +1508,21 @@ static int swap_can_clean(Unit *u, ExecCleanMask *ret) {
return exec_context_get_clean_mask(&s->exec_context, ret);
}
static int swap_can_start(Unit *u) {
static int swap_test_startable(Unit *u) {
Swap *s = ASSERT_PTR(SWAP(u));
int r;
/* It is already being started. */
if (s->state == SWAP_ACTIVATING)
return false;
r = unit_test_start_limit(u);
if (r < 0) {
swap_enter_dead(s, SWAP_FAILURE_START_LIMIT_HIT);
return r;
}
return 1;
return true;
}
int swap_get_priority(const Swap *s) {
@@ -1652,7 +1643,7 @@ const UnitVTable swap_vtable = {
},
},
.can_start = swap_can_start,
.test_startable = swap_test_startable,
.notify_plymouth = true,
};

View File

@@ -906,7 +906,7 @@ static int timer_can_clean(Unit *u, ExecCleanMask *ret) {
return 0;
}
static int timer_can_start(Unit *u) {
static int timer_test_startable(Unit *u) {
Timer *t = ASSERT_PTR(TIMER(u));
int r;
@@ -916,7 +916,7 @@ static int timer_can_start(Unit *u) {
return r;
}
return 1;
return true;
}
static void activation_details_timer_serialize(const ActivationDetails *details, FILE *f) {
@@ -1093,7 +1093,7 @@ const UnitVTable timer_vtable = {
.bus_set_property = bus_timer_set_property,
.can_start = timer_can_start,
.test_startable = timer_test_startable,
};
const ActivationDetailsVTable activation_details_timer_vtable = {

View File

@@ -1936,7 +1936,7 @@ int unit_start(Unit *u, ActivationDetails *details) {
state = unit_active_state(u);
if (UNIT_IS_ACTIVE_OR_RELOADING(state))
return -EALREADY;
if (state == UNIT_MAINTENANCE)
if (IN_SET(state, UNIT_DEACTIVATING, UNIT_MAINTENANCE))
return -EAGAIN;
/* Units that aren't loaded cannot be started */
@@ -1983,10 +1983,11 @@ int unit_start(Unit *u, ActivationDetails *details) {
if (u->freezer_state != FREEZER_RUNNING)
return -EDEADLK;
/* Check our ability to start early so that failure conditions don't cause us to enter a busy loop. */
if (UNIT_VTABLE(u)->can_start) {
r = UNIT_VTABLE(u)->can_start(u);
if (r < 0)
/* Check our ability to start early so that ratelimited or already starting/started units don't
* cause us to enter a busy loop. */
if (UNIT_VTABLE(u)->test_startable) {
r = UNIT_VTABLE(u)->test_startable(u);
if (r <= 0)
return r;
}

View File

@@ -726,8 +726,8 @@ typedef struct UnitVTable {
bool (*supported)(void);
/* If this function is set, it's invoked first as part of starting a unit to allow start rate
* limiting checks to occur before we do anything else. */
int (*can_start)(Unit *u);
* limiting checks and unit state checks to occur before we do anything else. */
int (*test_startable)(Unit *u);
/* Returns > 0 if the whole subsystem is ratelimited, and new start operations should not be started
* for this unit type right now. */

View File

@@ -0,0 +1,42 @@
#!/usr/bin/env bash
# SPDX-License-Identifier: LGPL-2.1-or-later
set -eux
set -o pipefail
# Regression test for issue #39247: a start request issued while a unit is
# still stopping must not increment the start rate limit counter, otherwise
# repeatedly triggering the unit during shutdown drives it into the 'failed'
# state via the start limit.
# Restore the stock systemd-resolved configuration on exit, whatever happens.
# Note the order: remove the drop-in first, then reload, then restart, so the
# service comes back with its default settings.
at_exit() {
set +e
rm -rf /run/systemd/system/systemd-resolved.service.d/
systemctl daemon-reload
systemctl restart systemd-resolved.service
}
trap at_exit EXIT
# Install a drop-in with a tight start rate limit plus a slow ExecStopPost,
# so each stop keeps the service in the deactivating state for ~10s — long
# enough for the start requests below to race with the stop.
mkdir -p /run/systemd/system/systemd-resolved.service.d/
cat >/run/systemd/system/systemd-resolved.service.d/99-start-limit.conf <<EOF
[Unit]
StartLimitBurst=5
StartLimitInterval=30
[Service]
ExecStopPost=sleep 10
EOF
systemctl daemon-reload
systemctl restart systemd-resolved.service
# Start from a clean slate so earlier test activity cannot eat into the burst.
systemctl reset-failed systemd-resolved.service
systemctl status --no-pager systemd-resolved.service
systemctl show systemd-resolved.service | grep StartLimit
# Stop the service and immediately poke it with resolvectl, which issues a
# start request while the unit is presumably still deactivating (TODO confirm:
# bus activation path). With the fix these requests do not count against
# StartLimitBurst, so resolvectl keeps succeeding on every iteration; without
# the fix the unit hits the start limit and resolvectl fails. Dump recent
# journal output on failure to aid debugging.
for i in {1..5}; do
echo "Start #$i"
systemctl stop --no-block systemd-resolved.service
if ! resolvectl; then
journalctl -o short-monotonic --no-hostname --no-pager -u systemd-resolved.service -n 15
exit 1
fi
done