mirror of
https://github.com/morgan9e/systemd
synced 2026-04-15 00:47:10 +09:00
This test occasionally fails due to a race where systemd processes kernel's SIGKILL before the OOM notification, so the test service dies with Result=signal instead of the expected Result=oom-kill: [ 51.008765] TEST-55-OOMD.sh[906]: + systemd-run --wait --unit oom-kill -p OOMPolicy=kill -p Delegate=yes -p DelegateSubgroup=init.scope /tmp/script.sh [ 51.048747] TEST-55-OOMD.sh[907]: Running as unit: oom-kill.service; invocation ID: 456645347d554ea2878463404b181bd8 [ 51.066296] sysrq: Manual OOM execution [ 51.066596] kworker/1:0 invoked oom-killer: gfp_mask=0xcc0(GFP_KERNEL), order=-1, oom_score_adj=0 [ 51.066915] CPU: 1 UID: 0 PID: 27 Comm: kworker/1:0 Not tainted 6.17.1-arch1-1 #1 PREEMPT(full) d2b229857b2eb4001337041f41d3c4f131433540 [ 51.066919] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS Arch Linux 1.17.0-2-2 04/01/2014 [ 51.066921] Workqueue: events moom_callback [ 51.066928] Call Trace: [ 51.066931] <TASK> [ 51.066936] dump_stack_lvl+0x5d/0x80 [ 51.066942] dump_header+0x43/0x1aa <...snip...> [ 51.087814] 47583 pages reserved [ 51.087969] 0 pages cma reserved [ 51.088208] 0 pages hwpoisoned [ 51.088519] Out of memory: Killed process 908 (sleep) total-vm:3264kB, anon-rss:256kB, file-rss:1916kB, shmem-rss:0kB, UID:0 pgtables:44kB oom_score_adj:1000 [ 51.090263] TEST-55-OOMD.sh[907]: Finished with result: signal [ 51.094416] TEST-55-OOMD.sh[907]: Main processes terminated with: code=killed, status=9/KILL [ 51.094898] TEST-55-OOMD.sh[907]: Service runtime: 58ms [ 51.095436] TEST-55-OOMD.sh[907]: CPU time consumed: 22ms [ 51.095854] TEST-55-OOMD.sh[907]: Memory peak: 1.6M (swap: 0B) [ 51.096722] TEST-55-OOMD.sh[912]: ++ systemctl show oom-kill -P Result [ 51.106549] TEST-55-OOMD.sh[879]: + assert_eq signal oom-kill [ 51.107394] TEST-55-OOMD.sh[913]: + set +ex [ 51.108256] TEST-55-OOMD.sh[913]: FAIL: expected: 'oom-kill' actual: 'signal' [FAILED] Failed to start TEST-55-OOMD.service. 
To mitigate this, let's spawn a child process and move it to the subcgroup to get killed instead of the main process, so systemd has more time to react to the OOM notification and terminate the service with the expected oom-kill result.
359 lines
13 KiB
Bash
Executable File
359 lines
13 KiB
Bash
Executable File
#!/usr/bin/env bash
# SPDX-License-Identifier: LGPL-2.1-or-later
#
# Integration test for systemd-oomd and for kernel OOM kill handling by the service manager.

# Abort on errors and unset variables, trace every command, and make pipelines fail if any stage fails.
set -eux
set -o pipefail

# Helper libraries living next to this script.
# shellcheck source=test/units/test-control.sh
. "$(dirname "$0")"/test-control.sh
# shellcheck source=test/units/util.sh
. "$(dirname "$0")"/util.sh

# Ensure that the init.scope.d drop-in is applied on boot
test "$(cat /sys/fs/cgroup/init.scope/memory.high)" != "max"

# Loose checks to ensure the environment has the necessary features for systemd-oomd.
# Every missing feature appends a reason to /skipped; a non-empty /skipped makes the test exit with 77.
[[ -e /proc/pressure ]] || echo "no PSI" >>/skipped
[[ "$(get_cgroup_hierarchy)" == "unified" ]] || echo "no cgroupsv2" >>/skipped
[[ -x /usr/lib/systemd/systemd-oomd ]] || echo "no oomd" >>/skipped
if [[ -s /skipped ]]; then
    exit 77
fi

# Activate swap file if we are in a VM
if systemd-detect-virt --vm --quiet; then
    # Start from a clean slate: no active swap and no stale swap file.
    swapoff --all
    rm -f /swapfile

    # Create a 64M swap file; btrfs gets its dedicated helper instead of the dd + mkswap sequence.
    if [[ "$(findmnt -n -o FSTYPE /)" == btrfs ]]; then
        btrfs filesystem mkswapfile -s 64M /swapfile
    else
        dd if=/dev/zero of=/swapfile bs=1M count=64
        chmod 0600 /swapfile
        mkswap /swapfile
    fi

    swapon /swapfile
    swapon --show
fi

# Configure oomd explicitly to avoid conflicts with distro dropins
mkdir -p /run/systemd/oomd.conf.d/
cat >/run/systemd/oomd.conf.d/99-oomd-test.conf <<EOF
[OOM]
DefaultMemoryPressureDurationSec=2s
EOF

# Drop-in for the root slice
mkdir -p /run/systemd/system/-.slice.d/
cat >/run/systemd/system/-.slice.d/99-oomd-test.conf <<EOF
[Slice]
ManagedOOMSwap=auto
EOF

# Drop-in for the per-user service manager instances
mkdir -p /run/systemd/system/user@.service.d/
cat >/run/systemd/system/user@.service.d/99-oomd-test.conf <<EOF
[Service]
ManagedOOMMemoryPressure=auto
ManagedOOMMemoryPressureLimit=0%
EOF

# Run systemd-oomd itself with debug logging
mkdir -p /run/systemd/system/systemd-oomd.service.d/
cat >/run/systemd/system/systemd-oomd.service.d/debug.conf <<EOF
[Service]
Environment=SYSTEMD_LOG_LEVEL=debug
EOF

# Make the manager pick up the drop-ins written above
systemctl daemon-reload

# enable the service to ensure dbus-org.freedesktop.oom1.service exists
# and D-Bus activation works
systemctl enable systemd-oomd.service

# if oomd is already running for some reasons, then restart it to make sure the above settings to be applied
if systemctl is-active systemd-oomd.service; then
    systemctl restart systemd-oomd.service
fi

# Check if the oomd.conf drop-in config is loaded.
assert_in 'Default Memory Pressure Duration: 2s' "$(oomctl)"

if [[ -v ASAN_OPTIONS || -v UBSAN_OPTIONS ]]; then
    # If we're running with sanitizers, sd-executor might pull in quite a significant chunk of shared
    # libraries, which in turn causes a lot of pressure that can put us in the front when sd-oomd decides to
    # go on a killing spree. This fact is exacerbated further on Arch Linux which ships unstripped gcc-libs,
    # so sd-executor pulls in over 30M of libs on startup. Let's make the MemoryHigh= limit a bit more
    # generous when running with sanitizers to make the test happy.
    systemctl edit --runtime --stdin --drop-in=99-MemoryHigh.conf TEST-55-OOMD-testchill.service <<EOF
[Service]
MemoryHigh=60M
EOF
    # Do the same for the user instance as well
    mkdir -p /run/systemd/user/
    cp -rfv /run/systemd/system/TEST-55-OOMD-testchill.service.d/ /run/systemd/user/
else
    # Ensure that we can start services even with a very low hard memory cap without oom-kills, but skip
    # under sanitizers as they balloon memory usage.
    systemd-run -t -p MemoryMax=10M -p MemorySwapMax=0 -p MemoryZSwapMax=0 true
fi

# Shared body of the "basic" test cases.
#
# Starts the testchill and testbloat services, waits until oomctl lists the given cgroup, and then expects
# testbloat to stop being healthy while testchill keeps running.
#
# Arguments:
#   $1    - cgroup path that is expected to show up in the oomctl output (mandatory)
#   $2... - extra arguments passed verbatim to every systemctl invocation (e.g. --user --machine ...)
# Exits with 42 if testbloat is still healthy after the wait, with 24 if testchill is not.
test_basic() {
    local cgroup_path="${1:?}"
    shift

    systemctl "$@" start TEST-55-OOMD-testchill.service
    systemctl "$@" status TEST-55-OOMD-testchill.service
    systemctl "$@" status TEST-55-OOMD-workload.slice

    # Verify systemd-oomd is monitoring the expected units.
    timeout 1m bash -xec "until oomctl | grep -q -F 'Path: $cgroup_path'; do sleep 1; done"
    # Only look at the part of the oomctl output that starts at the last
    # "Memory Pressure Monitored CGroups:" header.
    assert_in 'Memory Pressure Limit: 20.00%' \
              "$(oomctl | tac | sed -e '/Memory Pressure Monitored CGroups:/q' | tac | grep -A8 "Path: $cgroup_path")"

    systemctl "$@" start TEST-55-OOMD-testbloat.service

    # systemd-oomd watches for elevated pressure for 2 seconds before acting.
    # It can take time to build up pressure so either wait 2 minutes or for the service to fail.
    for _ in {0..59}; do
        if ! systemctl "$@" status TEST-55-OOMD-testbloat.service; then
            break
        fi
        oomctl
        sleep 2
    done

    # testbloat should be killed and testchill should be fine
    if systemctl "$@" status TEST-55-OOMD-testbloat.service; then exit 42; fi
    if ! systemctl "$@" status TEST-55-OOMD-testchill.service; then exit 24; fi

    assert_eq "$(systemctl "$@" show TEST-55-OOMD-testbloat.service -P ManagedOOMKills)" "1"

    # Best-effort kill (failure is ignored), then stop everything that was started.
    systemctl "$@" kill --signal=KILL TEST-55-OOMD-testbloat.service || :
    systemctl "$@" stop TEST-55-OOMD-testbloat.service
    systemctl "$@" stop TEST-55-OOMD-testchill.service
    systemctl "$@" stop TEST-55-OOMD-workload.slice
}

# Runs the basic scenario against the system manager: test_basic gets the workload slice's cgroup path
# and no extra systemctl arguments.
testcase_basic_system() {
    test_basic /TEST.slice/TEST-55.slice/TEST-55-OOMD.slice/TEST-55-OOMD-workload.slice
}

# Runs the basic scenario against the user manager of "testuser". Lingering is enabled for the duration
# of the run and disabled again afterwards.
testcase_basic_user() {
    # Make sure we also work correctly on user units.
    loginctl enable-linger testuser

    # The expected cgroup path is built from testuser's numeric UID; systemctl is pointed at the user
    # instance via --machine/--user.
    test_basic "/user.slice/user-$(id -u testuser).slice/user@$(id -u testuser).service/TEST.slice/TEST-55.slice/TEST-55-OOMD.slice/TEST-55-OOMD-workload.slice" \
               --machine "testuser@.host" --user

    loginctl disable-linger testuser
}

# Checks ManagedOOMPreference=avoid: with the preference set on testbloat, testmunch is expected to stop
# being healthy while testbloat and testchill keep running.
#
# Returns 0 early (skip) if cgroupfs_supports_user_xattrs reports failure.
# Exits with 25 if testbloat is not healthy, 43 if testmunch still is, 24 if testchill is not.
testcase_preference_avoid() {
    # only run this portion of the test if we can set xattrs
    if ! cgroupfs_supports_user_xattrs; then
        echo "cgroup does not support user xattrs, skipping test for ManagedOOMPreference=avoid"
        return 0
    fi

    mkdir -p /run/systemd/system/TEST-55-OOMD-testbloat.service.d/
    cat >/run/systemd/system/TEST-55-OOMD-testbloat.service.d/99-managed-oom-preference.conf <<EOF
[Service]
ManagedOOMPreference=avoid
EOF

    systemctl daemon-reload
    systemctl start TEST-55-OOMD-testchill.service
    systemctl start TEST-55-OOMD-testmunch.service
    systemctl start TEST-55-OOMD-testbloat.service

    # Poll every 2 seconds, up to 60 times, until testmunch is no longer healthy.
    for _ in {0..59}; do
        if ! systemctl status TEST-55-OOMD-testmunch.service; then
            break
        fi
        oomctl
        sleep 2
    done

    # testmunch should be killed since testbloat had the avoid xattr on it
    if ! systemctl status TEST-55-OOMD-testbloat.service; then exit 25; fi
    if systemctl status TEST-55-OOMD-testmunch.service; then exit 43; fi
    if ! systemctl status TEST-55-OOMD-testchill.service; then exit 24; fi

    # Best-effort kills (failures are ignored), then stop everything that was started.
    systemctl kill --signal=KILL TEST-55-OOMD-testbloat.service || :
    systemctl kill --signal=KILL TEST-55-OOMD-testmunch.service || :
    systemctl stop TEST-55-OOMD-testbloat.service
    systemctl stop TEST-55-OOMD-testmunch.service
    systemctl stop TEST-55-OOMD-testchill.service
    systemctl stop TEST-55-OOMD-workload.slice

    # clean up overrides since test cases can be run in any order
    # and overrides shouldn't affect other tests
    rm -rf /run/systemd/system/TEST-55-OOMD-testbloat.service.d
    systemctl daemon-reload
}

# Checks how "systemd-analyze verify" treats ManagedOOMMemoryPressureDurationSec=: a unit with 1s must
# verify cleanly, a unit with 0 must be rejected. Both temporary unit files are removed afterwards.
testcase_duration_analyze() {
    # Verify memory pressure duration is valid if >= 1 second
    cat <<EOF >/tmp/TEST-55-OOMD-valid-duration.service
[Service]
ExecStart=echo hello
ManagedOOMMemoryPressureDurationSec=1s
EOF

    # Verify memory pressure duration is invalid if < 1 second
    cat <<EOF >/tmp/TEST-55-OOMD-invalid-duration.service
[Service]
ExecStart=echo hello
ManagedOOMMemoryPressureDurationSec=0
EOF

    systemd-analyze --recursive-errors=no verify /tmp/TEST-55-OOMD-valid-duration.service
    # The negation lives in a subshell so that an unexpected success is caught by errexit.
    (! systemd-analyze --recursive-errors=no verify /tmp/TEST-55-OOMD-invalid-duration.service)

    rm -f /tmp/TEST-55-OOMD-valid-duration.service
    rm -f /tmp/TEST-55-OOMD-invalid-duration.service
}

# Checks per-unit ManagedOOMMemoryPressureDurationSec= handling: testmunch overrides the duration to 3s,
# testchill sets it to empty and is expected to show the 2s default in oomctl. Afterwards testmunch is
# expected to stop being healthy while testchill keeps running.
#
# Exits with 44 if testmunch is still healthy after the wait, with 23 if testchill is not.
testcase_duration_override() {
    # Verify memory pressure duration can be overridden to non-zero values
    mkdir -p /run/systemd/system/TEST-55-OOMD-testmunch.service.d/
    cat >/run/systemd/system/TEST-55-OOMD-testmunch.service.d/99-duration-test.conf <<EOF
[Service]
ManagedOOMMemoryPressureDurationSec=3s
ManagedOOMMemoryPressure=kill
EOF

    # Verify memory pressure duration will use default if set to empty
    mkdir -p /run/systemd/system/TEST-55-OOMD-testchill.service.d/
    cat >/run/systemd/system/TEST-55-OOMD-testchill.service.d/99-duration-test.conf <<EOF
[Service]
ManagedOOMMemoryPressureDurationSec=
ManagedOOMMemoryPressure=kill
EOF

    systemctl daemon-reload
    systemctl start TEST-55-OOMD-testmunch.service
    systemctl start TEST-55-OOMD-testchill.service

    # Wait (up to a minute each) for the units to appear in oomctl, then check the reported durations.
    timeout 1m bash -xec 'until oomctl | grep "/TEST-55-OOMD-testmunch.service"; do sleep 1; done'
    oomctl | grep -A 2 "/TEST-55-OOMD-testmunch.service" | grep "Memory Pressure Duration: 3s"

    timeout 1m bash -xec 'until oomctl | grep "/TEST-55-OOMD-testchill.service"; do sleep 1; done'
    oomctl | grep -A 2 "/TEST-55-OOMD-testchill.service" | grep "Memory Pressure Duration: 2s"

    # The unit property shows the explicit override, or "[not set]" for the emptied setting.
    [[ "$(systemctl show -P ManagedOOMMemoryPressureDurationUSec TEST-55-OOMD-testmunch.service)" == "3s" ]]
    [[ "$(systemctl show -P ManagedOOMMemoryPressureDurationUSec TEST-55-OOMD-testchill.service)" == "[not set]" ]]

    # Poll every 2 seconds, up to 60 times, until testmunch is no longer healthy.
    for _ in {0..59}; do
        if ! systemctl status TEST-55-OOMD-testmunch.service; then
            break
        fi
        oomctl
        sleep 2
    done

    if systemctl status TEST-55-OOMD-testmunch.service; then exit 44; fi
    if ! systemctl status TEST-55-OOMD-testchill.service; then exit 23; fi

    # Best-effort kill (failure is ignored), then stop everything that was started.
    systemctl kill --signal=KILL TEST-55-OOMD-testmunch.service || :
    systemctl stop TEST-55-OOMD-testmunch.service
    systemctl stop TEST-55-OOMD-testchill.service
    systemctl stop TEST-55-OOMD-workload.slice

    # clean up overrides since test cases can be run in any order
    # and overrides shouldn't affect other tests
    rm -rf /run/systemd/system/TEST-55-OOMD-testmunch.service.d
    rm -rf /run/systemd/system/TEST-55-OOMD-testchill.service.d
    systemctl daemon-reload
}

# Checks that "systemctl reload systemd-oomd.service" makes oomd pick up changes to its oomd.conf
# drop-in: first the initial values are asserted, then modified values, then the initial values again
# after the drop-in is restored.
testcase_reload() {
    # Check if the oomd.conf drop-in config is loaded.
    assert_in 'Swap Used Limit: 90.00%' "$(oomctl)"
    assert_in 'Default Memory Pressure Limit: 60.00%' "$(oomctl)"
    assert_in 'Default Memory Pressure Duration: 2s' "$(oomctl)"

    # Test oomd reload
    mkdir -p /run/systemd/oomd.conf.d/
    {
        echo "[OOM]"
        echo "SwapUsedLimit=80%"
        echo "DefaultMemoryPressureLimit=55%"
        echo "DefaultMemoryPressureDurationSec=5s"
    } >/run/systemd/oomd.conf.d/99-oomd-test.conf

    systemctl reload systemd-oomd.service
    assert_in 'Swap Used Limit: 80.00%' "$(oomctl)"
    assert_in 'Default Memory Pressure Limit: 55.00%' "$(oomctl)"
    assert_in 'Default Memory Pressure Duration: 5s' "$(oomctl)"

    # Set back to default via reload
    mkdir -p /run/systemd/oomd.conf.d/
    {
        echo "[OOM]"
        echo "DefaultMemoryPressureDurationSec=2s"
    } >/run/systemd/oomd.conf.d/99-oomd-test.conf

    systemctl reload systemd-oomd.service

    assert_in 'Swap Used Limit: 90.00%' "$(oomctl)"
    assert_in 'Default Memory Pressure Limit: 60.00%' "$(oomctl)"
    assert_in 'Default Memory Pressure Duration: 2s' "$(oomctl)"
}

# Checks how kernel OOM kills (triggered manually through /proc/sysrq-trigger) are reflected in the
# Result and OOMKills properties of a transient "oom-kill" unit, in three scenarios:
#   1. OOMPolicy=continue                     -> Result=signal,   OOMKills=2
#   2. OOMPolicy=kill                         -> Result=oom-kill, OOMKills=1
#   3. OOMPolicy=kill, delegated subcgroup
#      with memory.oom.group=1                -> Result=oom-kill, OOMKills=1
# The helper script is written to /tmp/script.sh and removed at the end.
testcase_kernel_oom() {
    # The script raises the OOM score adjustment of a child shell, and later of itself, to +1000 before
    # requesting a manual OOM kill each time.
    cat >/tmp/script.sh <<"EOF"
#!/usr/bin/env bash
set -x
choom --adjust '+1000' -- bash -c 'echo f >/proc/sysrq-trigger && exec sleep infinity'
choom --adjust '+1000' -p $$
echo f >/proc/sysrq-trigger
exec sleep infinity
EOF
    chmod +x /tmp/script.sh

    # systemd-run is expected to fail in all scenarios; the negation lives in a subshell so that an
    # unexpected success is caught by errexit.
    (! systemd-run --wait --unit oom-kill -p OOMPolicy=continue /tmp/script.sh)
    # With OOMPolicy=continue, we shouldn't get the oom-kill result.
    assert_eq "$(systemctl show oom-kill -P Result)" "signal"
    # Check that OOMKills reports 2 individual processes killed.
    assert_eq "$(systemctl show oom-kill -P OOMKills)" "2"
    systemctl reset-failed

    (! systemd-run --wait --unit oom-kill -p OOMPolicy=kill /tmp/script.sh)
    # Check that a regular kernel oom kill with OOMPolicy=kill results in the oom-kill result.
    assert_eq "$(systemctl show oom-kill -P Result)" "oom-kill"
    # Check that OOMKills reports 1 oom group kill instead of the number of processes that were killed.
    assert_eq "$(systemctl show oom-kill -P OOMKills)" "1"
    systemctl reset-failed

    cat >/tmp/script.sh <<"EOF"
#!/usr/bin/env bash
set -x
echo '+memory' >/sys/fs/cgroup/system.slice/oom-kill.service/cgroup.subtree_control
mkdir /sys/fs/cgroup/system.slice/oom-kill.service/sub
echo 1 >/sys/fs/cgroup/system.slice/oom-kill.service/sub/memory.oom.group

# Start a child process in the subcgroup that will trigger OOM and be killed but keep the main process
# outside the subcgroup to avoid a race condition where the kernel SIGKILLs the main process before systemd
# can process the OOM notification. With the main process still alive, systemd should have time to receive
# the OOM event and enter the 'oom-kill' state before the service exits.
(
    echo $BASHPID >/sys/fs/cgroup/system.slice/oom-kill.service/sub/cgroup.procs
    choom --adjust '+1000' -p $BASHPID
    echo f >/proc/sysrq-trigger
    exec sleep infinity
) &
wait $! || :
exec sleep infinity
EOF
    chmod +x /tmp/script.sh

    (! systemd-run --wait --unit oom-kill -p OOMPolicy=kill -p Delegate=yes -p DelegateSubgroup=init.scope /tmp/script.sh)
    # Test that an oom-kill in a delegated unit in a subcgroup with memory.oom.group=1 also results in the
    # oom-kill exit status.
    assert_eq "$(systemctl show oom-kill -P Result)" "oom-kill"
    assert_eq "$(systemctl show oom-kill -P OOMKills)" "1"
    systemctl reset-failed

    # Don't leave the helper script behind, test cases can be run in any order.
    rm -f /tmp/script.sh
}

# Run the test cases defined above via the run_testcases helper.
run_testcases

# Marker file signalling that the whole script ran to completion.
touch /testok