Files
systemd/test/units/TEST-55-OOMD.sh
Frantisek Sumsal 1ec5d43a0e test: let kernel OOM-kill a child process instead of the main one
This test occasionally fails due to a race where systemd processes the
kernel's SIGKILL before the OOM notification, so the test service dies
with Result=signal instead of the expected Result=oom-kill:

[   51.008765] TEST-55-OOMD.sh[906]: + systemd-run --wait --unit oom-kill -p OOMPolicy=kill -p Delegate=yes -p DelegateSubgroup=init.scope /tmp/script.sh
[   51.048747] TEST-55-OOMD.sh[907]: Running as unit: oom-kill.service; invocation ID: 456645347d554ea2878463404b181bd8
[   51.066296] sysrq: Manual OOM execution
[   51.066596] kworker/1:0 invoked oom-killer: gfp_mask=0xcc0(GFP_KERNEL), order=-1, oom_score_adj=0
[   51.066915] CPU: 1 UID: 0 PID: 27 Comm: kworker/1:0 Not tainted 6.17.1-arch1-1 #1 PREEMPT(full)  d2b229857b2eb4001337041f41d3c4f131433540
[   51.066919] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS Arch Linux 1.17.0-2-2 04/01/2014
[   51.066921] Workqueue: events moom_callback
[   51.066928] Call Trace:
[   51.066931]  <TASK>
[   51.066936]  dump_stack_lvl+0x5d/0x80
[   51.066942]  dump_header+0x43/0x1aa
<...snip...>
[   51.087814] 47583 pages reserved
[   51.087969] 0 pages cma reserved
[   51.088208] 0 pages hwpoisoned
[   51.088519] Out of memory: Killed process 908 (sleep) total-vm:3264kB, anon-rss:256kB, file-rss:1916kB, shmem-rss:0kB, UID:0 pgtables:44kB oom_score_adj:1000
[   51.090263] TEST-55-OOMD.sh[907]:           Finished with result: signal
[   51.094416] TEST-55-OOMD.sh[907]: Main processes terminated with: code=killed, status=9/KILL
[   51.094898] TEST-55-OOMD.sh[907]:                Service runtime: 58ms
[   51.095436] TEST-55-OOMD.sh[907]:              CPU time consumed: 22ms
[   51.095854] TEST-55-OOMD.sh[907]:                    Memory peak: 1.6M (swap: 0B)
[   51.096722] TEST-55-OOMD.sh[912]: ++ systemctl show oom-kill -P Result
[   51.106549] TEST-55-OOMD.sh[879]: + assert_eq signal oom-kill
[   51.107394] TEST-55-OOMD.sh[913]: + set +ex
[   51.108256] TEST-55-OOMD.sh[913]: FAIL: expected: 'oom-kill' actual: 'signal'
[FAILED] Failed to start TEST-55-OOMD.service.

To mitigate this, spawn a child process and move it into the subcgroup
so that it gets killed instead of the main process. This gives systemd
more time to react to the OOM notification and terminate the service
with the expected oom-kill result.
2025-10-17 15:19:16 +02:00

359 lines
13 KiB
Bash
Executable File

#!/usr/bin/env bash
# SPDX-License-Identifier: LGPL-2.1-or-later
# Abort on errors and unset variables, trace every command, and let a failing stage fail its pipeline.
set -eux -o pipefail

# Pull in the shared test helpers that live next to this script.
# shellcheck source=test/units/test-control.sh
source "$(dirname "$0")/test-control.sh"
# shellcheck source=test/units/util.sh
source "$(dirname "$0")/util.sh"

# Ensure that the init.scope.d drop-in is applied on boot: memory.high must not still read "max".
[[ "$(cat /sys/fs/cgroup/init.scope/memory.high)" != "max" ]]

# Loose checks to ensure the environment has the necessary features for systemd-oomd.
# Every missing feature appends its reason to /skipped.
if [[ ! -e /proc/pressure ]]; then
    echo "no PSI" >>/skipped
fi
# NOTE(review): get_cgroup_hierarchy is not defined in this file; presumably it comes from the helpers
# sourced above.
if [[ "$(get_cgroup_hierarchy)" != "unified" ]]; then
    echo "no cgroupsv2" >>/skipped
fi
if [[ ! -x /usr/lib/systemd/systemd-oomd ]]; then
    echo "no oomd" >>/skipped
fi

# Leave with status 77 as soon as at least one reason has been recorded.
[[ ! -s /skipped ]] || exit 77
# Activate swap file if we are in a VM
if systemd-detect-virt --vm --quiet; then
    # Start from a clean slate: no active swap and no stale swap file.
    swapoff --all
    rm -f /swapfile

    case "$(findmnt -n -o FSTYPE /)" in
        btrfs)
            # Use btrfs's dedicated subcommand to create the swap file.
            btrfs filesystem mkswapfile -s 64M /swapfile
            ;;
        *)
            # Any other root filesystem: a plain 64 MiB zero-filled file formatted as swap.
            dd if=/dev/zero of=/swapfile bs=1M count=64
            chmod 0600 /swapfile
            mkswap /swapfile
            ;;
    esac

    swapon /swapfile
    # Print the active swap devices.
    swapon --show
fi
# Configure oomd explicitly to avoid conflicts with distro dropins
mkdir -p /run/systemd/oomd.conf.d/
printf '%s\n' \
    '[OOM]' \
    'DefaultMemoryPressureDurationSec=2s' \
    >/run/systemd/oomd.conf.d/99-oomd-test.conf

# Drop-in for the root slice (-.slice).
mkdir -p /run/systemd/system/-.slice.d/
printf '%s\n' \
    '[Slice]' \
    'ManagedOOMSwap=auto' \
    >/run/systemd/system/-.slice.d/99-oomd-test.conf

# Drop-in for every user@.service instance.
mkdir -p /run/systemd/system/user@.service.d/
printf '%s\n' \
    '[Service]' \
    'ManagedOOMMemoryPressure=auto' \
    'ManagedOOMMemoryPressureLimit=0%' \
    >/run/systemd/system/user@.service.d/99-oomd-test.conf

# Run systemd-oomd itself with debug logging.
mkdir -p /run/systemd/system/systemd-oomd.service.d/
printf '%s\n' \
    '[Service]' \
    'Environment=SYSTEMD_LOG_LEVEL=debug' \
    >/run/systemd/system/systemd-oomd.service.d/debug.conf

systemctl daemon-reload

# Enable the service to ensure dbus-org.freedesktop.oom1.service exists
# and D-Bus activation works.
systemctl enable systemd-oomd.service

# If oomd is already running for some reason, restart it to make sure the settings above are applied.
if systemctl is-active systemd-oomd.service; then
    systemctl restart systemd-oomd.service
fi

# Check if the oomd.conf drop-in config is loaded.
assert_in 'Default Memory Pressure Duration: 2s' "$(oomctl)"
if [[ ! -v ASAN_OPTIONS && ! -v UBSAN_OPTIONS ]]; then
    # Ensure that we can start services even with a very low hard memory cap without oom-kills, but skip
    # under sanitizers as they balloon memory usage.
    systemd-run -t -p MemoryMax=10M -p MemorySwapMax=0 -p MemoryZSwapMax=0 true
else
    # If we're running with sanitizers, sd-executor might pull in quite a significant chunk of shared
    # libraries, which in turn causes a lot of pressure that can put us in the front when sd-oomd decides to
    # go on a killing spree. This fact is exacerbated further on Arch Linux which ships unstripped gcc-libs,
    # so sd-executor pulls in over 30M of libs on startup. Let's make the MemoryHigh= limit a bit more
    # generous when running with sanitizers to make the test happy.
    systemctl edit --runtime --stdin --drop-in=99-MemoryHigh.conf TEST-55-OOMD-testchill.service <<"EOF"
[Service]
MemoryHigh=60M
EOF
    # Do the same for the user instance as well
    mkdir -p /run/systemd/user/
    cp -rfv /run/systemd/system/TEST-55-OOMD-testchill.service.d/ /run/systemd/user/
fi
# Shared body of the basic oomd scenario: start the "chill" and "bloat" workloads, wait for the bloat
# unit to go down, and verify that only it was affected.
#
# Arguments:
#   $1   - cgroup path that must show up in the oomctl output (mandatory)
#   $2.. - extra arguments put in front of every systemctl verb (may be empty)
# Exits with 42 if testbloat is still up at the end of the wait, 24 if testchill is not.
test_basic() {
    local monitored_cgroup="${1:?}"
    shift
    # systemctl plus the caller-supplied manager selection arguments
    local -a ctl=(systemctl "$@")
    local attempt unit

    "${ctl[@]}" start TEST-55-OOMD-testchill.service
    "${ctl[@]}" status TEST-55-OOMD-testchill.service
    "${ctl[@]}" status TEST-55-OOMD-workload.slice

    # Verify systemd-oomd is monitoring the expected units.
    timeout 1m bash -xec "until oomctl | grep -q -F 'Path: $monitored_cgroup'; do sleep 1; done"
    # Only look at the last "Memory Pressure Monitored CGroups:" section of the oomctl output: reverse it,
    # cut at the first header seen, and reverse it back.
    assert_in 'Memory Pressure Limit: 20.00%' \
              "$(oomctl | tac | sed -e '/Memory Pressure Monitored CGroups:/q' | tac | grep -A8 "Path: $monitored_cgroup")"

    "${ctl[@]}" start TEST-55-OOMD-testbloat.service

    # systemd-oomd watches for elevated pressure for 2 seconds before acting.
    # It can take time to build up pressure so either wait 2 minutes or for the service to fail.
    for ((attempt = 0; attempt < 60; attempt++)); do
        "${ctl[@]}" status TEST-55-OOMD-testbloat.service || break
        oomctl
        sleep 2
    done

    # testbloat should be killed and testchill should be fine
    if "${ctl[@]}" status TEST-55-OOMD-testbloat.service; then
        exit 42
    fi
    "${ctl[@]}" status TEST-55-OOMD-testchill.service || exit 24

    assert_eq "$("${ctl[@]}" show TEST-55-OOMD-testbloat.service -P ManagedOOMKills)" "1"

    # Best-effort kill: a failure here is deliberately ignored.
    "${ctl[@]}" kill --signal=KILL TEST-55-OOMD-testbloat.service || :
    for unit in \
        TEST-55-OOMD-testbloat.service \
        TEST-55-OOMD-testchill.service \
        TEST-55-OOMD-workload.slice; do
        "${ctl[@]}" stop "$unit"
    done
}
# Run the basic scenario with plain systemctl calls, i.e. without any extra systemctl arguments.
testcase_basic_system() {
    local workload_cgroup="/TEST.slice/TEST-55.slice/TEST-55-OOMD.slice/TEST-55-OOMD-workload.slice"

    test_basic "$workload_cgroup"
}
# Run the basic scenario against the user manager of "testuser".
testcase_basic_user() {
    # systemctl arguments that select testuser's user manager
    local -a user_manager_args=(--machine "testuser@.host" --user)

    # Make sure we also work correctly on user units.
    # Lingering is enabled only for the duration of the scenario.
    loginctl enable-linger testuser
    test_basic \
        "/user.slice/user-$(id -u testuser).slice/user@$(id -u testuser).service/TEST.slice/TEST-55.slice/TEST-55-OOMD.slice/TEST-55-OOMD-workload.slice" \
        "${user_manager_args[@]}"
    loginctl disable-linger testuser
}
# Verify ManagedOOMPreference=avoid: with the preference set on testbloat, testmunch is expected to be the
# unit that goes down while testbloat and testchill stay up.
# Exits with 25 if testbloat is down, 43 if testmunch is still up, 24 if testchill is down.
testcase_preference_avoid() {
    local bloat_dropins=/run/systemd/system/TEST-55-OOMD-testbloat.service.d
    local attempt name unit

    # only run this portion of the test if we can set xattrs
    cgroupfs_supports_user_xattrs || {
        echo "cgroup does not support user xattrs, skipping test for ManagedOOMPreference=avoid"
        return 0
    }

    mkdir -p "$bloat_dropins/"
    printf '%s\n' \
        '[Service]' \
        'ManagedOOMPreference=avoid' \
        >"$bloat_dropins/99-managed-oom-preference.conf"
    systemctl daemon-reload

    for name in testchill testmunch testbloat; do
        systemctl start "TEST-55-OOMD-$name.service"
    done

    # Poll every 2 seconds, 60 times at most, until testmunch is no longer up.
    for ((attempt = 0; attempt < 60; attempt++)); do
        systemctl status TEST-55-OOMD-testmunch.service || break
        oomctl
        sleep 2
    done

    # testmunch should be killed since testbloat had the avoid xattr on it
    systemctl status TEST-55-OOMD-testbloat.service || exit 25
    if systemctl status TEST-55-OOMD-testmunch.service; then
        exit 43
    fi
    systemctl status TEST-55-OOMD-testchill.service || exit 24

    # Best-effort kills: failures are deliberately ignored.
    for name in testbloat testmunch; do
        systemctl kill --signal=KILL "TEST-55-OOMD-$name.service" || :
    done
    for unit in \
        TEST-55-OOMD-testbloat.service \
        TEST-55-OOMD-testmunch.service \
        TEST-55-OOMD-testchill.service \
        TEST-55-OOMD-workload.slice; do
        systemctl stop "$unit"
    done

    # clean up overrides since test cases can be run in any order
    # and overrides shouldn't affect other tests
    rm -rf "$bloat_dropins"
    systemctl daemon-reload
}
# Check that `systemd-analyze verify` accepts a 1s ManagedOOMMemoryPressureDurationSec= and rejects 0.
testcase_duration_analyze() {
    local valid_unit=/tmp/TEST-55-OOMD-valid-duration.service
    local invalid_unit=/tmp/TEST-55-OOMD-invalid-duration.service

    # Verify memory pressure duration is valid if >= 1 second
    printf '%s\n' \
        '[Service]' \
        'ExecStart=echo hello' \
        'ManagedOOMMemoryPressureDurationSec=1s' \
        >"$valid_unit"

    # Verify memory pressure duration is invalid if < 1 second
    printf '%s\n' \
        '[Service]' \
        'ExecStart=echo hello' \
        'ManagedOOMMemoryPressureDurationSec=0' \
        >"$invalid_unit"

    systemd-analyze --recursive-errors=no verify "$valid_unit"
    # The negation is wrapped in a subshell so that an unexpected success is fatal under `set -e`.
    (! systemd-analyze --recursive-errors=no verify "$invalid_unit")

    rm -f "$valid_unit"
    rm -f "$invalid_unit"
}
# Verify per-unit ManagedOOMMemoryPressureDurationSec= handling: an explicit 3s on testmunch is honoured,
# an empty assignment on testchill falls back to the 2s default, and testmunch is the unit that goes down.
# Exits with 44 if testmunch is still up after the wait, 23 if testchill is down.
testcase_duration_override() {
    local munch_dropins=/run/systemd/system/TEST-55-OOMD-testmunch.service.d
    local chill_dropins=/run/systemd/system/TEST-55-OOMD-testchill.service.d
    local attempt

    # Verify memory pressure duration can be overridden to non-zero values
    mkdir -p "$munch_dropins/"
    printf '%s\n' \
        '[Service]' \
        'ManagedOOMMemoryPressureDurationSec=3s' \
        'ManagedOOMMemoryPressure=kill' \
        >"$munch_dropins/99-duration-test.conf"

    # Verify memory pressure duration will use default if set to empty
    mkdir -p "$chill_dropins/"
    printf '%s\n' \
        '[Service]' \
        'ManagedOOMMemoryPressureDurationSec=' \
        'ManagedOOMMemoryPressure=kill' \
        >"$chill_dropins/99-duration-test.conf"

    systemctl daemon-reload
    systemctl start TEST-55-OOMD-testmunch.service
    systemctl start TEST-55-OOMD-testchill.service

    # Wait until oomd lists each unit, then check the duration it reports for it.
    timeout 1m bash -xec 'until oomctl | grep "/TEST-55-OOMD-testmunch.service"; do sleep 1; done'
    oomctl | grep -A 2 "/TEST-55-OOMD-testmunch.service" | grep "Memory Pressure Duration: 3s"
    timeout 1m bash -xec 'until oomctl | grep "/TEST-55-OOMD-testchill.service"; do sleep 1; done'
    oomctl | grep -A 2 "/TEST-55-OOMD-testchill.service" | grep "Memory Pressure Duration: 2s"

    [[ "$(systemctl show -P ManagedOOMMemoryPressureDurationUSec TEST-55-OOMD-testmunch.service)" == "3s" ]]
    [[ "$(systemctl show -P ManagedOOMMemoryPressureDurationUSec TEST-55-OOMD-testchill.service)" == "[not set]" ]]

    # Poll every 2 seconds, 60 times at most, until testmunch is no longer up.
    for ((attempt = 0; attempt < 60; attempt++)); do
        systemctl status TEST-55-OOMD-testmunch.service || break
        oomctl
        sleep 2
    done

    if systemctl status TEST-55-OOMD-testmunch.service; then
        exit 44
    fi
    systemctl status TEST-55-OOMD-testchill.service || exit 23

    # Best-effort kill: a failure here is deliberately ignored.
    systemctl kill --signal=KILL TEST-55-OOMD-testmunch.service || :
    systemctl stop TEST-55-OOMD-testmunch.service
    systemctl stop TEST-55-OOMD-testchill.service
    systemctl stop TEST-55-OOMD-workload.slice

    # clean up overrides since test cases can be run in any order
    # and overrides shouldn't affect other tests
    rm -rf "$munch_dropins"
    # The testchill drop-in directory may also hold the 99-MemoryHigh.conf drop-in that the sanitizer setup
    # earlier in this script creates via `systemctl edit --runtime`. Remove only the file written here and
    # drop the directory only if that leaves it empty, so other test cases keep that override.
    rm -f "$chill_dropins/99-duration-test.conf"
    # rmdir fails on a non-empty directory; that is the expected outcome under sanitizers, so ignore it.
    rmdir "$chill_dropins" 2>/dev/null || :
    systemctl daemon-reload
}
# Verify that `systemctl reload systemd-oomd.service` picks up changes to the oomd.conf drop-in, both when
# settings are added and when they are removed again.
testcase_reload() {
    local oomd_dropin=/run/systemd/oomd.conf.d/99-oomd-test.conf
    local line

    # Check if the oomd.conf drop-in config is loaded.
    for line in \
        'Swap Used Limit: 90.00%' \
        'Default Memory Pressure Limit: 60.00%' \
        'Default Memory Pressure Duration: 2s'; do
        assert_in "$line" "$(oomctl)"
    done

    # Test oomd reload
    mkdir -p /run/systemd/oomd.conf.d/
    cat >"$oomd_dropin" <<"EOF"
[OOM]
SwapUsedLimit=80%
DefaultMemoryPressureLimit=55%
DefaultMemoryPressureDurationSec=5s
EOF
    systemctl reload systemd-oomd.service
    for line in \
        'Swap Used Limit: 80.00%' \
        'Default Memory Pressure Limit: 55.00%' \
        'Default Memory Pressure Duration: 5s'; do
        assert_in "$line" "$(oomctl)"
    done

    # Set back to default via reload
    mkdir -p /run/systemd/oomd.conf.d/
    cat >"$oomd_dropin" <<"EOF"
[OOM]
DefaultMemoryPressureDurationSec=2s
EOF
    systemctl reload systemd-oomd.service
    for line in \
        'Swap Used Limit: 90.00%' \
        'Default Memory Pressure Limit: 60.00%' \
        'Default Memory Pressure Duration: 2s'; do
        assert_in "$line" "$(oomctl)"
    done
}
# Verify how kernel OOM kills are reported for a transient unit (Result and OOMKills properties) under
# OOMPolicy=continue, OOMPolicy=kill, and OOMPolicy=kill with a delegated subcgroup using memory.oom.group=1.
testcase_kernel_oom() {
    local script=/tmp/script.sh
    local unit=oom-kill
    # Common prefix of every systemd-run invocation below.
    local -a run_unit=(systemd-run --wait --unit "$unit")

    # Payload for the first two scenarios: a child and then the main process each raise their own OOM score
    # adjustment and request an OOM kill through sysrq.
    cat >"$script" <<"EOF"
#!/usr/bin/env bash
set -x
choom --adjust '+1000' -- bash -c 'echo f >/proc/sysrq-trigger && exec sleep infinity'
choom --adjust '+1000' -p $$
echo f >/proc/sysrq-trigger
exec sleep infinity
EOF
    chmod +x "$script"

    # In every scenario the unit is expected to fail, hence the negation; it lives in a subshell so that an
    # unexpected success is fatal under `set -e`.
    (! "${run_unit[@]}" -p OOMPolicy=continue "$script")
    # With OOMPolicy=continue, we shouldn't get the oom-kill result.
    assert_eq "$(systemctl show "$unit" -P Result)" "signal"
    # Check that OOMKills reports 2 individual processes killed.
    assert_eq "$(systemctl show "$unit" -P OOMKills)" "2"
    systemctl reset-failed

    (! "${run_unit[@]}" -p OOMPolicy=kill "$script")
    # Check that a regular kernel oom kill with OOMPolicy=kill results in the oom-kill result.
    assert_eq "$(systemctl show "$unit" -P Result)" "oom-kill"
    # Check that OOMKills reports 1 oom group kill instead of the number of processes that were killed.
    assert_eq "$(systemctl show "$unit" -P OOMKills)" "1"
    systemctl reset-failed

    # Payload for the delegated scenario: the OOM victim lives in a "sub" cgroup with memory.oom.group=1.
    cat >"$script" <<"EOF"
#!/usr/bin/env bash
set -x
echo '+memory' >/sys/fs/cgroup/system.slice/oom-kill.service/cgroup.subtree_control
mkdir /sys/fs/cgroup/system.slice/oom-kill.service/sub
echo 1 >/sys/fs/cgroup/system.slice/oom-kill.service/sub/memory.oom.group
# Start a child process in the subcgroup that will trigger OOM and be killed but keep the main process
# outside the subcgroup to avoid a race condition where the kernel SIGKILLs the main process before systemd
# can process the OOM notification. With the main process still alive, systemd should have time to receive
# the OOM event and enter the 'oom-kill' state before the service exits.
(
echo $BASHPID >/sys/fs/cgroup/system.slice/oom-kill.service/sub/cgroup.procs
choom --adjust '+1000' -p $BASHPID
echo f >/proc/sysrq-trigger
exec sleep infinity
) &
wait $! || :
exec sleep infinity
EOF
    chmod +x "$script"

    (! "${run_unit[@]}" -p OOMPolicy=kill -p Delegate=yes -p DelegateSubgroup=init.scope "$script")
    # Test that an oom-kill in a delegated unit in a subcgroup with memory.oom.group=1 also results in the
    # oom-kill exit status.
    assert_eq "$(systemctl show "$unit" -P Result)" "oom-kill"
    assert_eq "$(systemctl show "$unit" -P OOMKills)" "1"
    systemctl reset-failed
}
# NOTE(review): run_testcases is not defined in this file; presumably it comes from the sourced
# test-control.sh helper and runs the testcase_* functions defined above -- confirm against that helper.
run_testcases
# Create the /testok marker file. With `set -e` in effect (see the top of the script) this line is only
# reached if run_testcases returned successfully.
touch /testok