Merge pull request #30633 from mrc0mmand/cocci-shenanigans

coccinelle: rework how we run the Coccinelle transformations
This commit is contained in:
Yu Watanabe
2023-12-26 05:45:58 +09:00
committed by GitHub
21 changed files with 88 additions and 71 deletions

View File

@@ -3,7 +3,6 @@
expression e, v, flags;
expression list args;
@@
+ return
- json_log(v, flags, 0, args);
+ json_log(v, flags, SYNTHETIC_ERRNO(e), args);
- return -e;
+ return json_log(v, flags, SYNTHETIC_ERRNO(e), args);

View File

@@ -2,6 +2,14 @@
# SPDX-License-Identifier: LGPL-2.1-or-later
set -e
# FIXME:
# - Coccinelle doesn't like our TEST() macros, which then causes name conflicts; i.e. Cocci can't process
# that TEST(xsetxattr) yields test_xsetxattr() and uses just xsetxattr() in this case, which then conflicts
# with the tested xsetxattr() function, leading to the whole test case getting skipped due to
# conflicting typedefs
# - something keeps pulling in src/boot/efi/*.h stuff, even though it's excluded
# - Coccinelle has issues with some of our more complex macros
# Exclude the following paths from the Coccinelle transformations
EXCLUDED_PATHS=(
"src/boot/efi/*"
@@ -10,13 +18,17 @@ EXCLUDED_PATHS=(
# Symlinked to test-bus-vtable-cc.cc, which causes issues with the IN_SET macro
"src/libsystemd/sd-bus/test-bus-vtable.c"
"src/libsystemd/sd-journal/lookup3.c"
# Ignore man examples, as they redefine some macros we use internally, which makes Coccinelle complain
# and ignore code that tries to use the redefined stuff
"man/*"
)
TOP_DIR="$(git rev-parse --show-toplevel)"
CACHE_DIR="$(dirname "$0")/.coccinelle-cache"
ARGS=()
# Create an array from files tracked by git...
mapfile -t FILES < <(git ls-files ':/*.[ch]')
mapfile -t FILES < <(git ls-files ':/*.c')
# ...and filter everything that matches patterns from EXCLUDED_PATHS
for excl in "${EXCLUDED_PATHS[@]}"; do
# shellcheck disable=SC2206
@@ -37,12 +49,43 @@ fi
[[ ${#@} -ne 0 ]] && SCRIPTS=("$@") || SCRIPTS=("$TOP_DIR"/coccinelle/*.cocci)
mkdir -p "$CACHE_DIR"
echo "--x-- Using Coccinelle cache directory: $CACHE_DIR"
echo "--x--"
echo "--x-- Note: running spatch for the first time without populated cache takes"
echo "--x-- a _long_ time (15-30 minutes). Also, the cache is quite large"
echo "--x-- (~15 GiB), so make sure you have enough free space."
echo
for script in "${SCRIPTS[@]}"; do
echo "--x-- Processing $script --x--"
TMPFILE="$(mktemp)"
echo "+ spatch --sp-file $script ${ARGS[*]} ..."
parallel --halt now,fail=1 --keep-order --noswap --max-args=20 \
spatch --macro-file="$TOP_DIR/coccinelle/macros.h" --smpl-spacing --sp-file "$script" "${ARGS[@]}" ::: "${FILES[@]}" \
2>"$TMPFILE" || cat "$TMPFILE"
# A couple of notes:
#
# 1) Limit this to 10 files at once, as processing the ASTs is _very_ memory hungry - e.g. with 20 files
# at once one spatch process can take around 2.5 GiB of RAM, which can easily eat up all available RAM
# when paired together with parallel
#
# 2) Make sure spatch can find our includes via -I <dir>, similarly as we do when compiling stuff
#
# 3) Make sure to also process headers pulled in by other headers (--recursive-includes), but use them only
# to get type definitions (--include-headers-for-types) - otherwise we'd start formatting them as well, which
# might be unwanted, especially for includes we fetch verbatim from third parties
#
# 4) Use cache, since generating the full AST is _very_ expensive, i.e. the uncached run takes 15 - 30
# minutes (for one rule(!)), vs 30 - 90 seconds when the cache is populated. One major downside of the
# cache is that it's quite big - at the time of writing the cache takes around 15 GiB, but the
# performance boost is definitely worth it
parallel --halt now,fail=1 --keep-order --noswap --max-args=10 \
spatch --cache-prefix "$CACHE_DIR" \
-I src \
--recursive-includes \
--include-headers-for-types \
--smpl-spacing \
--sp-file "$script" \
"${ARGS[@]}" ::: "${FILES[@]}" \
2>"$TMPFILE" || cat "$TMPFILE"
rm -f "$TMPFILE"
echo -e "--x-- Processed $script --x--\n"
done

View File

@@ -1,28 +1,13 @@
/* SPDX-License-Identifier: LGPL-2.1-or-later */
@@
position p : script:python() { p[0].file != "src/journal/lookup3.c" };
identifier id;
expression e;
expression e,e1;
@@
if (...)
- {
- if (e) {
+ if (e)
(
id@p(...);
e1@p;
|
e@p;
)
- }
@@
position p : script:python() { p[0].file != "src/journal/lookup3.c" };
identifier id;
expression e;
@@
if (...)
- {
(
return id@p(...);
|
return e@p;
return e1@p;
)
- }