* [PATCH] selftest/mm: Make hugetlb_reparenting_test tolerant to async reparenting
@ 2025-04-07 8:42 Li Wang
2025-04-08 3:46 ` Donet Tom
0 siblings, 1 reply; 2+ messages in thread
From: Li Wang @ 2025-04-07 8:42 UTC (permalink / raw)
To: linux-mm, linux-kselftest, linux-kernel, akpm, liwang
Cc: Waiman Long, Anshuman Khandual, Dev Jain, Kirill A. Shutemov, Shuah Khan
In cgroup v2, memory and hugetlb usage reparenting is asynchronous.
This can cause test flakiness when immediately asserting usage after
deleting a child cgroup. To address this, add a helper function
`assert_with_retry()` that checks usage values with a timeout-based retry.
This improves test stability without relying on fixed sleep delays.
Also bump up the tolerance size to 7MB.
This avoids false positives such as:
...
# Assert memory charged correctly for child only use.
# actual a = 11 MB
# expected a = 0 MB
# fail
# cleanup
# [FAIL]
not ok 11 hugetlb_reparenting_test.sh -cgroup-v2 # exit=1
# 0
# SUMMARY: PASS=10 SKIP=0 FAIL=1
Signed-off-by: Li Wang <liwang@redhat.com>
Cc: Waiman Long <longman@redhat.com>
Cc: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Dev Jain <dev.jain@arm.com>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Shuah Khan <shuah@kernel.org>
---
.../selftests/mm/hugetlb_reparenting_test.sh | 96 ++++++++-----------
1 file changed, 41 insertions(+), 55 deletions(-)
diff --git a/tools/testing/selftests/mm/hugetlb_reparenting_test.sh b/tools/testing/selftests/mm/hugetlb_reparenting_test.sh
index 11f9bbe7dc22..1c172c6999f4 100755
--- a/tools/testing/selftests/mm/hugetlb_reparenting_test.sh
+++ b/tools/testing/selftests/mm/hugetlb_reparenting_test.sh
@@ -36,7 +36,7 @@ else
do_umount=1
fi
fi
-MNT='/mnt/huge/'
+MNT='/mnt/huge'
function get_machine_hugepage_size() {
hpz=$(grep -i hugepagesize /proc/meminfo)
@@ -60,6 +60,41 @@ function cleanup() {
set -e
}
+function assert_with_retry() {
+ local actual_path="$1"
+ local expected="$2"
+ local tolerance=$((7 * 1024 * 1024))
+ local timeout=20
+ local interval=1
+ local start_time
+ local now
+ local elapsed
+ local actual
+
+ start_time=$(date +%s)
+
+ while true; do
+ actual="$(cat "$actual_path")"
+
+ if [[ $actual -ge $(($expected - $tolerance)) ]] &&
+ [[ $actual -le $(($expected + $tolerance)) ]]; then
+ return 0
+ fi
+
+ now=$(date +%s)
+ elapsed=$((now - start_time))
+
+ if [[ $elapsed -ge $timeout ]]; then
+ echo "actual = $((${actual%% *} / 1024 / 1024)) MB"
+ echo "expected = $((${expected%% *} / 1024 / 1024)) MB"
+ cleanup
+ exit 1
+ fi
+
+ sleep $interval
+ done
+}
+
function assert_state() {
local expected_a="$1"
local expected_a_hugetlb="$2"
@@ -70,58 +105,13 @@ function assert_state() {
expected_b="$3"
expected_b_hugetlb="$4"
fi
- local tolerance=$((5 * 1024 * 1024))
-
- local actual_a
- actual_a="$(cat "$CGROUP_ROOT"/a/memory.$usage_file)"
- if [[ $actual_a -lt $(($expected_a - $tolerance)) ]] ||
- [[ $actual_a -gt $(($expected_a + $tolerance)) ]]; then
- echo actual a = $((${actual_a%% *} / 1024 / 1024)) MB
- echo expected a = $((${expected_a%% *} / 1024 / 1024)) MB
- echo fail
-
- cleanup
- exit 1
- fi
-
- local actual_a_hugetlb
- actual_a_hugetlb="$(cat "$CGROUP_ROOT"/a/hugetlb.${MB}MB.$usage_file)"
- if [[ $actual_a_hugetlb -lt $(($expected_a_hugetlb - $tolerance)) ]] ||
- [[ $actual_a_hugetlb -gt $(($expected_a_hugetlb + $tolerance)) ]]; then
- echo actual a hugetlb = $((${actual_a_hugetlb%% *} / 1024 / 1024)) MB
- echo expected a hugetlb = $((${expected_a_hugetlb%% *} / 1024 / 1024)) MB
- echo fail
-
- cleanup
- exit 1
- fi
-
- if [[ -z "$expected_b" || -z "$expected_b_hugetlb" ]]; then
- return
- fi
-
- local actual_b
- actual_b="$(cat "$CGROUP_ROOT"/a/b/memory.$usage_file)"
- if [[ $actual_b -lt $(($expected_b - $tolerance)) ]] ||
- [[ $actual_b -gt $(($expected_b + $tolerance)) ]]; then
- echo actual b = $((${actual_b%% *} / 1024 / 1024)) MB
- echo expected b = $((${expected_b%% *} / 1024 / 1024)) MB
- echo fail
-
- cleanup
- exit 1
- fi
- local actual_b_hugetlb
- actual_b_hugetlb="$(cat "$CGROUP_ROOT"/a/b/hugetlb.${MB}MB.$usage_file)"
- if [[ $actual_b_hugetlb -lt $(($expected_b_hugetlb - $tolerance)) ]] ||
- [[ $actual_b_hugetlb -gt $(($expected_b_hugetlb + $tolerance)) ]]; then
- echo actual b hugetlb = $((${actual_b_hugetlb%% *} / 1024 / 1024)) MB
- echo expected b hugetlb = $((${expected_b_hugetlb%% *} / 1024 / 1024)) MB
- echo fail
+ assert_with_retry "$CGROUP_ROOT/a/memory.$usage_file" "$expected_a"
+ assert_with_retry "$CGROUP_ROOT/a/hugetlb.${MB}MB.$usage_file" "$expected_a_hugetlb"
- cleanup
- exit 1
+ if [[ -n "$expected_b" && -n "$expected_b_hugetlb" ]]; then
+ assert_with_retry "$CGROUP_ROOT/a/b/memory.$usage_file" "$expected_b"
+ assert_with_retry "$CGROUP_ROOT/a/b/hugetlb.${MB}MB.$usage_file" "$expected_b_hugetlb"
fi
}
@@ -174,7 +164,6 @@ size=$((${MB} * 1024 * 1024 * 25)) # 50MB = 25 * 2MB hugepages.
cleanup
-echo
echo
echo Test charge, rmdir, uncharge
setup
@@ -195,7 +184,6 @@ cleanup
echo done
echo
-echo
if [[ ! $cgroup2 ]]; then
echo "Test parent and child hugetlb usage"
setup
@@ -212,7 +200,6 @@ if [[ ! $cgroup2 ]]; then
assert_state 0 $(($size * 2)) 0 $size
rmdir "$CGROUP_ROOT"/a/b
- sleep 5
echo Assert memory reparent correctly.
assert_state 0 $(($size * 2))
@@ -224,7 +211,6 @@ if [[ ! $cgroup2 ]]; then
cleanup
fi
-echo
echo
echo "Test child only hugetlb usage"
echo setup
--
2.48.1
^ permalink raw reply [flat|nested] 2+ messages in thread
* Re: [PATCH] selftest/mm: Make hugetlb_reparenting_test tolerant to async reparenting
2025-04-07 8:42 [PATCH] selftest/mm: Make hugetlb_reparenting_test tolerant to async reparenting Li Wang
@ 2025-04-08 3:46 ` Donet Tom
0 siblings, 0 replies; 2+ messages in thread
From: Donet Tom @ 2025-04-08 3:46 UTC (permalink / raw)
To: Li Wang, linux-mm, linux-kselftest, linux-kernel, akpm
Cc: Waiman Long, Anshuman Khandual, Dev Jain, Kirill A. Shutemov, Shuah Khan
On 4/7/25 2:12 PM, Li Wang wrote:
> In cgroup v2, memory and hugetlb usage reparenting is asynchronous.
> This can cause test flakiness when immediately asserting usage after
> deleting a child cgroup. To address this, add a helper function
> `assert_with_retry()` that checks usage values with a timeout-based retry.
> This improves test stability without relying on fixed sleep delays.
>
> Also bump up the tolerance size to 7MB.
>
> To avoid False Positives:
> ...
> # Assert memory charged correctly for child only use.
> # actual a = 11 MB
> # expected a = 0 MB
> # fail
> # cleanup
> # [FAIL]
> not ok 11 hugetlb_reparenting_test.sh -cgroup-v2 # exit=1
> # 0
> # SUMMARY: PASS=10 SKIP=0 FAIL=1
I was also seeing this failure. I have tested this patch on my PowerPC
setup, and the test passes now.
./hugetlb_reparenting_test.sh -cgroup-v2
cleanup
Test charge, rmdir, uncharge
mkdir
write
Writing to this path: /mnt/huge/test
Writing this size: 52428800
Populating.
Not writing to memory.
Using method=0
Shared mapping.
RESERVE mapping.
Allocating using HUGETLBFS.
rmdir
uncharge
cleanup
done
Test child only hugetlb usage
setup
write
Writing to this path: /mnt/huge/test2
Writing this size: 52428800
Populating.
Not writing to memory.
Using method=0
Shared mapping.
RESERVE mapping.
Allocating using HUGETLBFS.
Assert memory charged correctly for child only use.
actual = 10 MB
expected = 0 MB
cleanup
Feel free to add
Tested-by: Donet Tom <donettom@linux.ibm.com>
>
> Signed-off-by: Li Wang <liwang@redhat.com>
> Cc: Waiman Long <longman@redhat.com>
> Cc: Anshuman Khandual <anshuman.khandual@arm.com>
> Cc: Dev Jain <dev.jain@arm.com>
> Cc: Kirill A. Shuemov <kirill.shutemov@linux.intel.com>
> Cc: Shuah Khan <shuah@kernel.org>
> ---
> .../selftests/mm/hugetlb_reparenting_test.sh | 96 ++++++++-----------
> 1 file changed, 41 insertions(+), 55 deletions(-)
>
> diff --git a/tools/testing/selftests/mm/hugetlb_reparenting_test.sh b/tools/testing/selftests/mm/hugetlb_reparenting_test.sh
> index 11f9bbe7dc22..1c172c6999f4 100755
> --- a/tools/testing/selftests/mm/hugetlb_reparenting_test.sh
> +++ b/tools/testing/selftests/mm/hugetlb_reparenting_test.sh
> @@ -36,7 +36,7 @@ else
> do_umount=1
> fi
> fi
> -MNT='/mnt/huge/'
> +MNT='/mnt/huge'
>
> function get_machine_hugepage_size() {
> hpz=$(grep -i hugepagesize /proc/meminfo)
> @@ -60,6 +60,41 @@ function cleanup() {
> set -e
> }
>
> +function assert_with_retry() {
> + local actual_path="$1"
> + local expected="$2"
> + local tolerance=$((7 * 1024 * 1024))
> + local timeout=20
> + local interval=1
> + local start_time
> + local now
> + local elapsed
> + local actual
> +
> + start_time=$(date +%s)
> +
> + while true; do
> + actual="$(cat "$actual_path")"
> +
> + if [[ $actual -ge $(($expected - $tolerance)) ]] &&
> + [[ $actual -le $(($expected + $tolerance)) ]]; then
> + return 0
> + fi
> +
> + now=$(date +%s)
> + elapsed=$((now - start_time))
> +
> + if [[ $elapsed -ge $timeout ]]; then
> + echo "actual = $((${actual%% *} / 1024 / 1024)) MB"
> + echo "expected = $((${expected%% *} / 1024 / 1024)) MB"
> + cleanup
> + exit 1
> + fi
> +
> + sleep $interval
> + done
> +}
> +
> function assert_state() {
> local expected_a="$1"
> local expected_a_hugetlb="$2"
> @@ -70,58 +105,13 @@ function assert_state() {
> expected_b="$3"
> expected_b_hugetlb="$4"
> fi
> - local tolerance=$((5 * 1024 * 1024))
> -
> - local actual_a
> - actual_a="$(cat "$CGROUP_ROOT"/a/memory.$usage_file)"
> - if [[ $actual_a -lt $(($expected_a - $tolerance)) ]] ||
> - [[ $actual_a -gt $(($expected_a + $tolerance)) ]]; then
> - echo actual a = $((${actual_a%% *} / 1024 / 1024)) MB
> - echo expected a = $((${expected_a%% *} / 1024 / 1024)) MB
> - echo fail
> -
> - cleanup
> - exit 1
> - fi
> -
> - local actual_a_hugetlb
> - actual_a_hugetlb="$(cat "$CGROUP_ROOT"/a/hugetlb.${MB}MB.$usage_file)"
> - if [[ $actual_a_hugetlb -lt $(($expected_a_hugetlb - $tolerance)) ]] ||
> - [[ $actual_a_hugetlb -gt $(($expected_a_hugetlb + $tolerance)) ]]; then
> - echo actual a hugetlb = $((${actual_a_hugetlb%% *} / 1024 / 1024)) MB
> - echo expected a hugetlb = $((${expected_a_hugetlb%% *} / 1024 / 1024)) MB
> - echo fail
> -
> - cleanup
> - exit 1
> - fi
> -
> - if [[ -z "$expected_b" || -z "$expected_b_hugetlb" ]]; then
> - return
> - fi
> -
> - local actual_b
> - actual_b="$(cat "$CGROUP_ROOT"/a/b/memory.$usage_file)"
> - if [[ $actual_b -lt $(($expected_b - $tolerance)) ]] ||
> - [[ $actual_b -gt $(($expected_b + $tolerance)) ]]; then
> - echo actual b = $((${actual_b%% *} / 1024 / 1024)) MB
> - echo expected b = $((${expected_b%% *} / 1024 / 1024)) MB
> - echo fail
> -
> - cleanup
> - exit 1
> - fi
>
> - local actual_b_hugetlb
> - actual_b_hugetlb="$(cat "$CGROUP_ROOT"/a/b/hugetlb.${MB}MB.$usage_file)"
> - if [[ $actual_b_hugetlb -lt $(($expected_b_hugetlb - $tolerance)) ]] ||
> - [[ $actual_b_hugetlb -gt $(($expected_b_hugetlb + $tolerance)) ]]; then
> - echo actual b hugetlb = $((${actual_b_hugetlb%% *} / 1024 / 1024)) MB
> - echo expected b hugetlb = $((${expected_b_hugetlb%% *} / 1024 / 1024)) MB
> - echo fail
> + assert_with_retry "$CGROUP_ROOT/a/memory.$usage_file" "$expected_a"
> + assert_with_retry "$CGROUP_ROOT/a/hugetlb.${MB}MB.$usage_file" "$expected_a_hugetlb"
>
> - cleanup
> - exit 1
> + if [[ -n "$expected_b" && -n "$expected_b_hugetlb" ]]; then
> + assert_with_retry "$CGROUP_ROOT/a/b/memory.$usage_file" "$expected_b"
> + assert_with_retry "$CGROUP_ROOT/a/b/hugetlb.${MB}MB.$usage_file" "$expected_b_hugetlb"
> fi
> }
>
> @@ -174,7 +164,6 @@ size=$((${MB} * 1024 * 1024 * 25)) # 50MB = 25 * 2MB hugepages.
>
> cleanup
>
> -echo
> echo
> echo Test charge, rmdir, uncharge
> setup
> @@ -195,7 +184,6 @@ cleanup
>
> echo done
> echo
> -echo
> if [[ ! $cgroup2 ]]; then
> echo "Test parent and child hugetlb usage"
> setup
> @@ -212,7 +200,6 @@ if [[ ! $cgroup2 ]]; then
> assert_state 0 $(($size * 2)) 0 $size
>
> rmdir "$CGROUP_ROOT"/a/b
> - sleep 5
> echo Assert memory reparent correctly.
> assert_state 0 $(($size * 2))
>
> @@ -224,7 +211,6 @@ if [[ ! $cgroup2 ]]; then
> cleanup
> fi
>
> -echo
> echo
> echo "Test child only hugetlb usage"
> echo setup
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2025-04-08 3:46 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-04-07 8:42 [PATCH] selftest/mm: Make hugetlb_reparenting_test tolerant to async reparenting Li Wang
2025-04-08 3:46 ` Donet Tom
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox