linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] selftest/mm: Make hugetlb_reparenting_test tolerant to async reparenting
@ 2025-04-07  8:42 Li Wang
  2025-04-08  3:46 ` Donet Tom
  0 siblings, 1 reply; 2+ messages in thread
From: Li Wang @ 2025-04-07  8:42 UTC (permalink / raw)
  To: linux-mm, linux-kselftest, linux-kernel, akpm, liwang
  Cc: Waiman Long, Anshuman Khandual, Dev Jain, Kirill A. Shutemov, Shuah Khan

In cgroup v2, memory and hugetlb usage reparenting is asynchronous.
This can cause test flakiness when immediately asserting usage after
deleting a child cgroup. To address this, add a helper function
`assert_with_retry()` that checks usage values with a timeout-based retry.
This improves test stability without relying on fixed sleep delays.

Also bump up the tolerance size to 7MB.

To avoid False Positives:
  ...
  # Assert memory charged correctly for child only use.
  # actual a = 11 MB
  # expected a = 0 MB
  # fail
  # cleanup
  # [FAIL]
  not ok 11 hugetlb_reparenting_test.sh -cgroup-v2 # exit=1
  # 0
  # SUMMARY: PASS=10 SKIP=0 FAIL=1

Signed-off-by: Li Wang <liwang@redhat.com>
Cc: Waiman Long <longman@redhat.com>
Cc: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Dev Jain <dev.jain@arm.com>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Shuah Khan <shuah@kernel.org>
---
 .../selftests/mm/hugetlb_reparenting_test.sh  | 96 ++++++++-----------
 1 file changed, 41 insertions(+), 55 deletions(-)

diff --git a/tools/testing/selftests/mm/hugetlb_reparenting_test.sh b/tools/testing/selftests/mm/hugetlb_reparenting_test.sh
index 11f9bbe7dc22..1c172c6999f4 100755
--- a/tools/testing/selftests/mm/hugetlb_reparenting_test.sh
+++ b/tools/testing/selftests/mm/hugetlb_reparenting_test.sh
@@ -36,7 +36,7 @@ else
     do_umount=1
   fi
 fi
-MNT='/mnt/huge/'
+MNT='/mnt/huge'
 
 function get_machine_hugepage_size() {
   hpz=$(grep -i hugepagesize /proc/meminfo)
@@ -60,6 +60,41 @@ function cleanup() {
   set -e
 }
 
+function assert_with_retry() {
+  local actual_path="$1"
+  local expected="$2"
+  local tolerance=$((7 * 1024 * 1024))
+  local timeout=20
+  local interval=1
+  local start_time
+  local now
+  local elapsed
+  local actual
+
+  start_time=$(date +%s)
+
+  while true; do
+    actual="$(cat "$actual_path")"
+
+    if [[ $actual -ge $(($expected - $tolerance)) ]] &&
+        [[ $actual -le $(($expected + $tolerance)) ]]; then
+      return 0
+    fi
+
+    now=$(date +%s)
+    elapsed=$((now - start_time))
+
+    if [[ $elapsed -ge $timeout ]]; then
+      echo "actual = $((${actual%% *} / 1024 / 1024)) MB"
+      echo "expected = $((${expected%% *} / 1024 / 1024)) MB"
+      cleanup
+      exit 1
+    fi
+
+    sleep $interval
+  done
+}
+
 function assert_state() {
   local expected_a="$1"
   local expected_a_hugetlb="$2"
@@ -70,58 +105,13 @@ function assert_state() {
     expected_b="$3"
     expected_b_hugetlb="$4"
   fi
-  local tolerance=$((5 * 1024 * 1024))
-
-  local actual_a
-  actual_a="$(cat "$CGROUP_ROOT"/a/memory.$usage_file)"
-  if [[ $actual_a -lt $(($expected_a - $tolerance)) ]] ||
-    [[ $actual_a -gt $(($expected_a + $tolerance)) ]]; then
-    echo actual a = $((${actual_a%% *} / 1024 / 1024)) MB
-    echo expected a = $((${expected_a%% *} / 1024 / 1024)) MB
-    echo fail
-
-    cleanup
-    exit 1
-  fi
-
-  local actual_a_hugetlb
-  actual_a_hugetlb="$(cat "$CGROUP_ROOT"/a/hugetlb.${MB}MB.$usage_file)"
-  if [[ $actual_a_hugetlb -lt $(($expected_a_hugetlb - $tolerance)) ]] ||
-    [[ $actual_a_hugetlb -gt $(($expected_a_hugetlb + $tolerance)) ]]; then
-    echo actual a hugetlb = $((${actual_a_hugetlb%% *} / 1024 / 1024)) MB
-    echo expected a hugetlb = $((${expected_a_hugetlb%% *} / 1024 / 1024)) MB
-    echo fail
-
-    cleanup
-    exit 1
-  fi
-
-  if [[ -z "$expected_b" || -z "$expected_b_hugetlb" ]]; then
-    return
-  fi
-
-  local actual_b
-  actual_b="$(cat "$CGROUP_ROOT"/a/b/memory.$usage_file)"
-  if [[ $actual_b -lt $(($expected_b - $tolerance)) ]] ||
-    [[ $actual_b -gt $(($expected_b + $tolerance)) ]]; then
-    echo actual b = $((${actual_b%% *} / 1024 / 1024)) MB
-    echo expected b = $((${expected_b%% *} / 1024 / 1024)) MB
-    echo fail
-
-    cleanup
-    exit 1
-  fi
 
-  local actual_b_hugetlb
-  actual_b_hugetlb="$(cat "$CGROUP_ROOT"/a/b/hugetlb.${MB}MB.$usage_file)"
-  if [[ $actual_b_hugetlb -lt $(($expected_b_hugetlb - $tolerance)) ]] ||
-    [[ $actual_b_hugetlb -gt $(($expected_b_hugetlb + $tolerance)) ]]; then
-    echo actual b hugetlb = $((${actual_b_hugetlb%% *} / 1024 / 1024)) MB
-    echo expected b hugetlb = $((${expected_b_hugetlb%% *} / 1024 / 1024)) MB
-    echo fail
+  assert_with_retry "$CGROUP_ROOT/a/memory.$usage_file" "$expected_a"
+  assert_with_retry "$CGROUP_ROOT/a/hugetlb.${MB}MB.$usage_file" "$expected_a_hugetlb"
 
-    cleanup
-    exit 1
+  if [[ -n "$expected_b" && -n "$expected_b_hugetlb" ]]; then
+    assert_with_retry "$CGROUP_ROOT/a/b/memory.$usage_file" "$expected_b"
+    assert_with_retry "$CGROUP_ROOT/a/b/hugetlb.${MB}MB.$usage_file" "$expected_b_hugetlb"
   fi
 }
 
@@ -174,7 +164,6 @@ size=$((${MB} * 1024 * 1024 * 25)) # 50MB = 25 * 2MB hugepages.
 
 cleanup
 
-echo
 echo
 echo Test charge, rmdir, uncharge
 setup
@@ -195,7 +184,6 @@ cleanup
 
 echo done
 echo
-echo
 if [[ ! $cgroup2 ]]; then
   echo "Test parent and child hugetlb usage"
   setup
@@ -212,7 +200,6 @@ if [[ ! $cgroup2 ]]; then
   assert_state 0 $(($size * 2)) 0 $size
 
   rmdir "$CGROUP_ROOT"/a/b
-  sleep 5
   echo Assert memory reparent correctly.
   assert_state 0 $(($size * 2))
 
@@ -224,7 +211,6 @@ if [[ ! $cgroup2 ]]; then
   cleanup
 fi
 
-echo
 echo
 echo "Test child only hugetlb usage"
 echo setup
-- 
2.48.1



^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [PATCH] selftest/mm: Make hugetlb_reparenting_test tolerant to async reparenting
  2025-04-07  8:42 [PATCH] selftest/mm: Make hugetlb_reparenting_test tolerant to async reparenting Li Wang
@ 2025-04-08  3:46 ` Donet Tom
  0 siblings, 0 replies; 2+ messages in thread
From: Donet Tom @ 2025-04-08  3:46 UTC (permalink / raw)
  To: Li Wang, linux-mm, linux-kselftest, linux-kernel, akpm
  Cc: Waiman Long, Anshuman Khandual, Dev Jain, Kirill A. Shutemov, Shuah Khan


On 4/7/25 2:12 PM, Li Wang wrote:
> In cgroup v2, memory and hugetlb usage reparenting is asynchronous.
> This can cause test flakiness when immediately asserting usage after
> deleting a child cgroup. To address this, add a helper function
> `assert_with_retry()` that checks usage values with a timeout-based retry.
> This improves test stability without relying on fixed sleep delays.
>
> Also bump up the tolerance size to 7MB.
>
> To avoid False Positives:
>    ...
>    # Assert memory charged correctly for child only use.
>    # actual a = 11 MB
>    # expected a = 0 MB
>    # fail
>    # cleanup
>    # [FAIL]
>    not ok 11 hugetlb_reparenting_test.sh -cgroup-v2 # exit=1
>    # 0
>    # SUMMARY: PASS=10 SKIP=0 FAIL=1


I was also seeing this failure. I have tested this patch on my PowerPC
setup and it is passing now.

./hugetlb_reparenting_test.sh -cgroup-v2
cleanup

Test charge, rmdir, uncharge
mkdir
write
Writing to this path: /mnt/huge/test
Writing this size: 52428800
Populating.
Not writing to memory.
Using method=0
Shared mapping.
RESERVE mapping.
Allocating using HUGETLBFS.

rmdir
uncharge
cleanup
done


Test child only hugetlb usage
setup
write
Writing to this path: /mnt/huge/test2
Writing this size: 52428800
Populating.
Not writing to memory.
Using method=0
Shared mapping.
RESERVE mapping.
Allocating using HUGETLBFS.

Assert memory charged correctly for child only use.
actual = 10 MB
expected = 0 MB
cleanup


Feel free to add
Tested-by: Donet Tom <donettom@linux.ibm.com>


>
> Signed-off-by: Li Wang <liwang@redhat.com>
> Cc: Waiman Long <longman@redhat.com>
> Cc: Anshuman Khandual <anshuman.khandual@arm.com>
> Cc: Dev Jain <dev.jain@arm.com>
> Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
> Cc: Shuah Khan <shuah@kernel.org>
> ---
>   .../selftests/mm/hugetlb_reparenting_test.sh  | 96 ++++++++-----------
>   1 file changed, 41 insertions(+), 55 deletions(-)
>
> diff --git a/tools/testing/selftests/mm/hugetlb_reparenting_test.sh b/tools/testing/selftests/mm/hugetlb_reparenting_test.sh
> index 11f9bbe7dc22..1c172c6999f4 100755
> --- a/tools/testing/selftests/mm/hugetlb_reparenting_test.sh
> +++ b/tools/testing/selftests/mm/hugetlb_reparenting_test.sh
> @@ -36,7 +36,7 @@ else
>       do_umount=1
>     fi
>   fi
> -MNT='/mnt/huge/'
> +MNT='/mnt/huge'
>   
>   function get_machine_hugepage_size() {
>     hpz=$(grep -i hugepagesize /proc/meminfo)
> @@ -60,6 +60,41 @@ function cleanup() {
>     set -e
>   }
>   
> +function assert_with_retry() {
> +  local actual_path="$1"
> +  local expected="$2"
> +  local tolerance=$((7 * 1024 * 1024))
> +  local timeout=20
> +  local interval=1
> +  local start_time
> +  local now
> +  local elapsed
> +  local actual
> +
> +  start_time=$(date +%s)
> +
> +  while true; do
> +    actual="$(cat "$actual_path")"
> +
> +    if [[ $actual -ge $(($expected - $tolerance)) ]] &&
> +        [[ $actual -le $(($expected + $tolerance)) ]]; then
> +      return 0
> +    fi
> +
> +    now=$(date +%s)
> +    elapsed=$((now - start_time))
> +
> +    if [[ $elapsed -ge $timeout ]]; then
> +      echo "actual = $((${actual%% *} / 1024 / 1024)) MB"
> +      echo "expected = $((${expected%% *} / 1024 / 1024)) MB"
> +      cleanup
> +      exit 1
> +    fi
> +
> +    sleep $interval
> +  done
> +}
> +
>   function assert_state() {
>     local expected_a="$1"
>     local expected_a_hugetlb="$2"
> @@ -70,58 +105,13 @@ function assert_state() {
>       expected_b="$3"
>       expected_b_hugetlb="$4"
>     fi
> -  local tolerance=$((5 * 1024 * 1024))
> -
> -  local actual_a
> -  actual_a="$(cat "$CGROUP_ROOT"/a/memory.$usage_file)"
> -  if [[ $actual_a -lt $(($expected_a - $tolerance)) ]] ||
> -    [[ $actual_a -gt $(($expected_a + $tolerance)) ]]; then
> -    echo actual a = $((${actual_a%% *} / 1024 / 1024)) MB
> -    echo expected a = $((${expected_a%% *} / 1024 / 1024)) MB
> -    echo fail
> -
> -    cleanup
> -    exit 1
> -  fi
> -
> -  local actual_a_hugetlb
> -  actual_a_hugetlb="$(cat "$CGROUP_ROOT"/a/hugetlb.${MB}MB.$usage_file)"
> -  if [[ $actual_a_hugetlb -lt $(($expected_a_hugetlb - $tolerance)) ]] ||
> -    [[ $actual_a_hugetlb -gt $(($expected_a_hugetlb + $tolerance)) ]]; then
> -    echo actual a hugetlb = $((${actual_a_hugetlb%% *} / 1024 / 1024)) MB
> -    echo expected a hugetlb = $((${expected_a_hugetlb%% *} / 1024 / 1024)) MB
> -    echo fail
> -
> -    cleanup
> -    exit 1
> -  fi
> -
> -  if [[ -z "$expected_b" || -z "$expected_b_hugetlb" ]]; then
> -    return
> -  fi
> -
> -  local actual_b
> -  actual_b="$(cat "$CGROUP_ROOT"/a/b/memory.$usage_file)"
> -  if [[ $actual_b -lt $(($expected_b - $tolerance)) ]] ||
> -    [[ $actual_b -gt $(($expected_b + $tolerance)) ]]; then
> -    echo actual b = $((${actual_b%% *} / 1024 / 1024)) MB
> -    echo expected b = $((${expected_b%% *} / 1024 / 1024)) MB
> -    echo fail
> -
> -    cleanup
> -    exit 1
> -  fi
>   
> -  local actual_b_hugetlb
> -  actual_b_hugetlb="$(cat "$CGROUP_ROOT"/a/b/hugetlb.${MB}MB.$usage_file)"
> -  if [[ $actual_b_hugetlb -lt $(($expected_b_hugetlb - $tolerance)) ]] ||
> -    [[ $actual_b_hugetlb -gt $(($expected_b_hugetlb + $tolerance)) ]]; then
> -    echo actual b hugetlb = $((${actual_b_hugetlb%% *} / 1024 / 1024)) MB
> -    echo expected b hugetlb = $((${expected_b_hugetlb%% *} / 1024 / 1024)) MB
> -    echo fail
> +  assert_with_retry "$CGROUP_ROOT/a/memory.$usage_file" "$expected_a"
> +  assert_with_retry "$CGROUP_ROOT/a/hugetlb.${MB}MB.$usage_file" "$expected_a_hugetlb"
>   
> -    cleanup
> -    exit 1
> +  if [[ -n "$expected_b" && -n "$expected_b_hugetlb" ]]; then
> +    assert_with_retry "$CGROUP_ROOT/a/b/memory.$usage_file" "$expected_b"
> +    assert_with_retry "$CGROUP_ROOT/a/b/hugetlb.${MB}MB.$usage_file" "$expected_b_hugetlb"
>     fi
>   }
>   
> @@ -174,7 +164,6 @@ size=$((${MB} * 1024 * 1024 * 25)) # 50MB = 25 * 2MB hugepages.
>   
>   cleanup
>   
> -echo
>   echo
>   echo Test charge, rmdir, uncharge
>   setup
> @@ -195,7 +184,6 @@ cleanup
>   
>   echo done
>   echo
> -echo
>   if [[ ! $cgroup2 ]]; then
>     echo "Test parent and child hugetlb usage"
>     setup
> @@ -212,7 +200,6 @@ if [[ ! $cgroup2 ]]; then
>     assert_state 0 $(($size * 2)) 0 $size
>   
>     rmdir "$CGROUP_ROOT"/a/b
> -  sleep 5
>     echo Assert memory reparent correctly.
>     assert_state 0 $(($size * 2))
>   
> @@ -224,7 +211,6 @@ if [[ ! $cgroup2 ]]; then
>     cleanup
>   fi
>   
> -echo
>   echo
>   echo "Test child only hugetlb usage"
>   echo setup


^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2025-04-08  3:46 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-04-07  8:42 [PATCH] selftest/mm: Make hugetlb_reparenting_test tolerant to async reparenting Li Wang
2025-04-08  3:46 ` Donet Tom

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox