* [PATCH v2] mm: slub: Print the broken data before restoring slub.
From: Hyesoo Yu @ 2025-02-05 0:44 UTC
Cc: janghyuck.kim, chengming.zhou, Hyesoo Yu, Christoph Lameter,
Pekka Enberg, David Rientjes, Joonsoo Kim, Andrew Morton,
Vlastimil Babka, Roman Gushchin, Hyeonggon Yoo, linux-mm,
linux-kernel
Previously, the restore occurred after printing the object in slub.
After commit 47d911b02cbe ("slab: make check_object() more consistent"),
the bytes are printed after the restore. The bytes as they were before
the restore are highly valuable for debugging. For instance, in the
event of a cache issue, they show the corruption pattern broken down
into 64-byte units. Without this information, we can only speculate on
how the object was broken. Hence the corrupted regions should be
printed prior to the restoration process. However, if an object is
broken in multiple places, the same log may be output multiple times.
Therefore the slub report is printed only once, by passing a parameter
indicating whether an error has already been reported.
Changes in v2:
- Instead of using print_section every time on check_bytes_and_report,
just print it once for the entire slub object before the restore.
Signed-off-by: Hyesoo Yu <hyesoo.yu@samsung.com>
---
mm/slub.c | 25 ++++++++++++-------------
1 file changed, 12 insertions(+), 13 deletions(-)
diff --git a/mm/slub.c b/mm/slub.c
index ea956cb4b8be..7a9f7a2c17d7 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1182,7 +1182,7 @@ static void restore_bytes(struct kmem_cache *s, char *message, u8 data,
static pad_check_attributes int
check_bytes_and_report(struct kmem_cache *s, struct slab *slab,
u8 *object, char *what,
- u8 *start, unsigned int value, unsigned int bytes)
+ u8 *start, unsigned int value, unsigned int bytes, int slab_obj_print)
{
u8 *fault;
u8 *end;
@@ -1205,6 +1205,10 @@ check_bytes_and_report(struct kmem_cache *s, struct slab *slab,
pr_err("0x%p-0x%p @offset=%tu. First byte 0x%x instead of 0x%x\n",
fault, end - 1, fault - addr,
fault[0], value);
+ if (slab_obj_print) {
+ print_trailer(s, slab, object);
+ add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
+ }
skip_bug_print:
restore_bytes(s, what, value, fault, end);
@@ -1268,7 +1272,7 @@ static int check_pad_bytes(struct kmem_cache *s, struct slab *slab, u8 *p)
return 1;
return check_bytes_and_report(s, slab, p, "Object padding",
- p + off, POISON_INUSE, size_from_object(s) - off);
+ p + off, POISON_INUSE, size_from_object(s) - off, 1);
}
/* Check the pad bytes at the end of a slab page */
@@ -1318,11 +1322,11 @@ static int check_object(struct kmem_cache *s, struct slab *slab,
if (s->flags & SLAB_RED_ZONE) {
if (!check_bytes_and_report(s, slab, object, "Left Redzone",
- object - s->red_left_pad, val, s->red_left_pad))
+ object - s->red_left_pad, val, s->red_left_pad, ret))
ret = 0;
if (!check_bytes_and_report(s, slab, object, "Right Redzone",
- endobject, val, s->inuse - s->object_size))
+ endobject, val, s->inuse - s->object_size, ret))
ret = 0;
if (slub_debug_orig_size(s) && val == SLUB_RED_ACTIVE) {
@@ -1331,7 +1335,7 @@ static int check_object(struct kmem_cache *s, struct slab *slab,
if (s->object_size > orig_size &&
!check_bytes_and_report(s, slab, object,
"kmalloc Redzone", p + orig_size,
- val, s->object_size - orig_size)) {
+ val, s->object_size - orig_size, ret)) {
ret = 0;
}
}
@@ -1339,7 +1343,7 @@ static int check_object(struct kmem_cache *s, struct slab *slab,
if ((s->flags & SLAB_POISON) && s->object_size < s->inuse) {
if (!check_bytes_and_report(s, slab, p, "Alignment padding",
endobject, POISON_INUSE,
- s->inuse - s->object_size))
+ s->inuse - s->object_size, ret))
ret = 0;
}
}
@@ -1355,11 +1359,11 @@ static int check_object(struct kmem_cache *s, struct slab *slab,
if (kasan_meta_size < s->object_size - 1 &&
!check_bytes_and_report(s, slab, p, "Poison",
p + kasan_meta_size, POISON_FREE,
- s->object_size - kasan_meta_size - 1))
+ s->object_size - kasan_meta_size - 1, ret))
ret = 0;
if (kasan_meta_size < s->object_size &&
!check_bytes_and_report(s, slab, p, "End Poison",
- p + s->object_size - 1, POISON_END, 1))
+ p + s->object_size - 1, POISON_END, 1, ret))
ret = 0;
}
/*
@@ -1385,11 +1389,6 @@ static int check_object(struct kmem_cache *s, struct slab *slab,
ret = 0;
}
- if (!ret && !slab_in_kunit_test()) {
- print_trailer(s, slab, object);
- add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
- }
-
return ret;
}
--
2.48.0
* Re: [PATCH v2] mm: slub: Print the broken data before restoring slub.
From: Vlastimil Babka @ 2025-02-06 14:08 UTC
To: Hyesoo Yu
Cc: janghyuck.kim, chengming.zhou, Christoph Lameter, Pekka Enberg,
David Rientjes, Joonsoo Kim, Andrew Morton, Roman Gushchin,
Hyeonggon Yoo, linux-mm, linux-kernel
On 2/5/25 01:44, Hyesoo Yu wrote:
> Previously, the restore occurred after printing the object in slub.
> After commit 47d911b02cbe ("slab: make check_object() more consistent"),
> the bytes are printed after the restore. The bytes as they were before
> the restore are highly valuable for debugging. For instance, in the
> event of a cache issue, they show the corruption pattern broken down
> into 64-byte units. Without this information, we can only speculate on
> how the object was broken. Hence the corrupted regions should be
> printed prior to the restoration process. However, if an object is
> broken in multiple places, the same log may be output multiple times.
> Therefore the slub report is printed only once, by passing a parameter
> indicating whether an error has already been reported.
>
> Changes in v2:
> - Instead of using print_section every time on check_bytes_and_report,
> just print it once for the entire slub object before the restore.
>
> Signed-off-by: Hyesoo Yu <hyesoo.yu@samsung.com>
> ---
> mm/slub.c | 25 ++++++++++++-------------
> 1 file changed, 12 insertions(+), 13 deletions(-)
>
> diff --git a/mm/slub.c b/mm/slub.c
> index ea956cb4b8be..7a9f7a2c17d7 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -1182,7 +1182,7 @@ static void restore_bytes(struct kmem_cache *s, char *message, u8 data,
> static pad_check_attributes int
> check_bytes_and_report(struct kmem_cache *s, struct slab *slab,
> u8 *object, char *what,
> - u8 *start, unsigned int value, unsigned int bytes)
> + u8 *start, unsigned int value, unsigned int bytes, int slab_obj_print)
It would be better to redistribute the arguments among the lines so that each
line fits under 80 characters; the previous line is underutilized. Also, could
the new argument be a bool?
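E.g. something like this (untested, just a sketch to illustrate the reflow
and the bool):

	static pad_check_attributes int
	check_bytes_and_report(struct kmem_cache *s, struct slab *slab,
			       u8 *object, char *what, u8 *start,
			       unsigned int value, unsigned int bytes,
			       bool slab_obj_print)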
> {
> u8 *fault;
> u8 *end;
> @@ -1205,6 +1205,10 @@ check_bytes_and_report(struct kmem_cache *s, struct slab *slab,
> pr_err("0x%p-0x%p @offset=%tu. First byte 0x%x instead of 0x%x\n",
> fault, end - 1, fault - addr,
> fault[0], value);
Hm, we have slab_bug() above this, not slab_err(). So this is another place
that would need to make sure a WARN is called with your other patch.
> + if (slab_obj_print) {
> + print_trailer(s, slab, object);
> + add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
I guess we could do the WARN here. If panic_on_warn is enabled it will not
report all problems that check_object() could find, and will panic on the
first one. But that would have happened too with your slab_fix() approach
(slab_fix() called from restore_bytes() below). I think we can live with
that instead of needing two separate reporting and fixing rounds from
check_object().
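I.e. roughly (just a sketch, untested; the exact form depends on how the WARN
ends up looking in your other patch):

	if (slab_obj_print) {
		print_trailer(s, slab, object);
		add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
		/* warn once per object, before restore_bytes() runs below */
		WARN_ON(1);
	}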
Could you send the two patches as a series in v3, as they are
inter-dependent? Thanks.
> + }
>
> skip_bug_print:
> restore_bytes(s, what, value, fault, end);
> @@ -1268,7 +1272,7 @@ static int check_pad_bytes(struct kmem_cache *s, struct slab *slab, u8 *p)
> return 1;
>
> return check_bytes_and_report(s, slab, p, "Object padding",
> - p + off, POISON_INUSE, size_from_object(s) - off);
> + p + off, POISON_INUSE, size_from_object(s) - off, 1);
> }
>
> /* Check the pad bytes at the end of a slab page */
> @@ -1318,11 +1322,11 @@ static int check_object(struct kmem_cache *s, struct slab *slab,
>
> if (s->flags & SLAB_RED_ZONE) {
> if (!check_bytes_and_report(s, slab, object, "Left Redzone",
> - object - s->red_left_pad, val, s->red_left_pad))
> + object - s->red_left_pad, val, s->red_left_pad, ret))
> ret = 0;
>
> if (!check_bytes_and_report(s, slab, object, "Right Redzone",
> - endobject, val, s->inuse - s->object_size))
> + endobject, val, s->inuse - s->object_size, ret))
> ret = 0;
>
> if (slub_debug_orig_size(s) && val == SLUB_RED_ACTIVE) {
> @@ -1331,7 +1335,7 @@ static int check_object(struct kmem_cache *s, struct slab *slab,
> if (s->object_size > orig_size &&
> !check_bytes_and_report(s, slab, object,
> "kmalloc Redzone", p + orig_size,
> - val, s->object_size - orig_size)) {
> + val, s->object_size - orig_size, ret)) {
> ret = 0;
> }
> }
> @@ -1339,7 +1343,7 @@ static int check_object(struct kmem_cache *s, struct slab *slab,
> if ((s->flags & SLAB_POISON) && s->object_size < s->inuse) {
> if (!check_bytes_and_report(s, slab, p, "Alignment padding",
> endobject, POISON_INUSE,
> - s->inuse - s->object_size))
> + s->inuse - s->object_size, ret))
> ret = 0;
> }
> }
> @@ -1355,11 +1359,11 @@ static int check_object(struct kmem_cache *s, struct slab *slab,
> if (kasan_meta_size < s->object_size - 1 &&
> !check_bytes_and_report(s, slab, p, "Poison",
> p + kasan_meta_size, POISON_FREE,
> - s->object_size - kasan_meta_size - 1))
> + s->object_size - kasan_meta_size - 1, ret))
> ret = 0;
> if (kasan_meta_size < s->object_size &&
> !check_bytes_and_report(s, slab, p, "End Poison",
> - p + s->object_size - 1, POISON_END, 1))
> + p + s->object_size - 1, POISON_END, 1, ret))
> ret = 0;
> }
> /*
> @@ -1385,11 +1389,6 @@ static int check_object(struct kmem_cache *s, struct slab *slab,
> ret = 0;
> }
>
> - if (!ret && !slab_in_kunit_test()) {
> - print_trailer(s, slab, object);
> - add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
> - }
> -
> return ret;
> }
>
* Re: [PATCH v2] mm: slub: Print the broken data before restoring slub.
From: Hyesoo Yu @ 2025-02-07 3:41 UTC
To: Vlastimil Babka
Cc: janghyuck.kim, chengming.zhou, Christoph Lameter, Pekka Enberg,
David Rientjes, Joonsoo Kim, Andrew Morton, Roman Gushchin,
Hyeonggon Yoo, linux-mm, linux-kernel
On Thu, Feb 06, 2025 at 03:08:48PM +0100, Vlastimil Babka wrote:
> On 2/5/25 01:44, Hyesoo Yu wrote:
> > Previously, the restore occurred after printing the object in slub.
> > After commit 47d911b02cbe ("slab: make check_object() more consistent"),
> > the bytes are printed after the restore. The bytes as they were before
> > the restore are highly valuable for debugging. For instance, in the
> > event of a cache issue, they show the corruption pattern broken down
> > into 64-byte units. Without this information, we can only speculate on
> > how the object was broken. Hence the corrupted regions should be
> > printed prior to the restoration process. However, if an object is
> > broken in multiple places, the same log may be output multiple times.
> > Therefore the slub report is printed only once, by passing a parameter
> > indicating whether an error has already been reported.
> >
> > Changes in v2:
> > - Instead of using print_section every time on check_bytes_and_report,
> > just print it once for the entire slub object before the restore.
> >
> > Signed-off-by: Hyesoo Yu <hyesoo.yu@samsung.com>
> > ---
> > mm/slub.c | 25 ++++++++++++-------------
> > 1 file changed, 12 insertions(+), 13 deletions(-)
> >
> > diff --git a/mm/slub.c b/mm/slub.c
> > index ea956cb4b8be..7a9f7a2c17d7 100644
> > --- a/mm/slub.c
> > +++ b/mm/slub.c
> > @@ -1182,7 +1182,7 @@ static void restore_bytes(struct kmem_cache *s, char *message, u8 data,
> > static pad_check_attributes int
> > check_bytes_and_report(struct kmem_cache *s, struct slab *slab,
> > u8 *object, char *what,
> > - u8 *start, unsigned int value, unsigned int bytes)
> > + u8 *start, unsigned int value, unsigned int bytes, int slab_obj_print)
>
> It would be better to redistribute the arguments among lines to fit each <80
> chars. The previous line is underutilized. Also could the new argument be bool?
>
I used an integer to make the types match, but using a boolean does seem more
readable. I will change it to a boolean and pass !!ret from check_object().
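For example, the call sites in check_object() would become something like this
(sketch for v3, untested):

	if (!check_bytes_and_report(s, slab, object, "Left Redzone",
			object - s->red_left_pad, val, s->red_left_pad,
			!!ret))
		ret = 0;

so the trailer is only printed for the first corrupted region found in an
object.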
> > {
> > u8 *fault;
> > u8 *end;
> > @@ -1205,6 +1205,10 @@ check_bytes_and_report(struct kmem_cache *s, struct slab *slab,
> > pr_err("0x%p-0x%p @offset=%tu. First byte 0x%x instead of 0x%x\n",
> > fault, end - 1, fault - addr,
> > fault[0], value);
>
> Hm we have slab_bug() above this, not slab_err(). So this is another place
> that would need to take care a WARN is called with your other patch.
>
> > + if (slab_obj_print) {
> > + print_trailer(s, slab, object);
> > + add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
>
> I guess we could do the WARN here. If panic_on_warn is enabled it will not
> report all problems that check_object() could find and panic on the first
> one. But that would have happened too with your slab_fix() approach
> (slab_fix() called from restore_bytes() below). I think we can live with
> that instead of needing two separate reporting and fixing rounds from
> check_object().
>
> Could you send the two patches as a series in v3, as they are
> inter-dependent? Thanks.
>
I guess we can call object_err() here. That would include the WARN() from the
other patch. I will send the two patches as a series in v3.
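Roughly what I have in mind (just a sketch; the reason string is a
placeholder):

	if (slab_obj_print)
		object_err(s, slab, object, "Object corrupt");

IIUC object_err() already does the print_trailer() + add_taint() (plus
slab_bug()), so the two open-coded lines would just be replaced by that call.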
Thanks,
Regards.
> > + }
> >
> > skip_bug_print:
> > restore_bytes(s, what, value, fault, end);
> > @@ -1268,7 +1272,7 @@ static int check_pad_bytes(struct kmem_cache *s, struct slab *slab, u8 *p)
> > return 1;
> >
> > return check_bytes_and_report(s, slab, p, "Object padding",
> > - p + off, POISON_INUSE, size_from_object(s) - off);
> > + p + off, POISON_INUSE, size_from_object(s) - off, 1);
> > }
> >
> > /* Check the pad bytes at the end of a slab page */
> > @@ -1318,11 +1322,11 @@ static int check_object(struct kmem_cache *s, struct slab *slab,
> >
> > if (s->flags & SLAB_RED_ZONE) {
> > if (!check_bytes_and_report(s, slab, object, "Left Redzone",
> > - object - s->red_left_pad, val, s->red_left_pad))
> > + object - s->red_left_pad, val, s->red_left_pad, ret))
> > ret = 0;
> >
> > if (!check_bytes_and_report(s, slab, object, "Right Redzone",
> > - endobject, val, s->inuse - s->object_size))
> > + endobject, val, s->inuse - s->object_size, ret))
> > ret = 0;
> >
> > if (slub_debug_orig_size(s) && val == SLUB_RED_ACTIVE) {
> > @@ -1331,7 +1335,7 @@ static int check_object(struct kmem_cache *s, struct slab *slab,
> > if (s->object_size > orig_size &&
> > !check_bytes_and_report(s, slab, object,
> > "kmalloc Redzone", p + orig_size,
> > - val, s->object_size - orig_size)) {
> > + val, s->object_size - orig_size, ret)) {
> > ret = 0;
> > }
> > }
> > @@ -1339,7 +1343,7 @@ static int check_object(struct kmem_cache *s, struct slab *slab,
> > if ((s->flags & SLAB_POISON) && s->object_size < s->inuse) {
> > if (!check_bytes_and_report(s, slab, p, "Alignment padding",
> > endobject, POISON_INUSE,
> > - s->inuse - s->object_size))
> > + s->inuse - s->object_size, ret))
> > ret = 0;
> > }
> > }
> > @@ -1355,11 +1359,11 @@ static int check_object(struct kmem_cache *s, struct slab *slab,
> > if (kasan_meta_size < s->object_size - 1 &&
> > !check_bytes_and_report(s, slab, p, "Poison",
> > p + kasan_meta_size, POISON_FREE,
> > - s->object_size - kasan_meta_size - 1))
> > + s->object_size - kasan_meta_size - 1, ret))
> > ret = 0;
> > if (kasan_meta_size < s->object_size &&
> > !check_bytes_and_report(s, slab, p, "End Poison",
> > - p + s->object_size - 1, POISON_END, 1))
> > + p + s->object_size - 1, POISON_END, 1, ret))
> > ret = 0;
> > }
> > /*
> > @@ -1385,11 +1389,6 @@ static int check_object(struct kmem_cache *s, struct slab *slab,
> > ret = 0;
> > }
> >
> > - if (!ret && !slab_in_kunit_test()) {
> > - print_trailer(s, slab, object);
> > - add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
> > - }
> > -
> > return ret;
> > }
> >
>
>