linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] mm: fix dead-loop bug
@ 2025-01-01 13:21 Jerry
  2025-01-02  4:06 ` Matthew Wilcox
  0 siblings, 1 reply; 2+ messages in thread
From: Jerry @ 2025-01-01 13:21 UTC (permalink / raw)
  To: akpm; +Cc: linux-mm, linux-kernel, Jerry

KERNEL-5.10.232.generic_perform_write()->balance_dirty_pages_ratelimited()->        
balance_dirty_pages() At this point,if the block device removed, 
the process  may trapped in a dead loop.and the memory of the bdi 
device hass also been released.

Insert a USB flash and directly writing to device node.
Remove the USB flash while writing, and the writing process 
may trapped in a dead loop.

user code:

int fd = open("/dev/sda", O_RDWR);
char *p = malloc(0x1000000);
memset(p, 0xa, 0x1000000);
for(int i=0; i<100; i++) {


	write(fd, p, 0x1000000);


}
return;

ISSUE 1: Dead loop may occr here.
CALL trace:

schedule_timeout()

io_schedule_timeout()

balance_dirty_pages()

balance_dirty_pages_ratelimited()

balance_dirty_pages_ratelimited)

ISSUE 2 , BDI&WB memory illegal .
void balance_dirty_pages_ratelimited(struct address_space *mapping)
{
	struct inode *inode = mapping->host;
	struct backing_dev_info *bdi = inode_to_bdi(inode);
	struct bdi_writeback *wb = NULL;
	int ratelimit;
	.....

}
BDI&WB memory belong to SCSI device. If the USB flash remove, 
The BDI&WB memeory released by below process:

bdi_unregister()

del_gendisk()

sd_remove()

__device_release_driver()

device_release_driver()

bus_remove_device()

device_del()

__scsi_remove_deice()

scsi_forget_host()

scsi_remove_host()

usb_stor_disconnect()

...

usb_unbind_initerface()

usb_disable_device()

usb_disconnect()

Signed-off-by: Jerry <jerrydeng079@163.com>
---
 mm/backing-dev.c    |  1 +
 mm/filemap.c        |  6 ++++-
 mm/page-writeback.c | 56 ++++++++++++++++++++++++++++++++++++++++-----
 3 files changed, 56 insertions(+), 7 deletions(-)

diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index dd08ab928..0b86bd980 100755
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -878,6 +878,7 @@ void bdi_unregister(struct backing_dev_info *bdi)
 	/* make sure nobody finds us on the bdi_list anymore */
 	bdi_remove_from_list(bdi);
 	wb_shutdown(&bdi->wb);
+	wake_up(&(bdi->wb_waitq));
 	cgwb_bdi_unregister(bdi);
 
 	/*
diff --git a/mm/filemap.c b/mm/filemap.c
index 3b0d8c6dd..48424240f 100755
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -3300,6 +3300,7 @@ ssize_t generic_perform_write(struct file *file,
 	long status = 0;
 	ssize_t written = 0;
 	unsigned int flags = 0;
+	errseq_t err = 0;
 
 	do {
 		struct page *page;
@@ -3368,8 +3369,11 @@ ssize_t generic_perform_write(struct file *file,
 		}
 		pos += copied;
 		written += copied;
-
+		
 		balance_dirty_pages_ratelimited(mapping);
+		err = errseq_check(&mapping->wb_err, 0);
+		if (err)
+			return err;
 	} while (iov_iter_count(i));
 
 	return written ? written : status;
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index b2c916474..001dd0c5e 100755
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -146,6 +146,13 @@ struct dirty_throttle_control {
 	unsigned long		pos_ratio;
 };
 
+struct bdi_wq_callback_entry {
+	struct task_struct *tsk;
+	struct wait_queue_entry  wq_entry;
+	int bdi_unregister;
+};
+
+
 /*
  * Length of period for aging writeout fractions of bdis. This is an
  * arbitrarily chosen number. The longer the period, the slower fractions will
@@ -1567,6 +1574,22 @@ static inline void wb_dirty_limits(struct dirty_throttle_control *dtc)
 	}
 }
 
+
+static int wake_up_bdi_waitq(wait_queue_entry_t *wait, unsigned int mode,
+				int sync, void *key)
+{
+
+	struct bdi_wq_callback_entry *bwce =
+		container_of(wait, struct bdi_wq_callback_entry, wq_entry);
+
+	bwce->bdi_unregister = 1;
+	if (bwce->tsk)
+		wake_up_process(bwce->tsk);
+
+	return 0;
+}
+
+
 /*
  * balance_dirty_pages() must be called by processes which are generating dirty
  * data.  It looks at the number of dirty pages in the machine and will force
@@ -1574,7 +1597,7 @@ static inline void wb_dirty_limits(struct dirty_throttle_control *dtc)
  * If we're over `background_thresh' then the writeback threads are woken to
  * perform some writeout.
  */
-static void balance_dirty_pages(struct bdi_writeback *wb,
+static int balance_dirty_pages(struct bdi_writeback *wb,
 				unsigned long pages_dirtied)
 {
 	struct dirty_throttle_control gdtc_stor = { GDTC_INIT(wb) };
@@ -1595,7 +1618,15 @@ static void balance_dirty_pages(struct bdi_writeback *wb,
 	struct backing_dev_info *bdi = wb->bdi;
 	bool strictlimit = bdi->capabilities & BDI_CAP_STRICTLIMIT;
 	unsigned long start_time = jiffies;
+	struct bdi_wq_callback_entry bwce = {NULL};
+	int ret = 0;
 
+	if (!test_bit(WB_registered, &wb->state))
+		return -EIO;
+	
+	init_waitqueue_func_entry(&(bwce.wq_entry), wake_up_bdi_waitq);
+	bwce.tsk = current;
+	add_wait_queue(&(bdi->wb_waitq), &(bwce.wq_entry));
 	for (;;) {
 		unsigned long now = jiffies;
 		unsigned long dirty, thresh, bg_thresh;
@@ -1816,6 +1847,11 @@ static void balance_dirty_pages(struct bdi_writeback *wb,
 		wb->dirty_sleep = now;
 		io_schedule_timeout(pause);
 
+		/* bid is unregister NULL, all bdi memory is illegal */
+		if (bwce.bdi_unregister) {
+			ret = -EIO;
+			break;
+		}
 		current->dirty_paused_when = now + pause;
 		current->nr_dirtied = 0;
 		current->nr_dirtied_pause = nr_dirtied_pause;
@@ -1844,11 +1880,14 @@ static void balance_dirty_pages(struct bdi_writeback *wb,
 			break;
 	}
 
+	if (bwce.bdi_unregister == 0)
+		remove_wait_queue(&(bdi->wb_waitq), &(bwce.wq_entry));
+	
 	if (!dirty_exceeded && wb->dirty_exceeded)
 		wb->dirty_exceeded = 0;
 
 	if (writeback_in_progress(wb))
-		return;
+		return ret;
 
 	/*
 	 * In laptop mode, we wait until hitting the higher threshold before
@@ -1859,10 +1898,12 @@ static void balance_dirty_pages(struct bdi_writeback *wb,
 	 * background_thresh, to keep the amount of dirty memory low.
 	 */
 	if (laptop_mode)
-		return;
+		return ret;
 
 	if (nr_reclaimable > gdtc->bg_thresh)
 		wb_start_background_writeback(wb);
+
+	return ret;
 }
 
 static DEFINE_PER_CPU(int, bdp_ratelimits);
@@ -1944,9 +1985,12 @@ void balance_dirty_pages_ratelimited(struct address_space *mapping)
 	}
 	preempt_enable();
 
-	if (unlikely(current->nr_dirtied >= ratelimit))
-		balance_dirty_pages(wb, current->nr_dirtied);
-
+	if (unlikely(current->nr_dirtied >= ratelimit)) {
+	
+		if (balance_dirty_pages(wb, current->nr_dirtied) < 0)
+			errseq_set(&(mapping->wb_err), -EIO);
+	}
+	
 	wb_put(wb);
 }
 EXPORT_SYMBOL(balance_dirty_pages_ratelimited);
-- 
2.43.0



^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [PATCH] mm: fix dead-loop bug
  2025-01-01 13:21 [PATCH] mm: fix dead-loop bug Jerry
@ 2025-01-02  4:06 ` Matthew Wilcox
  0 siblings, 0 replies; 2+ messages in thread
From: Matthew Wilcox @ 2025-01-02  4:06 UTC (permalink / raw)
  To: Jerry; +Cc: akpm, linux-mm, linux-kernel, linux-block, linux-scsi

On Wed, Jan 01, 2025 at 09:21:48PM +0800, Jerry wrote:
> KERNEL-5.10.232.generic_perform_write()->balance_dirty_pages_ratelimited()->        

Can you reproduce this with a kernel that's newer than four years old?

> balance_dirty_pages() At this point,if the block device removed, 
> the process  may trapped in a dead loop.and the memory of the bdi 
> device hass also been released.

I think this is a block layer bug, not an MM bug.  Devices shouldn't
go away while the MM is still writing to them.

> Insert a USB flash and directly writing to device node.
> Remove the USB flash while writing, and the writing process 
> may trapped in a dead loop.
> 
> user code:
> 
> int fd = open("/dev/sda", O_RDWR);
> char *p = malloc(0x1000000);
> memset(p, 0xa, 0x1000000);
> for(int i=0; i<100; i++) {
> 
> 
> 	write(fd, p, 0x1000000);
> 
> 
> }
> return;
> 
> ISSUE 1: Dead loop may occr here.
> CALL trace:
> 
> schedule_timeout()
> 
> io_schedule_timeout()
> 
> balance_dirty_pages()
> 
> balance_dirty_pages_ratelimited()
> 
> balance_dirty_pages_ratelimited)
> 
> ISSUE 2 , BDI&WB memory illegal .
> void balance_dirty_pages_ratelimited(struct address_space *mapping)
> {
> 	struct inode *inode = mapping->host;
> 	struct backing_dev_info *bdi = inode_to_bdi(inode);
> 	struct bdi_writeback *wb = NULL;
> 	int ratelimit;
> 	.....
> 
> }
> BDI&WB memory belong to SCSI device. If the USB flash remove, 
> The BDI&WB memeory released by below process:
> 
> bdi_unregister()
> 
> del_gendisk()
> 
> sd_remove()
> 
> __device_release_driver()
> 
> device_release_driver()
> 
> bus_remove_device()
> 
> device_del()
> 
> __scsi_remove_deice()
> 
> scsi_forget_host()
> 
> scsi_remove_host()
> 
> usb_stor_disconnect()
> 
> ...
> 
> usb_unbind_initerface()
> 
> usb_disable_device()
> 
> usb_disconnect()
> 
> Signed-off-by: Jerry <jerrydeng079@163.com>
> ---
>  mm/backing-dev.c    |  1 +
>  mm/filemap.c        |  6 ++++-
>  mm/page-writeback.c | 56 ++++++++++++++++++++++++++++++++++++++++-----
>  3 files changed, 56 insertions(+), 7 deletions(-)
> 
> diff --git a/mm/backing-dev.c b/mm/backing-dev.c
> index dd08ab928..0b86bd980 100755
> --- a/mm/backing-dev.c
> +++ b/mm/backing-dev.c
> @@ -878,6 +878,7 @@ void bdi_unregister(struct backing_dev_info *bdi)
>  	/* make sure nobody finds us on the bdi_list anymore */
>  	bdi_remove_from_list(bdi);
>  	wb_shutdown(&bdi->wb);
> +	wake_up(&(bdi->wb_waitq));
>  	cgwb_bdi_unregister(bdi);
>  
>  	/*
> diff --git a/mm/filemap.c b/mm/filemap.c
> index 3b0d8c6dd..48424240f 100755
> --- a/mm/filemap.c
> +++ b/mm/filemap.c
> @@ -3300,6 +3300,7 @@ ssize_t generic_perform_write(struct file *file,
>  	long status = 0;
>  	ssize_t written = 0;
>  	unsigned int flags = 0;
> +	errseq_t err = 0;
>  
>  	do {
>  		struct page *page;
> @@ -3368,8 +3369,11 @@ ssize_t generic_perform_write(struct file *file,
>  		}
>  		pos += copied;
>  		written += copied;
> -
> +		
>  		balance_dirty_pages_ratelimited(mapping);
> +		err = errseq_check(&mapping->wb_err, 0);
> +		if (err)
> +			return err;
>  	} while (iov_iter_count(i));
>  
>  	return written ? written : status;
> diff --git a/mm/page-writeback.c b/mm/page-writeback.c
> index b2c916474..001dd0c5e 100755
> --- a/mm/page-writeback.c
> +++ b/mm/page-writeback.c
> @@ -146,6 +146,13 @@ struct dirty_throttle_control {
>  	unsigned long		pos_ratio;
>  };
>  
> +struct bdi_wq_callback_entry {
> +	struct task_struct *tsk;
> +	struct wait_queue_entry  wq_entry;
> +	int bdi_unregister;
> +};
> +
> +
>  /*
>   * Length of period for aging writeout fractions of bdis. This is an
>   * arbitrarily chosen number. The longer the period, the slower fractions will
> @@ -1567,6 +1574,22 @@ static inline void wb_dirty_limits(struct dirty_throttle_control *dtc)
>  	}
>  }
>  
> +
> +static int wake_up_bdi_waitq(wait_queue_entry_t *wait, unsigned int mode,
> +				int sync, void *key)
> +{
> +
> +	struct bdi_wq_callback_entry *bwce =
> +		container_of(wait, struct bdi_wq_callback_entry, wq_entry);
> +
> +	bwce->bdi_unregister = 1;
> +	if (bwce->tsk)
> +		wake_up_process(bwce->tsk);
> +
> +	return 0;
> +}
> +
> +
>  /*
>   * balance_dirty_pages() must be called by processes which are generating dirty
>   * data.  It looks at the number of dirty pages in the machine and will force
> @@ -1574,7 +1597,7 @@ static inline void wb_dirty_limits(struct dirty_throttle_control *dtc)
>   * If we're over `background_thresh' then the writeback threads are woken to
>   * perform some writeout.
>   */
> -static void balance_dirty_pages(struct bdi_writeback *wb,
> +static int balance_dirty_pages(struct bdi_writeback *wb,
>  				unsigned long pages_dirtied)
>  {
>  	struct dirty_throttle_control gdtc_stor = { GDTC_INIT(wb) };
> @@ -1595,7 +1618,15 @@ static void balance_dirty_pages(struct bdi_writeback *wb,
>  	struct backing_dev_info *bdi = wb->bdi;
>  	bool strictlimit = bdi->capabilities & BDI_CAP_STRICTLIMIT;
>  	unsigned long start_time = jiffies;
> +	struct bdi_wq_callback_entry bwce = {NULL};
> +	int ret = 0;
>  
> +	if (!test_bit(WB_registered, &wb->state))
> +		return -EIO;
> +	
> +	init_waitqueue_func_entry(&(bwce.wq_entry), wake_up_bdi_waitq);
> +	bwce.tsk = current;
> +	add_wait_queue(&(bdi->wb_waitq), &(bwce.wq_entry));
>  	for (;;) {
>  		unsigned long now = jiffies;
>  		unsigned long dirty, thresh, bg_thresh;
> @@ -1816,6 +1847,11 @@ static void balance_dirty_pages(struct bdi_writeback *wb,
>  		wb->dirty_sleep = now;
>  		io_schedule_timeout(pause);
>  
> +		/* bid is unregister NULL, all bdi memory is illegal */
> +		if (bwce.bdi_unregister) {
> +			ret = -EIO;
> +			break;
> +		}
>  		current->dirty_paused_when = now + pause;
>  		current->nr_dirtied = 0;
>  		current->nr_dirtied_pause = nr_dirtied_pause;
> @@ -1844,11 +1880,14 @@ static void balance_dirty_pages(struct bdi_writeback *wb,
>  			break;
>  	}
>  
> +	if (bwce.bdi_unregister == 0)
> +		remove_wait_queue(&(bdi->wb_waitq), &(bwce.wq_entry));
> +	
>  	if (!dirty_exceeded && wb->dirty_exceeded)
>  		wb->dirty_exceeded = 0;
>  
>  	if (writeback_in_progress(wb))
> -		return;
> +		return ret;
>  
>  	/*
>  	 * In laptop mode, we wait until hitting the higher threshold before
> @@ -1859,10 +1898,12 @@ static void balance_dirty_pages(struct bdi_writeback *wb,
>  	 * background_thresh, to keep the amount of dirty memory low.
>  	 */
>  	if (laptop_mode)
> -		return;
> +		return ret;
>  
>  	if (nr_reclaimable > gdtc->bg_thresh)
>  		wb_start_background_writeback(wb);
> +
> +	return ret;
>  }
>  
>  static DEFINE_PER_CPU(int, bdp_ratelimits);
> @@ -1944,9 +1985,12 @@ void balance_dirty_pages_ratelimited(struct address_space *mapping)
>  	}
>  	preempt_enable();
>  
> -	if (unlikely(current->nr_dirtied >= ratelimit))
> -		balance_dirty_pages(wb, current->nr_dirtied);
> -
> +	if (unlikely(current->nr_dirtied >= ratelimit)) {
> +	
> +		if (balance_dirty_pages(wb, current->nr_dirtied) < 0)
> +			errseq_set(&(mapping->wb_err), -EIO);
> +	}
> +	
>  	wb_put(wb);
>  }
>  EXPORT_SYMBOL(balance_dirty_pages_ratelimited);
> -- 
> 2.43.0
> 
> 


^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2025-01-02  4:06 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-01-01 13:21 [PATCH] mm: fix dead-loop bug Jerry
2025-01-02  4:06 ` Matthew Wilcox

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox