From: Christoph Hellwig <hch@lst.de>
To: Jens Axboe <axboe@kernel.dk>
Cc: Vlastimil Babka <vbabka@suse.cz>,
Andrew Morton <akpm@linux-foundation.org>,
Christoph Lameter <cl@gentwo.org>,
David Rientjes <rientjes@google.com>,
Roman Gushchin <roman.gushchin@linux.dev>,
Harry Yoo <harry.yoo@oracle.com>,
"Martin K. Petersen" <martin.petersen@oracle.com>,
linux-block@vger.kernel.org, linux-mm@kvack.org
Subject: [PATCH 3/3] block: make bio auto-integrity deadlock safe
Date: Thu, 23 Oct 2025 10:08:56 +0200 [thread overview]
Message-ID: <20251023080919.9209-4-hch@lst.de> (raw)
In-Reply-To: <20251023080919.9209-1-hch@lst.de>
The current block layer automatic integrity protection allocates the
actual integrity buffer, which has three problems:
- because it happens at the bottom of the I/O stack and doesn't use a
mempool it can deadlock under load
- because the data size in a bio is almost unbounded when using large
folios it can relatively easily exceed the maximum kmalloc size
- even when it does not exceed the maximum kmalloc size, it could
exceed the maximum segment size of the device
Fix this by limiting the I/O size so that we can allocate at least a
2MiB integrity buffer, i.e. 128MiB for 8 byte PI and 512 byte integrity
intervals, and create a mempool as a last resort for this maximum size,
mirroring the scheme used for bvecs. As a nice upside none of this
can fail now, so we remove the error handling and open code the
trivial addition of the bip vec.
The new allocation helpers sit outside of bio-integrity-auto.c because
I plan to reuse them for file system based PI in the near future.
Fixes: 7ba1ba12eeef ("block: Block layer data integrity support")
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
block/bio-integrity-auto.c | 22 +++-------------
block/bio-integrity.c | 47 +++++++++++++++++++++++++++++++++++
block/blk-settings.c | 11 ++++++++
include/linux/bio-integrity.h | 6 +++++
include/linux/blk-integrity.h | 5 ++++
5 files changed, 72 insertions(+), 19 deletions(-)
diff --git a/block/bio-integrity-auto.c b/block/bio-integrity-auto.c
index 2f4a244749ac..9850c338548d 100644
--- a/block/bio-integrity-auto.c
+++ b/block/bio-integrity-auto.c
@@ -29,7 +29,7 @@ static void bio_integrity_finish(struct bio_integrity_data *bid)
{
bid->bio->bi_integrity = NULL;
bid->bio->bi_opf &= ~REQ_INTEGRITY;
- kfree(bvec_virt(bid->bip.bip_vec));
+ bio_integrity_free_buf(&bid->bip);
mempool_free(bid, &bid_pool);
}
@@ -110,8 +110,6 @@ bool bio_integrity_prep(struct bio *bio)
struct bio_integrity_data *bid;
bool set_flags = true;
gfp_t gfp = GFP_NOIO;
- unsigned int len;
- void *buf;
if (!bi)
return true;
@@ -152,17 +150,12 @@ bool bio_integrity_prep(struct bio *bio)
if (WARN_ON_ONCE(bio_has_crypt_ctx(bio)))
return true;
- /* Allocate kernel buffer for protection data */
- len = bio_integrity_bytes(bi, bio_sectors(bio));
- buf = kmalloc(len, gfp);
- if (!buf)
- goto err_end_io;
bid = mempool_alloc(&bid_pool, GFP_NOIO);
bio_integrity_init(bio, &bid->bip, &bid->bvec, 1);
-
bid->bio = bio;
-
bid->bip.bip_flags |= BIP_BLOCK_INTEGRITY;
+ bio_integrity_alloc_buf(bio, gfp & __GFP_ZERO);
+
bip_set_seed(&bid->bip, bio->bi_iter.bi_sector);
if (set_flags) {
@@ -174,21 +167,12 @@ bool bio_integrity_prep(struct bio *bio)
bid->bip.bip_flags |= BIP_CHECK_REFTAG;
}
- if (bio_integrity_add_page(bio, virt_to_page(buf), len,
- offset_in_page(buf)) < len)
- goto err_end_io;
-
/* Auto-generate integrity metadata if this is a write */
if (bio_data_dir(bio) == WRITE && bip_should_check(&bid->bip))
blk_integrity_generate(bio);
else
bid->saved_bio_iter = bio->bi_iter;
return true;
-
-err_end_io:
- bio->bi_status = BLK_STS_RESOURCE;
- bio_endio(bio);
- return false;
}
EXPORT_SYMBOL(bio_integrity_prep);
diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index bed26f1ec869..a9896d563c1c 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -14,6 +14,44 @@ struct bio_integrity_alloc {
struct bio_vec bvecs[];
};
+static mempool_t integrity_buf_pool;
+
+void bio_integrity_alloc_buf(struct bio *bio, bool zero_buffer)
+{
+ struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
+ struct bio_integrity_payload *bip = bio_integrity(bio);
+ unsigned int len = bio_integrity_bytes(bi, bio_sectors(bio));
+ gfp_t gfp = GFP_NOIO | (zero_buffer ? __GFP_ZERO : 0);
+ void *buf;
+
+ buf = kmalloc(len, try_alloc_gfp(gfp));
+ if (unlikely(!buf)) {
+ struct page *page;
+
+ page = mempool_alloc(&integrity_buf_pool, GFP_NOFS);
+ if (zero_buffer)
+ memset(page_address(page), 0, len);
+ bvec_set_page(&bip->bip_vec[0], page, len, 0);
+ bip->bip_flags |= BIP_MEMPOOL;
+ } else {
+ bvec_set_page(&bip->bip_vec[0], virt_to_page(buf), len,
+ offset_in_page(buf));
+ }
+
+ bip->bip_vcnt = 1;
+ bip->bip_iter.bi_size = len;
+}
+
+void bio_integrity_free_buf(struct bio_integrity_payload *bip)
+{
+ struct bio_vec *bv = &bip->bip_vec[0];
+
+ if (bip->bip_flags & BIP_MEMPOOL)
+ mempool_free(bv->bv_page, &integrity_buf_pool);
+ else
+ kfree(bvec_virt(bv));
+}
+
/**
* bio_integrity_free - Free bio integrity payload
* @bio: bio containing bip to be freed
@@ -438,3 +476,12 @@ int bio_integrity_clone(struct bio *bio, struct bio *bio_src,
return 0;
}
+
+static int __init bio_integrity_initfn(void)
+{
+ if (mempool_init_page_pool(&integrity_buf_pool, BIO_POOL_SIZE,
+ get_order(BLK_INTEGRITY_MAX_SIZE)))
+ panic("bio: can't create integrity buf pool\n");
+ return 0;
+}
+subsys_initcall(bio_integrity_initfn);
diff --git a/block/blk-settings.c b/block/blk-settings.c
index d74b13ec8e54..04e88615032a 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -194,6 +194,17 @@ static int blk_validate_integrity_limits(struct queue_limits *lim)
(1U << bi->interval_exp) - 1);
}
+ /*
+ * The block layer automatically adds integrity data for bios that don't
+ * already have it. It allocates a single segment. Limit the I/O size
+ * so that a single maximum size metadata segment can cover the
+ * integrity data for the entire I/O.
+ */
+ lim->max_sectors = min3(lim->max_sectors,
+ BLK_INTEGRITY_MAX_SIZE /
+ bi->pi_tuple_size * lim->logical_block_size,
+ lim->max_segment_size >> SECTOR_SHIFT);
+
return 0;
}
diff --git a/include/linux/bio-integrity.h b/include/linux/bio-integrity.h
index 851254f36eb3..3d05296a5afe 100644
--- a/include/linux/bio-integrity.h
+++ b/include/linux/bio-integrity.h
@@ -14,6 +14,8 @@ enum bip_flags {
BIP_CHECK_REFTAG = 1 << 6, /* reftag check */
BIP_CHECK_APPTAG = 1 << 7, /* apptag check */
BIP_P2P_DMA = 1 << 8, /* using P2P address */
+
+ BIP_MEMPOOL = 1 << 15, /* buffer backed by mempool */
};
struct bio_integrity_payload {
@@ -140,4 +142,8 @@ static inline int bio_integrity_add_page(struct bio *bio, struct page *page,
return 0;
}
#endif /* CONFIG_BLK_DEV_INTEGRITY */
+
+void bio_integrity_alloc_buf(struct bio *bio, bool zero_buffer);
+void bio_integrity_free_buf(struct bio_integrity_payload *bip);
+
#endif /* _LINUX_BIO_INTEGRITY_H */
diff --git a/include/linux/blk-integrity.h b/include/linux/blk-integrity.h
index b659373788f6..c2030fd8ba0a 100644
--- a/include/linux/blk-integrity.h
+++ b/include/linux/blk-integrity.h
@@ -8,6 +8,11 @@
struct request;
+/*
+ * Maximum contiguous integrity buffer allocation.
+ */
+#define BLK_INTEGRITY_MAX_SIZE SZ_2M
+
enum blk_integrity_flags {
BLK_INTEGRITY_NOVERIFY = 1 << 0,
BLK_INTEGRITY_NOGENERATE = 1 << 1,
--
2.47.3
next prev parent reply other threads:[~2025-10-23 8:09 UTC|newest]
Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-10-23 8:08 make block layer auto-PI " Christoph Hellwig
2025-10-23 8:08 ` [PATCH 1/3] slab, block: generalize bvec_alloc_gfp Christoph Hellwig
2025-10-24 1:44 ` Martin K. Petersen
2025-10-24 8:38 ` Vlastimil Babka
2025-10-24 9:05 ` Christoph Hellwig
2025-10-26 21:19 ` Matthew Wilcox
2025-10-27 6:47 ` Christoph Hellwig
2025-10-27 13:09 ` Matthew Wilcox
2025-10-27 13:14 ` Christoph Hellwig
2025-10-23 8:08 ` [PATCH 2/3] block: blocking mempool_alloc doesn't fail Christoph Hellwig
2025-10-24 1:45 ` Martin K. Petersen
2025-10-23 8:08 ` Christoph Hellwig [this message]
2025-10-24 1:47 ` [PATCH 3/3] block: make bio auto-integrity deadlock safe Martin K. Petersen
2025-10-27 6:03 ` Kanchan Joshi
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20251023080919.9209-4-hch@lst.de \
--to=hch@lst.de \
--cc=akpm@linux-foundation.org \
--cc=axboe@kernel.dk \
--cc=cl@gentwo.org \
--cc=harry.yoo@oracle.com \
--cc=linux-block@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=martin.petersen@oracle.com \
--cc=rientjes@google.com \
--cc=roman.gushchin@linux.dev \
--cc=vbabka@suse.cz \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox