linux-mm.kvack.org archive mirror
From: Andreas Hindborg <a.hindborg@kernel.org>
To: "Boqun Feng" <boqun.feng@gmail.com>,
	"Jens Axboe" <axboe@kernel.dk>, "Miguel Ojeda" <ojeda@kernel.org>,
	"Gary Guo" <gary@garyguo.net>,
	"Björn Roy Baron" <bjorn3_gh@protonmail.com>,
	"Benno Lossin" <lossin@kernel.org>,
	"Alice Ryhl" <aliceryhl@google.com>,
	"Trevor Gross" <tmgross@umich.edu>,
	"Danilo Krummrich" <dakr@kernel.org>,
	"FUJITA Tomonori" <fujita.tomonori@gmail.com>,
	"Frederic Weisbecker" <frederic@kernel.org>,
	"Lyude Paul" <lyude@redhat.com>,
	"Thomas Gleixner" <tglx@kernel.org>,
	"Anna-Maria Behnsen" <anna-maria@linutronix.de>,
	"John Stultz" <jstultz@google.com>,
	"Stephen Boyd" <sboyd@kernel.org>,
	"Lorenzo Stoakes" <lorenzo.stoakes@oracle.com>,
	"Liam R. Howlett" <Liam.Howlett@oracle.com>
Cc: linux-block@vger.kernel.org, rust-for-linux@vger.kernel.org,
	 linux-kernel@vger.kernel.org, linux-mm@kvack.org,
	 Andreas Hindborg <a.hindborg@kernel.org>
Subject: [PATCH 31/79] block: rnull: add volatile cache emulation
Date: Mon, 16 Feb 2026 00:35:18 +0100
Message-ID: <20260216-rnull-v6-19-rc5-send-v1-31-de9a7af4b469@kernel.org>
In-Reply-To: <20260216-rnull-v6-19-rc5-send-v1-0-de9a7af4b469@kernel.org>

Add volatile cache emulation to rnull. When enabled via the
`cache_size_mib` configfs attribute, writes are first stored in a volatile
cache before being written back to the simulated non-volatile storage.

Pages now live in two xarrays: a cache tree and a disk tree. Writes land
in the cache tree until the configured cache budget is exhausted; after
that, cached pages are flushed to the disk tree in circular order
(tracked via `next_flush_sector`) to make room for new cache pages. Reads
consult the cache tree first and fall back to the disk tree. With the
cache disabled, reads and writes go directly to the disk tree.
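
Below is a minimal userspace sketch of this policy, for illustration only:
plain std Rust with invented names, where `BTreeMap`s stand in for the
kernel xarrays and eviction moves whole pages (the driver itself merges
partially valid pages into existing disk pages sector by sector):

use std::collections::BTreeMap;

const PAGE_BYTES: u64 = 4096;

struct Model {
    cache: BTreeMap<usize, Vec<u8>>, // stands in for the cache tree
    disk: BTreeMap<usize, Vec<u8>>,  // stands in for the disk tree
    budget: u64,                     // bytes, i.e. cache_size_mib << 20
    next_flush: usize,               // mirrors next_flush_sector
}

impl Model {
    fn write_page(&mut self, index: usize, data: Vec<u8>) {
        if self.budget == 0 {
            // Cache disabled: write straight to the disk map.
            self.disk.insert(index, data);
            return;
        }
        let used = self.cache.len() as u64 * PAGE_BYTES;
        if used >= self.budget && !self.cache.contains_key(&index) {
            self.flush_one();
        }
        self.cache.insert(index, data);
    }

    // Evict the first cached page at or after `next_flush`, wrapping to
    // the lowest index when nothing follows it.
    fn flush_one(&mut self) {
        let victim = self
            .cache
            .range(self.next_flush..)
            .next()
            .map(|(&i, _)| i)
            .or_else(|| self.cache.keys().next().copied());
        if let Some(i) = victim {
            let page = self.cache.remove(&i).expect("victim is cached");
            self.disk.insert(i, page);
            self.next_flush = i + 1;
        }
    }

    fn read_page(&self, index: usize) -> Option<&[u8]> {
        // Reads prefer the cache and fall back to the disk map.
        self.cache
            .get(&index)
            .or_else(|| self.disk.get(&index))
            .map(Vec::as_slice)
    }
}

fn main() {
    let mut m = Model {
        cache: BTreeMap::new(),
        disk: BTreeMap::new(),
        budget: 2 * PAGE_BYTES, // room for two cached pages
        next_flush: 0,
    };
    for i in 0..4 {
        m.write_page(i, vec![i as u8; PAGE_BYTES as usize]);
    }
    // Two pages fit the budget; the oldest indices were flushed to disk.
    assert_eq!(m.cache.len(), 2);
    assert!(m.read_page(0).is_some());
}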

Signed-off-by: Andreas Hindborg <a.hindborg@kernel.org>
---
 drivers/block/rnull/configfs.rs          |  35 +++-
 drivers/block/rnull/disk_storage.rs      | 255 ++++++++++++++++++++++++
 drivers/block/rnull/disk_storage/page.rs |  75 ++++++++
 drivers/block/rnull/rnull.rs             | 316 ++++++++++++++++++-------------
 4 files changed, 544 insertions(+), 137 deletions(-)

diff --git a/drivers/block/rnull/configfs.rs b/drivers/block/rnull/configfs.rs
index c08a3cbd66f18..d679f12ee6749 100644
--- a/drivers/block/rnull/configfs.rs
+++ b/drivers/block/rnull/configfs.rs
@@ -1,9 +1,11 @@
 // SPDX-License-Identifier: GPL-2.0
 
 use super::{
+    DiskStorage,
     NullBlkDevice,
     THIS_MODULE, //
 };
+use core::fmt::Write;
 use kernel::{
     bindings,
     block::{
@@ -19,10 +21,7 @@
         AttributeOperations, //
     },
     configfs_attrs,
-    fmt::{
-        self,
-        Write as _, //
-    },
+    fmt,
     new_mutex,
     page::PAGE_SIZE,
     prelude::*,
@@ -105,17 +104,19 @@ fn make_group(
                 badblocks: 12,
                 badblocks_once: 13,
                 badblocks_partial_io: 14,
+                cache_size_mib: 15,
             ],
         };
 
+        let block_size = 4096;
         Ok(configfs::Group::new(
             name.try_into()?,
             item_type,
             // TODO: cannot coerce new_mutex!() to impl PinInit<_, Error>, so put mutex inside
-            try_pin_init!( DeviceConfig {
+            try_pin_init!(DeviceConfig {
                 data <- new_mutex!(DeviceConfigInner {
                     powered: false,
-                    block_size: 4096,
+                    block_size,
                     rotational: false,
                     disk: None,
                     capacity_mib: 4096,
@@ -130,6 +131,11 @@ fn make_group(
                     bad_blocks: Arc::pin_init(BadBlocks::new(false), GFP_KERNEL)?,
                     bad_blocks_once: false,
                     bad_blocks_partial_io: false,
+                    disk_storage: Arc::pin_init(
+                        DiskStorage::new(0, block_size as usize),
+                        GFP_KERNEL
+                    )?,
+                    cache_size_mib: 0,
                 }),
             }),
             core::iter::empty(),
@@ -192,6 +198,8 @@ struct DeviceConfigInner {
     bad_blocks: Arc<BadBlocks>,
     bad_blocks_once: bool,
     bad_blocks_partial_io: bool,
+    cache_size_mib: u64,
+    disk_storage: Arc<DiskStorage>,
 }
 
 #[vtable]
@@ -230,6 +238,7 @@ fn store(this: &DeviceConfig, page: &[u8]) -> Result {
                 bad_blocks: guard.bad_blocks.clone(),
                 bad_blocks_once: guard.bad_blocks_once,
                 bad_blocks_partial_io: guard.bad_blocks_partial_io,
+                storage: guard.disk_storage.clone(),
             })?);
             guard.powered = true;
         } else if guard.powered && !power_op {
@@ -433,3 +442,17 @@ fn store(this: &DeviceConfig, page: &[u8]) -> Result {
 
 configfs_simple_bool_field!(DeviceConfig, 13, bad_blocks_once);
 configfs_simple_bool_field!(DeviceConfig, 14, bad_blocks_partial_io);
+configfs_attribute!(DeviceConfig, 15,
+    show: |this, page| show_field(this.data.lock().cache_size_mib, page),
+    store: |this, page| store_with_power_check(this, page, |this, page| {
+        let text = core::str::from_utf8(page)?.trim();
+        let value = text.parse::<u64>().map_err(|_| EINVAL)?;
+        let mut guard = this.data.lock();
+        guard.disk_storage = Arc::pin_init(
+            DiskStorage::new(value << 20, guard.block_size as usize),
+            GFP_KERNEL
+        )?;
+        guard.cache_size_mib = value;
+        Ok(())
+    })
+);
diff --git a/drivers/block/rnull/disk_storage.rs b/drivers/block/rnull/disk_storage.rs
new file mode 100644
index 0000000000000..8a8a90e1cf0bd
--- /dev/null
+++ b/drivers/block/rnull/disk_storage.rs
@@ -0,0 +1,255 @@
+// SPDX-License-Identifier: GPL-2.0
+
+use super::HwQueueContext;
+use core::pin::Pin;
+use kernel::{
+    block,
+    new_spinlock,
+    new_xarray,
+    page::PAGE_SIZE,
+    prelude::*,
+    sync::{
+        atomic::{ordering, Atomic},
+        SpinLock, SpinLockGuard,
+    },
+    uapi::PAGE_SECTORS,
+    xarray::{
+        self,
+        XArray,
+        XArraySheaf, //
+    }, //
+};
+pub(crate) use page::NullBlockPage;
+
+mod page;
+
+#[pin_data]
+pub(crate) struct DiskStorage {
+    // TODO: Get rid of this pointer indirection.
+    #[pin]
+    trees: SpinLock<Pin<KBox<TreeContainer>>>,
+    cache_size: u64,
+    cache_size_used: Atomic<u64>,
+    next_flush_sector: Atomic<u64>,
+    block_size: usize,
+}
+
+impl DiskStorage {
+    pub(crate) fn new(cache_size: u64, block_size: usize) -> impl PinInit<Self, Error> {
+        try_pin_init!(Self {
+            // TODO: Get rid of the box
+            // https://git.kernel.org/pub/scm/linux/kernel/git/boqun/linux.git/commit/?h=locking&id=a5d84cafb3e253a11d2e078902c5b090be2f4227
+            trees <- new_spinlock!(KBox::pin_init(TreeContainer::new(), GFP_KERNEL)?),
+            cache_size,
+            cache_size_used: Atomic::new(0),
+            next_flush_sector: Atomic::new(0),
+            block_size
+        })
+    }
+
+    pub(crate) fn access<'a, 'b, 'c>(
+        &'a self,
+        tree_guard: &'a mut SpinLockGuard<'b, Pin<KBox<TreeContainer>>>,
+        hw_data_guard: &'a mut SpinLockGuard<'b, HwQueueContext>,
+        sheaf: Option<XArraySheaf<'c>>,
+    ) -> DiskStorageAccess<'a, 'b, 'c> {
+        DiskStorageAccess::new(self, tree_guard, hw_data_guard, sheaf)
+    }
+
+    pub(crate) fn lock(&self) -> SpinLockGuard<'_, Pin<KBox<TreeContainer>>> {
+        self.trees.lock()
+    }
+}
+
+pub(crate) struct DiskStorageAccess<'a, 'b, 'c> {
+    cache_guard: xarray::Guard<'a, TreeNode>,
+    disk_guard: xarray::Guard<'a, TreeNode>,
+    hw_data_guard: &'a mut SpinLockGuard<'b, HwQueueContext>,
+    disk_storage: &'a DiskStorage,
+    pub(crate) sheaf: Option<XArraySheaf<'c>>,
+}
+
+impl<'a, 'b, 'c> DiskStorageAccess<'a, 'b, 'c> {
+    fn new(
+        disk_storage: &'a DiskStorage,
+        tree_guard: &'a mut SpinLockGuard<'b, Pin<KBox<TreeContainer>>>,
+        hw_data_guard: &'a mut SpinLockGuard<'b, HwQueueContext>,
+        sheaf: Option<XArraySheaf<'c>>,
+    ) -> Self {
+        Self {
+            cache_guard: tree_guard.cache_tree.lock(),
+            disk_guard: tree_guard.disk_tree.lock(),
+            hw_data_guard,
+            disk_storage,
+            sheaf,
+        }
+    }
+
+    fn to_index(sector: u64) -> usize {
+        (sector >> block::PAGE_SECTORS_SHIFT) as usize
+    }
+
+    fn to_sector(index: usize) -> u64 {
+        (index << block::PAGE_SECTORS_SHIFT) as u64
+    }
+
+    fn extract_cache_page(&mut self) -> Result<KBox<NullBlockPage>> {
+        let cache_entry = self
+            .cache_guard
+            .find_next_entry_circular(
+                self.disk_storage.next_flush_sector.load(ordering::Relaxed) as usize
+            )
+            .expect("Expected to find a page in the cache");
+
+        let index = cache_entry.index();
+
+        self.disk_storage
+            .next_flush_sector
+            .store(Self::to_sector(index).wrapping_add(1), ordering::Relaxed);
+
+        self.disk_storage.cache_size_used.store(
+            self.disk_storage.cache_size_used.load(ordering::Relaxed) - PAGE_SIZE as u64,
+            ordering::Relaxed,
+        );
+
+        let page = match self.disk_guard.entry(index) {
+            xarray::Entry::Vacant(disk_entry) => {
+                disk_entry
+                    .insert(cache_entry.remove(), self.sheaf.as_mut())
+                    .expect("Preload is set up to allow insert without failure");
+                self.hw_data_guard
+                    .page
+                    .take()
+                    .expect("Preload has allocated for us")
+            }
+            xarray::Entry::Occupied(mut disk_entry) => {
+                let mut page = if cache_entry.is_full() {
+                    disk_entry.insert(cache_entry.remove())
+                } else {
+                    let mut src = cache_entry;
+                    let mut offset = 0;
+                    for _ in 0..PAGE_SECTORS {
+                        src.page_mut().get_pin_mut().copy_to_page(
+                            disk_entry.page_mut().get_pin_mut(),
+                            offset,
+                            block::SECTOR_SIZE as usize,
+                        )?;
+                        offset += block::SECTOR_SIZE as usize;
+                    }
+                    src.remove()
+                };
+                page.reset();
+                page
+            }
+        };
+
+        Ok(page)
+    }
+
+    fn get_cache_page(&mut self, sector: u64) -> Result<&mut NullBlockPage> {
+        let index = Self::to_index(sector);
+
+        if self.cache_guard.contains_index(index) {
+            Ok(self.cache_guard.get_mut(index).expect("Index is present"))
+        } else {
+            let page = if self.disk_storage.cache_size_used.load(ordering::Relaxed)
+                < self.disk_storage.cache_size
+            {
+                self.hw_data_guard
+                    .page
+                    .take()
+                    .expect("Expected to have a page available")
+            } else {
+                self.extract_cache_page()?
+            };
+
+            // Account for the page inserted into the cache below.
+            self.disk_storage.cache_size_used.store(
+                self.disk_storage.cache_size_used.load(ordering::Relaxed) + PAGE_SIZE as u64,
+                ordering::Relaxed,
+            );
+            Ok(self
+                .cache_guard
+                .insert_entry(index, page, self.sheaf.as_mut())
+                .expect("Should be able to insert")
+                .into_mut())
+        }
+    }
+
+    fn get_disk_page(&mut self, sector: u64) -> Result<&mut NullBlockPage> {
+        let index = Self::to_index(sector);
+
+        let page = match self.disk_guard.entry(index) {
+            xarray::Entry::Vacant(e) => e.insert(
+                self.hw_data_guard
+                    .page
+                    .take()
+                    .expect("Expected page to be available"),
+                self.sheaf.as_mut(),
+            )?,
+            xarray::Entry::Occupied(e) => e.into_mut(),
+        };
+
+        Ok(page)
+    }
+
+    pub(crate) fn get_write_page(&mut self, sector: u64) -> Result<&mut NullBlockPage> {
+        let page = if self.disk_storage.cache_size > 0 {
+            self.get_cache_page(sector)?
+        } else {
+            self.get_disk_page(sector)?
+        };
+
+        Ok(page)
+    }
+
+    pub(crate) fn get_read_page(&self, sector: u64) -> Option<&NullBlockPage> {
+        let index = Self::to_index(sector);
+        if self.disk_storage.cache_size > 0 {
+            self.cache_guard
+                .get(index)
+                .or_else(|| self.disk_guard.get(index))
+        } else {
+            self.disk_guard.get(index)
+        }
+    }
+
+    fn free_sector_tree(tree_access: &mut xarray::Guard<'_, TreeNode>, sector: u64) {
+        let index = Self::to_index(sector);
+        if let Some(page) = tree_access.get_mut(index) {
+            page.set_free(sector);
+
+            if page.is_empty() {
+                tree_access.remove(index);
+            }
+        }
+    }
+
+    pub(crate) fn free_sector(&mut self, sector: u64) {
+        if self.disk_storage.cache_size > 0 {
+            Self::free_sector_tree(&mut self.cache_guard, sector);
+        }
+
+        Self::free_sector_tree(&mut self.disk_guard, sector);
+    }
+}
+
+type Tree = XArray<TreeNode>;
+type TreeNode = KBox<NullBlockPage>;
+
+#[pin_data]
+pub(crate) struct TreeContainer {
+    #[pin]
+    disk_tree: Tree,
+    #[pin]
+    cache_tree: Tree,
+}
+
+impl TreeContainer {
+    fn new() -> impl PinInit<Self> {
+        pin_init!(TreeContainer {
+            disk_tree <- new_xarray!(xarray::AllocKind::Alloc),
+            cache_tree <- new_xarray!(xarray::AllocKind::Alloc),
+        })
+    }
+}
diff --git a/drivers/block/rnull/disk_storage/page.rs b/drivers/block/rnull/disk_storage/page.rs
new file mode 100644
index 0000000000000..c2e18502cbdda
--- /dev/null
+++ b/drivers/block/rnull/disk_storage/page.rs
@@ -0,0 +1,75 @@
+// SPDX-License-Identifier: GPL-2.0
+
+use kernel::{
+    block::{
+        SECTOR_MASK,
+        SECTOR_SHIFT, //
+    },
+    page::{
+        SafePage,
+        PAGE_SIZE, //
+    },
+    prelude::*,
+    types::Owned,
+    uapi::PAGE_SECTORS, //
+};
+
+const _CHECK_STATUS_WIDTH: () = build_assert!((PAGE_SIZE >> SECTOR_SHIFT) <= 64);
+
+pub(crate) struct NullBlockPage {
+    page: Owned<SafePage>,
+    status: u64,
+    block_size: usize,
+}
+
+impl NullBlockPage {
+    pub(crate) fn new(block_size: usize) -> Result<KBox<Self>> {
+        Ok(KBox::new(
+            Self {
+                page: SafePage::alloc_page(GFP_NOIO | __GFP_ZERO)?,
+                status: 0,
+                block_size,
+            },
+            GFP_NOIO,
+        )?)
+    }
+
+    pub(crate) fn set_occupied(&mut self, sector: u64) {
+        let idx = sector & u64::from(SECTOR_MASK);
+        self.status |= 1 << idx;
+    }
+
+    pub(crate) fn set_free(&mut self, sector: u64) {
+        let idx = sector & u64::from(SECTOR_MASK);
+        self.status &= !(1 << idx);
+    }
+
+    pub(crate) fn is_empty(&self) -> bool {
+        self.status == 0
+    }
+
+    pub(crate) fn reset(&mut self) {
+        self.status = 0;
+    }
+
+    pub(crate) fn is_full(&self) -> bool {
+        let blocks_per_page = PAGE_SIZE >> self.block_size.trailing_zeros();
+        let shift = PAGE_SECTORS as usize / blocks_per_page;
+
+        for i in 0..blocks_per_page {
+            if self.status & (1 << (i * shift)) == 0 {
+                return false;
+            }
+        }
+
+        true
+    }
+
+    pub(crate) fn page_mut(&mut self) -> &mut Owned<SafePage> {
+        &mut self.page
+    }
+
+    pub(crate) fn page(&self) -> &Owned<SafePage> {
+        &self.page
+    }
+}
diff --git a/drivers/block/rnull/rnull.rs b/drivers/block/rnull/rnull.rs
index 4e226186d2f36..cca497aef40df 100644
--- a/drivers/block/rnull/rnull.rs
+++ b/drivers/block/rnull/rnull.rs
@@ -3,13 +3,22 @@
 //! This is a Rust implementation of the C null block driver.
 
 mod configfs;
+mod disk_storage;
 
 use configfs::IRQMode;
+use disk_storage::{
+    DiskStorage,
+    NullBlockPage,
+    TreeContainer, //
+};
 use kernel::{
     bindings,
     block::{
         self,
-        badblocks::{self, BadBlocks},
+        badblocks::{
+            self,
+            BadBlocks, //
+        },
         bio::Segment,
         mq::{
             self,
@@ -20,7 +29,7 @@
             Operations,
             TagSet, //
         },
-        SECTOR_MASK, SECTOR_SHIFT,
+        SECTOR_SHIFT,
     },
     error::{
         code,
@@ -28,11 +37,7 @@
     },
     ffi,
     new_mutex,
-    new_xarray,
-    page::{
-        SafePage,
-        PAGE_SIZE, //
-    },
+    new_spinlock,
     pr_info,
     prelude::*,
     str::CString,
@@ -41,9 +46,11 @@
         atomic::{
             ordering,
             Atomic, //
-        },
+        }, //
         Arc,
-        Mutex, //
+        Mutex,
+        SpinLock,
+        SpinLockGuard,
     },
     time::{
         hrtimer::{
@@ -58,7 +65,7 @@
         OwnableRefCounted,
         Owned, //
     },
-    xarray::XArray, //
+    xarray::XArraySheaf, //
 };
 use pin_init::PinInit;
 
@@ -148,9 +155,11 @@ fn init(_module: &'static ThisModule) -> impl PinInit<Self, Error> {
                 } else {
                     *module_parameters::submit_queues.value()
                 };
+
+                let block_size = *module_parameters::bs.value();
                 let disk = NullBlkDevice::new(NullBlkOptions {
                     name: &name,
-                    block_size: *module_parameters::bs.value(),
+                    block_size,
                     rotational: *module_parameters::rotational.value() != 0,
                     capacity_mib: *module_parameters::gb.value() * 1024,
                     irq_mode: (*module_parameters::irqmode.value()).try_into()?,
@@ -163,6 +172,7 @@ fn init(_module: &'static ThisModule) -> impl PinInit<Self, Error> {
                     bad_blocks: Arc::pin_init(BadBlocks::new(false), GFP_KERNEL)?,
                     bad_blocks_once: false,
                     bad_blocks_partial_io: false,
+                    storage: Arc::pin_init(DiskStorage::new(0, block_size as usize), GFP_KERNEL)?,
                 })?;
                 disks.push(disk, GFP_KERNEL)?;
             }
@@ -192,8 +202,20 @@ struct NullBlkOptions<'a> {
     bad_blocks: Arc<BadBlocks>,
     bad_blocks_once: bool,
     bad_blocks_partial_io: bool,
+    storage: Arc<DiskStorage>,
+}
+
+#[pin_data]
+struct NullBlkDevice {
+    storage: Arc<DiskStorage>,
+    irq_mode: IRQMode,
+    completion_time: Delta,
+    memory_backed: bool,
+    block_size: usize,
+    bad_blocks: Arc<BadBlocks>,
+    bad_blocks_once: bool,
+    bad_blocks_partial_io: bool,
 }
-struct NullBlkDevice;
 
 impl NullBlkDevice {
     fn new(options: NullBlkOptions<'_>) -> Result<GenDisk<Self>> {
@@ -212,10 +234,14 @@ fn new(options: NullBlkOptions<'_>) -> Result<GenDisk<Self>> {
             bad_blocks,
             bad_blocks_once,
             bad_blocks_partial_io,
+            storage,
         } = options;
 
         let mut flags = mq::tag_set::Flags::default();
 
+        // TODO: lim.features |= BLK_FEAT_WRITE_CACHE;
+        // if (dev->fua)
+        // 	lim.features |= BLK_FEAT_FUA;
         if memory_backed {
             flags |= mq::tag_set::Flag::Blocking;
         }
@@ -233,13 +259,13 @@ fn new(options: NullBlkOptions<'_>) -> Result<GenDisk<Self>> {
             GFP_KERNEL,
         )?;
 
-        let queue_data = Box::pin_init(
-            pin_init!(QueueData {
-                tree <- new_xarray!(kernel::xarray::AllocKind::Alloc),
+        let queue_data = Box::try_pin_init(
+            try_pin_init!(Self {
+                storage,
                 irq_mode,
                 completion_time,
                 memory_backed,
-                block_size: block_size.into(),
+                block_size: block_size as usize,
                 bad_blocks,
                 bad_blocks_once,
                 bad_blocks_partial_io,
@@ -262,68 +288,133 @@ fn new(options: NullBlkOptions<'_>) -> Result<GenDisk<Self>> {
         builder.build(fmt!("{}", name.to_str()?), tagset, queue_data)
     }
 
+    fn sheaf_size() -> usize {
+        2 * ((usize::BITS as usize / bindings::XA_CHUNK_SHIFT)
+            + if (usize::BITS as usize % bindings::XA_CHUNK_SHIFT) == 0 {
+                0
+            } else {
+                1
+            })
+    }
+
+    fn preload<'b, 'c>(
+        tree_guard: &'b mut SpinLockGuard<'c, Pin<KBox<TreeContainer>>>,
+        hw_data_guard: &'b mut SpinLockGuard<'c, HwQueueContext>,
+        block_size: usize,
+    ) -> Result {
+        if hw_data_guard.page.is_none() {
+            hw_data_guard.page =
+                Some(tree_guard.do_unlocked(|| {
+                    hw_data_guard.do_unlocked(|| NullBlockPage::new(block_size))
+                })?);
+        }
+
+        Ok(())
+    }
+
     #[inline(always)]
-    fn write(tree: &Tree, mut sector: u64, mut segment: Segment<'_>) -> Result {
+    fn write<'a, 'b, 'c>(
+        &'a self,
+        tree_guard: &'b mut SpinLockGuard<'c, Pin<KBox<TreeContainer>>>,
+        hw_data_guard: &'b mut SpinLockGuard<'c, HwQueueContext>,
+        mut sector: u64,
+        mut segment: Segment<'_>,
+    ) -> Result {
+        let mut sheaf: Option<XArraySheaf<'_>> = None;
+
         while !segment.is_empty() {
-            let page = NullBlockPage::new()?;
-            let mut tree = tree.lock();
+            Self::preload(tree_guard, hw_data_guard, self.block_size)?;
 
-            let page_idx = sector >> block::PAGE_SECTORS_SHIFT;
+            match &mut sheaf {
+                Some(sheaf) => {
+                    tree_guard.do_unlocked(|| {
+                        hw_data_guard.do_unlocked(|| sheaf.refill(GFP_KERNEL, Self::sheaf_size()))
+                    })?;
+                }
+                None => {
+                    let _ = sheaf.insert(
+                        kernel::xarray::xarray_kmem_cache()
+                            .sheaf(Self::sheaf_size(), GFP_NOWAIT)
+                            .or(tree_guard.do_unlocked(|| {
+                                hw_data_guard.do_unlocked(|| -> Result<_> {
+                                    kernel::xarray::xarray_kmem_cache()
+                                        .sheaf(Self::sheaf_size(), GFP_KERNEL)
+                                })
+                            }))?,
+                    );
+                }
+            }
 
-            let page = if let Some(page) = tree.get_mut(page_idx as usize) {
-                page
-            } else {
-                tree.store(page_idx as usize, page, GFP_NOIO)?;
-                tree.get_mut(page_idx as usize).unwrap()
-            };
+            let mut access = self.storage.access(tree_guard, hw_data_guard, sheaf);
 
+            let page = access.get_write_page(sector)?;
             page.set_occupied(sector);
             let page_offset = (sector & u64::from(block::SECTOR_MASK)) << block::SECTOR_SHIFT;
-            sector += segment.copy_to_page(page.page.get_pin_mut(), page_offset as usize) as u64
+
+            sector += segment.copy_to_page(page.page_mut().get_pin_mut(), page_offset as usize)
+                as u64
                 >> block::SECTOR_SHIFT;
+
+            sheaf = access.sheaf;
+        }
+
+        if let Some(sheaf) = sheaf {
+            tree_guard.do_unlocked(|| {
+                hw_data_guard.do_unlocked(|| {
+                    sheaf.return_refill(GFP_KERNEL);
+                })
+            });
         }
+
         Ok(())
     }
 
     #[inline(always)]
-    fn read(tree: &Tree, mut sector: u64, mut segment: Segment<'_>) -> Result {
-        let tree = tree.lock();
+    fn read<'a, 'b, 'c>(
+        &'a self,
+        tree_guard: &'b mut SpinLockGuard<'c, Pin<KBox<TreeContainer>>>,
+        hw_data_guard: &'b mut SpinLockGuard<'c, HwQueueContext>,
+        mut sector: u64,
+        mut segment: Segment<'_>,
+    ) -> Result {
+        let access = self.storage.access(tree_guard, hw_data_guard, None);
 
         while !segment.is_empty() {
-            let idx = sector >> block::PAGE_SECTORS_SHIFT;
-
-            if let Some(page) = tree.get(idx as usize) {
-                let page_offset = (sector & u64::from(block::SECTOR_MASK)) << block::SECTOR_SHIFT;
-                sector += segment.copy_from_page(&page.page, page_offset as usize) as u64
-                    >> block::SECTOR_SHIFT;
-            } else {
-                sector += segment.zero_page() as u64 >> block::SECTOR_SHIFT;
+            let page = access.get_read_page(sector);
+
+            match page {
+                Some(page) => {
+                    let page_offset =
+                        (sector & u64::from(block::SECTOR_MASK)) << block::SECTOR_SHIFT;
+                    sector += segment.copy_from_page(page.page(), page_offset as usize) as u64
+                        >> block::SECTOR_SHIFT;
+                }
+                None => sector += segment.zero_page() as u64 >> block::SECTOR_SHIFT,
             }
         }
 
         Ok(())
     }
 
-    fn discard(tree: &Tree, mut sector: u64, sectors: u64, block_size: u64) -> Result {
-        let mut remaining_bytes = sectors << SECTOR_SHIFT;
-        let mut tree = tree.lock();
+    fn discard(
+        &self,
+        hw_data: &Pin<&SpinLock<HwQueueContext>>,
+        mut sector: u64,
+        sectors: u32,
+    ) -> Result {
+        let mut tree_guard = self.storage.lock();
+        let mut hw_data_guard = hw_data.lock();
 
-        while remaining_bytes > 0 {
-            let page_idx = sector >> block::PAGE_SECTORS_SHIFT;
-            let mut remove = false;
-            if let Some(page) = tree.get_mut(page_idx as usize) {
-                page.set_free(sector);
-                if page.is_empty() {
-                    remove = true;
-                }
-            }
+        let mut access = self
+            .storage
+            .access(&mut tree_guard, &mut hw_data_guard, None);
 
-            if remove {
-                drop(tree.remove(page_idx as usize))
-            }
+        let mut remaining_bytes = (sectors as usize) << SECTOR_SHIFT;
 
-            let processed = remaining_bytes.min(block_size);
-            sector += processed >> SECTOR_SHIFT;
+        while remaining_bytes > 0 {
+            access.free_sector(sector);
+            let processed = remaining_bytes.min(self.block_size);
+            sector += (processed >> SECTOR_SHIFT) as u64;
             remaining_bytes -= processed;
         }
 
@@ -331,21 +422,34 @@ fn discard(tree: &Tree, mut sector: u64, sectors: u64, block_size: u64) -> Resul
     }
 
     #[inline(never)]
-    fn transfer(rq: &mut Owned<mq::Request<Self>>, tree: &Tree, sectors: u32) -> Result {
+    fn transfer(
+        &self,
+        hw_data: &Pin<&SpinLock<HwQueueContext>>,
+        rq: &mut Owned<mq::Request<Self>>,
+        sectors: u32,
+    ) -> Result {
         let mut sector = rq.sector();
         let end_sector = sector + <u32 as Into<u64>>::into(sectors);
         let command = rq.command();
 
+        // TODO: Use `PerCpu` to get rid of this lock
+        let mut hw_data_guard = hw_data.lock();
+        let mut tree_guard = self.storage.lock();
+
         for bio in rq.bio_iter_mut() {
             let segment_iter = bio.segment_iter();
             for segment in segment_iter {
                 // Length might be limited by bad blocks.
                 let length = segment
                     .len()
-                    .min((sector - end_sector) as u32 >> SECTOR_SHIFT);
+                    .min((end_sector - sector) as u32 >> SECTOR_SHIFT);
                 match command {
-                    bindings::req_op_REQ_OP_WRITE => Self::write(tree, sector, segment)?,
-                    bindings::req_op_REQ_OP_READ => Self::read(tree, sector, segment)?,
+                    bindings::req_op_REQ_OP_WRITE => {
+                        self.write(&mut tree_guard, &mut hw_data_guard, sector, segment)?
+                    }
+                    bindings::req_op_REQ_OP_READ => {
+                        self.read(&mut tree_guard, &mut hw_data_guard, sector, segment)?
+                    }
                     _ => (),
                 }
                 sector += u64::from(length) >> SECTOR_SHIFT;
@@ -355,29 +459,26 @@ fn transfer(rq: &mut Owned<mq::Request<Self>>, tree: &Tree, sectors: u32) -> Res
                 }
             }
         }
+
         Ok(())
     }
 
-    fn handle_bad_blocks(
-        rq: &mut Owned<mq::Request<Self>>,
-        queue_data: &QueueData,
-        sectors: &mut u32,
-    ) -> Result {
-        if queue_data.bad_blocks.enabled() {
+    fn handle_bad_blocks(&self, rq: &mut Owned<mq::Request<Self>>, sectors: &mut u32) -> Result {
+        if self.bad_blocks.enabled() {
             let start = rq.sector();
             let end = start + u64::from(*sectors);
-            match queue_data.bad_blocks.check(start..end) {
+            match self.bad_blocks.check(start..end) {
                 badblocks::BlockStatus::None => {}
                 badblocks::BlockStatus::Acknowledged(mut range)
                 | badblocks::BlockStatus::Unacknowledged(mut range) => {
                     rq.data_ref().error.store(1, ordering::Relaxed);
 
-                    if queue_data.bad_blocks_once {
-                        queue_data.bad_blocks.set_good(range.clone())?;
+                    if self.bad_blocks_once {
+                        self.bad_blocks.set_good(range.clone())?;
                     }
 
-                    if queue_data.bad_blocks_partial_io {
-                        let block_size_sectors = queue_data.block_size >> SECTOR_SHIFT;
+                    if self.bad_blocks_partial_io {
+                        let block_size_sectors = (self.block_size >> SECTOR_SHIFT) as u64;
                         range.start = align_down(range.start, block_size_sectors);
                         if start < range.start {
                             *sectors = (range.start - start) as u32;
@@ -402,53 +503,8 @@ fn end_request(rq: Owned<mq::Request<Self>>) {
     }
 }
 
-const _CHEKC_STATUS_WIDTH: () = build_assert!((PAGE_SIZE >> SECTOR_SHIFT) <= 64);
-
-struct NullBlockPage {
-    page: Owned<SafePage>,
-    status: u64,
-}
-
-impl NullBlockPage {
-    fn new() -> Result<KBox<Self>> {
-        Ok(KBox::new(
-            Self {
-                page: SafePage::alloc_page(GFP_NOIO | __GFP_ZERO)?,
-                status: 0,
-            },
-            GFP_NOIO,
-        )?)
-    }
-
-    fn set_occupied(&mut self, sector: u64) {
-        let idx = sector & u64::from(SECTOR_MASK);
-        self.status |= 1 << idx;
-    }
-
-    fn set_free(&mut self, sector: u64) {
-        let idx = sector & u64::from(SECTOR_MASK);
-        self.status &= !(1 << idx);
-    }
-
-    fn is_empty(&self) -> bool {
-        self.status == 0
-    }
-}
-
-type TreeNode = KBox<NullBlockPage>;
-type Tree = XArray<TreeNode>;
-
-#[pin_data]
-struct QueueData {
-    #[pin]
-    tree: Tree,
-    irq_mode: IRQMode,
-    completion_time: Delta,
-    memory_backed: bool,
-    block_size: u64,
-    bad_blocks: Arc<BadBlocks>,
-    bad_blocks_once: bool,
-    bad_blocks_partial_io: bool,
+struct HwQueueContext {
+    page: Option<KBox<disk_storage::NullBlockPage>>,
 }
 
 #[pin_data]
@@ -503,10 +559,10 @@ fn align_down<T>(value: T, to: T) -> T
 
 #[vtable]
 impl Operations for NullBlkDevice {
-    type QueueData = Pin<KBox<QueueData>>;
+    type QueueData = Pin<KBox<Self>>;
     type RequestData = Pdu;
     type TagSetData = ();
-    type HwData = ();
+    type HwData = Pin<KBox<SpinLock<HwQueueContext>>>;
 
     fn new_request_data() -> impl PinInit<Self::RequestData> {
         pin_init!(Pdu {
@@ -517,41 +573,39 @@ fn new_request_data() -> impl PinInit<Self::RequestData> {
 
     #[inline(always)]
     fn queue_rq(
-        _hw_data: (),
-        queue_data: Pin<&QueueData>,
+        hw_data: Pin<&SpinLock<HwQueueContext>>,
+        this: Pin<&Self>,
         mut rq: Owned<mq::Request<Self>>,
         _is_last: bool,
     ) -> Result {
         let mut sectors = rq.sectors();
 
-        Self::handle_bad_blocks(&mut rq, queue_data.get_ref(), &mut sectors)?;
-
-        if queue_data.memory_backed {
-            let tree = &queue_data.tree;
+        Self::handle_bad_blocks(this.get_ref(), &mut rq, &mut sectors)?;
 
+        if this.memory_backed {
             if rq.command() == bindings::req_op_REQ_OP_DISCARD {
-                Self::discard(tree, rq.sector(), sectors.into(), queue_data.block_size)?;
+                this.discard(&hw_data, rq.sector(), sectors)?;
             } else {
-                Self::transfer(&mut rq, tree, sectors)?;
+                this.transfer(&hw_data, &mut rq, sectors)?;
             }
         }
 
-        match queue_data.irq_mode {
+        match this.irq_mode {
             IRQMode::None => Self::end_request(rq),
             IRQMode::Soft => mq::Request::complete(rq.into()),
             IRQMode::Timer => {
                 OwnableRefCounted::into_shared(rq)
-                    .start(queue_data.completion_time)
+                    .start(this.completion_time)
                     .dismiss();
             }
         }
         Ok(())
     }
 
-    fn commit_rqs(_hw_data: (), _queue_data: Pin<&QueueData>) {}
+    fn commit_rqs(_hw_data: Pin<&SpinLock<HwQueueContext>>, _queue_data: Pin<&Self>) {}
 
-    fn init_hctx(_tagset_data: (), _hctx_idx: u32) -> Result {
-        Ok(())
+    fn init_hctx(_tagset_data: (), _hctx_idx: u32) -> Result<Self::HwData> {
+        KBox::pin_init(new_spinlock!(HwQueueContext { page: None }), GFP_KERNEL)
     }
 
     fn complete(rq: ARef<mq::Request<Self>>) {

-- 
2.51.2




Thread overview: 82+ messages
2026-02-15 23:34 [PATCH 00/79] block: rnull: complete the rust null block driver Andreas Hindborg
2026-02-15 23:34 ` [PATCH 01/79] block: rnull: adopt new formatting guidelines Andreas Hindborg
2026-02-15 23:34 ` [PATCH 02/79] block: rnull: add module parameters Andreas Hindborg
2026-02-15 23:34 ` [PATCH 03/79] block: rnull: add macros to define configfs attributes Andreas Hindborg
2026-02-15 23:34 ` [PATCH 04/79] block: rust: fix generation of bindings to `BLK_STS_.*` Andreas Hindborg
2026-02-15 23:34 ` [PATCH 05/79] block: rust: change `queue_rq` request type to `Owned` Andreas Hindborg
2026-02-15 23:34 ` [PATCH 06/79] block: rust: add `Request` private data support Andreas Hindborg
2026-02-15 23:34 ` [PATCH 07/79] block: rust: allow `hrtimer::Timer` in `RequestData` Andreas Hindborg
2026-02-15 23:34 ` [PATCH 08/79] block: rnull: add timer completion mode Andreas Hindborg
2026-02-15 23:34 ` [PATCH 09/79] block: rust: introduce `kernel::block::bio` module Andreas Hindborg
2026-02-15 23:34 ` [PATCH 10/79] block: rust: add `command` getter to `Request` Andreas Hindborg
2026-02-15 23:34 ` [PATCH 11/79] block: rust: mq: use GFP_KERNEL from prelude Andreas Hindborg
2026-02-15 23:34 ` [PATCH 12/79] block: rust: add `TagSet` flags Andreas Hindborg
2026-02-15 23:35 ` [PATCH 13/79] block: rnull: add memory backing Andreas Hindborg
2026-02-15 23:35 ` [PATCH 14/79] block: rnull: add submit queue count config option Andreas Hindborg
2026-02-15 23:35 ` [PATCH 15/79] block: rnull: add `use_per_node_hctx` " Andreas Hindborg
2026-02-15 23:35 ` [PATCH 16/79] block: rust: allow specifying home node when constructing `TagSet` Andreas Hindborg
2026-02-15 23:35 ` [PATCH 17/79] block: rnull: allow specifying the home numa node Andreas Hindborg
2026-02-15 23:35 ` [PATCH 18/79] block: rust: add Request::sectors() method Andreas Hindborg
2026-02-15 23:35 ` [PATCH 19/79] block: rust: mq: add max_hw_discard_sectors support to GenDiskBuilder Andreas Hindborg
2026-02-15 23:35 ` [PATCH 20/79] block: rnull: add discard support Andreas Hindborg
2026-02-15 23:35 ` [PATCH 21/79] block: rust: add `NoDefaultScheduler` flag for `TagSet` Andreas Hindborg
2026-02-15 23:35 ` [PATCH 22/79] block: rnull: add no_sched module parameter and configfs attribute Andreas Hindborg
2026-02-15 23:35 ` [PATCH 23/79] block: rust: change sector type from usize to u64 Andreas Hindborg
2026-02-15 23:35 ` [PATCH 24/79] block: rust: add `BadBlocks` for bad block tracking Andreas Hindborg
2026-02-15 23:35 ` [PATCH 25/79] block: rust: mq: add Request::end() method for custom status codes Andreas Hindborg
2026-02-15 23:35 ` [PATCH 26/79] block: rnull: add badblocks support Andreas Hindborg
2026-02-15 23:35 ` [PATCH 27/79] block: rnull: add badblocks_once support Andreas Hindborg
2026-02-15 23:35 ` [PATCH 28/79] block: rnull: add partial I/O support for bad blocks Andreas Hindborg
2026-02-15 23:35 ` [PATCH 29/79] block: rust: add `TagSet` private data support Andreas Hindborg
2026-02-15 23:35 ` [PATCH 30/79] block: rust: add `hctx` " Andreas Hindborg
2026-02-15 23:35 ` Andreas Hindborg [this message]
2026-02-15 23:35 ` [PATCH 32/79] block: rust: implement `Sync` for `GenDisk` Andreas Hindborg
2026-02-15 23:35 ` [PATCH 33/79] block: rust: add a back reference feature to `GenDisk` Andreas Hindborg
2026-02-15 23:35 ` [PATCH 34/79] block: rust: introduce an idle type state for `Request` Andreas Hindborg
2026-02-15 23:35 ` [PATCH 35/79] block: rust: add a request queue abstraction Andreas Hindborg
2026-02-15 23:35 ` [PATCH 36/79] block: rust: add a method to get the request queue for a request Andreas Hindborg
2026-02-15 23:35 ` [PATCH 37/79] block: rust: introduce `kernel::block::error` Andreas Hindborg
2026-02-15 23:35 ` [PATCH 38/79] block: rust: require `queue_rq` to return a `BlkResult` Andreas Hindborg
2026-02-15 23:35 ` [PATCH 39/79] block: rust: add `GenDisk::queue_data` Andreas Hindborg
2026-02-15 23:35 ` [PATCH 40/79] block: rnull: add bandwidth limiting Andreas Hindborg
2026-02-15 23:35 ` [PATCH 41/79] block: rnull: add blocking queue mode Andreas Hindborg
2026-02-15 23:35 ` [PATCH 42/79] block: rnull: add shared tags Andreas Hindborg
2026-02-15 23:35 ` [PATCH 43/79] block: rnull: add queue depth config option Andreas Hindborg
2026-02-15 23:35 ` [PATCH 44/79] block: rust: add an abstraction for `bindings::req_op` Andreas Hindborg
2026-02-15 23:35 ` [PATCH 45/79] block: rust: add a method to set the target sector of a request Andreas Hindborg
2026-02-15 23:35 ` [PATCH 46/79] block: rust: move gendisk vtable construction to separate function Andreas Hindborg
2026-02-15 23:35 ` [PATCH 47/79] block: rust: add zoned block device support Andreas Hindborg
2026-02-15 23:35 ` [PATCH 48/79] block: rnull: add zoned storage support Andreas Hindborg
2026-02-15 23:35 ` [PATCH 49/79] block: rust: add `map_queues` support Andreas Hindborg
2026-02-15 23:35 ` [PATCH 50/79] block: rust: add an abstraction for `struct blk_mq_queue_map` Andreas Hindborg
2026-02-15 23:35 ` [PATCH 51/79] block: rust: add polled completion support Andreas Hindborg
2026-02-15 23:35 ` [PATCH 52/79] block: rust: add accessors to `TagSet` Andreas Hindborg
2026-02-15 23:35 ` [PATCH 53/79] block: rnull: add polled completion support Andreas Hindborg
2026-02-15 23:35 ` [PATCH 54/79] block: rnull: add REQ_OP_FLUSH support Andreas Hindborg
2026-02-15 23:35 ` [PATCH 55/79] block: rust: add request flags abstraction Andreas Hindborg
2026-02-15 23:35 ` [PATCH 56/79] block: rust: add abstraction for block queue feature flags Andreas Hindborg
2026-02-15 23:35 ` [PATCH 57/79] block: rust: allow setting write cache and FUA flags for `GenDisk` Andreas Hindborg
2026-02-15 23:35 ` [PATCH 58/79] block: rust: add `Segment::copy_to_page_limit` Andreas Hindborg
2026-02-15 23:35 ` [PATCH 59/79] block: rnull: add fua support Andreas Hindborg
2026-02-15 23:35 ` [PATCH 60/79] block: fix arg type in `blk_mq_update_nr_hw_queues` Andreas Hindborg
2026-02-15 23:35 ` [PATCH 61/79] block: rust: add `GenDisk::tag_set` Andreas Hindborg
2026-02-15 23:35 ` [PATCH 62/79] block: rust: add `TagSet::update_hw_queue_count` Andreas Hindborg
2026-02-16 23:59   ` Ken Kurematsu
2026-02-17  9:54     ` Andreas Hindborg
2026-02-15 23:35 ` [PATCH 63/79] block: rnull: add an option to change the number of hardware queues Andreas Hindborg
2026-02-15 23:35 ` [PATCH 64/79] block: rust: add an abstraction for `struct rq_list` Andreas Hindborg
2026-02-15 23:35 ` [PATCH 65/79] block: rust: add `queue_rqs` vtable hook Andreas Hindborg
2026-02-15 23:35 ` [PATCH 66/79] block: rnull: support queue_rqs Andreas Hindborg
2026-02-15 23:35 ` [PATCH 67/79] block: rust: remove the `is_poll` parameter from `queue_rq` Andreas Hindborg
2026-02-15 23:35 ` [PATCH 68/79] block: rust: add a debug assert for refcounts Andreas Hindborg
2026-02-15 23:35 ` [PATCH 69/79] block: rust: add `TagSet::tag_to_rq` Andreas Hindborg
2026-02-15 23:35 ` [PATCH 70/79] block: rust: add `Request::queue_index` Andreas Hindborg
2026-02-15 23:35 ` [PATCH 71/79] block: rust: add `Request::requeue` Andreas Hindborg
2026-02-15 23:35 ` [PATCH 72/79] block: rust: add `request_timeout` hook Andreas Hindborg
2026-02-15 23:36 ` [PATCH 73/79] block: rnull: add fault injection support Andreas Hindborg
2026-02-15 23:36 ` [PATCH 74/79] block: rust: add max_sectors option to `GenDiskBuilder` Andreas Hindborg
2026-02-15 23:36 ` [PATCH 75/79] block: rnull: allow configuration of the maximum IO size Andreas Hindborg
2026-02-15 23:36 ` [PATCH 76/79] block: rust: add `virt_boundary_mask` option to `GenDiskBuilder` Andreas Hindborg
2026-02-15 23:36 ` [PATCH 77/79] block: rnull: add `virt_boundary` option Andreas Hindborg
2026-02-15 23:36 ` [PATCH 78/79] block: rnull: add `shared_tag_bitmap` config option Andreas Hindborg
2026-02-15 23:36 ` [PATCH 79/79] block: rnull: add zone offline and readonly configfs files Andreas Hindborg
