From: Andreas Hindborg <a.hindborg@kernel.org>
To: "Boqun Feng" <boqun.feng@gmail.com>,
"Jens Axboe" <axboe@kernel.dk>, "Miguel Ojeda" <ojeda@kernel.org>,
"Gary Guo" <gary@garyguo.net>,
"Björn Roy Baron" <bjorn3_gh@protonmail.com>,
"Benno Lossin" <lossin@kernel.org>,
"Alice Ryhl" <aliceryhl@google.com>,
"Trevor Gross" <tmgross@umich.edu>,
"Danilo Krummrich" <dakr@kernel.org>,
"FUJITA Tomonori" <fujita.tomonori@gmail.com>,
"Frederic Weisbecker" <frederic@kernel.org>,
"Lyude Paul" <lyude@redhat.com>,
"Thomas Gleixner" <tglx@kernel.org>,
"Anna-Maria Behnsen" <anna-maria@linutronix.de>,
"John Stultz" <jstultz@google.com>,
"Stephen Boyd" <sboyd@kernel.org>,
"Lorenzo Stoakes" <lorenzo.stoakes@oracle.com>,
"Liam R. Howlett" <Liam.Howlett@oracle.com>
Cc: linux-block@vger.kernel.org, rust-for-linux@vger.kernel.org,
linux-kernel@vger.kernel.org, linux-mm@kvack.org,
Andreas Hindborg <a.hindborg@kernel.org>
Subject: [PATCH 31/79] block: rnull: add volatile cache emulation
Date: Mon, 16 Feb 2026 00:35:18 +0100
Message-ID: <20260216-rnull-v6-19-rc5-send-v1-31-de9a7af4b469@kernel.org>
In-Reply-To: <20260216-rnull-v6-19-rc5-send-v1-0-de9a7af4b469@kernel.org>
Add volatile cache emulation to rnull. When enabled via the
`cache_size_mib` configfs attribute, writes are first stored in a volatile
cache and are only written back to the simulated non-volatile storage once
the cache runs full.
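
As an illustration (a sketch only; the configfs mount point and the
`memory_backed` and `power` attribute names are assumed to follow the C
null_blk driver, while `cache_size_mib` is the attribute added by this
patch):

  mkdir /sys/kernel/config/rnull/nullb0
  echo 64 > /sys/kernel/config/rnull/nullb0/cache_size_mib
  echo 1 > /sys/kernel/config/rnull/nullb0/memory_backed
  echo 1 > /sys/kernel/config/rnull/nullb0/power

This would create a memory backed device with a 64 MiB volatile write
cache.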
Signed-off-by: Andreas Hindborg <a.hindborg@kernel.org>
---
drivers/block/rnull/configfs.rs | 36 +++-
drivers/block/rnull/disk_storage.rs | 248 ++++++++++++++++++++++++
drivers/block/rnull/disk_storage/page.rs | 75 ++++++++
drivers/block/rnull/rnull.rs | 316 ++++++++++++++++++-------------
4 files changed, 538 insertions(+), 137 deletions(-)
diff --git a/drivers/block/rnull/configfs.rs b/drivers/block/rnull/configfs.rs
index c08a3cbd66f18..d679f12ee6749 100644
--- a/drivers/block/rnull/configfs.rs
+++ b/drivers/block/rnull/configfs.rs
@@ -1,9 +1,11 @@
// SPDX-License-Identifier: GPL-2.0
use super::{
+ DiskStorage,
NullBlkDevice,
THIS_MODULE, //
};
+use core::fmt::Write;
use kernel::{
bindings,
block::{
@@ -19,10 +21,7 @@
AttributeOperations, //
},
configfs_attrs,
- fmt::{
- self,
- Write as _, //
- },
+ fmt,
new_mutex,
page::PAGE_SIZE,
prelude::*,
@@ -105,17 +104,19 @@ fn make_group(
badblocks: 12,
badblocks_once: 13,
badblocks_partial_io: 14,
+ cache_size_mib: 15,
],
};
+ let block_size = 4096;
Ok(configfs::Group::new(
name.try_into()?,
item_type,
// TODO: cannot coerce new_mutex!() to impl PinInit<_, Error>, so put mutex inside
- try_pin_init!( DeviceConfig {
+ try_pin_init!(DeviceConfig {
data <- new_mutex!(DeviceConfigInner {
powered: false,
- block_size: 4096,
+ block_size,
rotational: false,
disk: None,
capacity_mib: 4096,
@@ -130,6 +131,11 @@ fn make_group(
bad_blocks: Arc::pin_init(BadBlocks::new(false), GFP_KERNEL)?,
bad_blocks_once: false,
bad_blocks_partial_io: false,
+ disk_storage: Arc::pin_init(
+ DiskStorage::new(0, block_size as usize),
+ GFP_KERNEL
+ )?,
+ cache_size_mib: 0,
}),
}),
core::iter::empty(),
@@ -192,6 +198,8 @@ struct DeviceConfigInner {
bad_blocks: Arc<BadBlocks>,
bad_blocks_once: bool,
bad_blocks_partial_io: bool,
+ cache_size_mib: u64,
+ disk_storage: Arc<DiskStorage>,
}
#[vtable]
@@ -230,6 +238,7 @@ fn store(this: &DeviceConfig, page: &[u8]) -> Result {
bad_blocks: guard.bad_blocks.clone(),
bad_blocks_once: guard.bad_blocks_once,
bad_blocks_partial_io: guard.bad_blocks_partial_io,
+ storage: guard.disk_storage.clone(),
})?);
guard.powered = true;
} else if guard.powered && !power_op {
@@ -241,6 +250,7 @@ fn store(this: &DeviceConfig, page: &[u8]) -> Result {
}
}
configfs_simple_field!(DeviceConfig, 1, block_size, u32, check GenDiskBuilder::validate_block_size);
configfs_simple_bool_field!(DeviceConfig, 2, rotational);
configfs_simple_field!(DeviceConfig, 3, capacity_mib, u64);
@@ -433,3 +443,17 @@ fn store(this: &DeviceConfig, page: &[u8]) -> Result {
configfs_simple_bool_field!(DeviceConfig, 13, bad_blocks_once);
configfs_simple_bool_field!(DeviceConfig, 14, bad_blocks_partial_io);
+configfs_attribute!(DeviceConfig, 15,
+ show: |this, page| show_field(this.data.lock().cache_size_mib, page),
+ store: |this, page| store_with_power_check(this, page, |this, page| {
+ let text = core::str::from_utf8(page)?.trim();
+ let value = text.parse::<u64>().map_err(|_| EINVAL)?;
+ let mut guard = this.data.lock();
+        // `DiskStorage` takes the cache size in bytes; convert from MiB.
+        guard.disk_storage = Arc::pin_init(
+            DiskStorage::new(value << 20, guard.block_size as usize),
+            GFP_KERNEL
+        )?;
+ guard.cache_size_mib = value;
+ Ok(())
+ })
+);
diff --git a/drivers/block/rnull/disk_storage.rs b/drivers/block/rnull/disk_storage.rs
new file mode 100644
index 0000000000000..8a8a90e1cf0bd
--- /dev/null
+++ b/drivers/block/rnull/disk_storage.rs
@@ -0,0 +1,248 @@
+// SPDX-License-Identifier: GPL-2.0
+
+use super::HwQueueContext;
+use core::pin::Pin;
+use kernel::{
+ block,
+ new_spinlock,
+ new_xarray,
+ page::PAGE_SIZE,
+ prelude::*,
+ sync::{
+ atomic::{ordering, Atomic},
+ SpinLock, SpinLockGuard,
+ },
+ uapi::PAGE_SECTORS,
+ xarray::{
+ self,
+ XArray,
+ XArraySheaf, //
+ }, //
+};
+pub(crate) use page::NullBlockPage;
+
+mod page;
+
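+/// Backing storage for a null block device: an xarray of pages emulating
+/// the non-volatile disk, plus an optional volatile write cache bounded by
+/// `cache_size` bytes.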
+#[pin_data]
+pub(crate) struct DiskStorage {
+ // TODO: Get rid of this pointer indirection.
+ #[pin]
+ trees: SpinLock<Pin<KBox<TreeContainer>>>,
+ cache_size: u64,
+ cache_size_used: Atomic<u64>,
+ next_flush_sector: Atomic<u64>,
+ block_size: usize,
+}
+
+impl DiskStorage {
+ pub(crate) fn new(cache_size: u64, block_size: usize) -> impl PinInit<Self, Error> {
+ try_pin_init!( Self {
+ // TODO: Get rid of the box
+ // https://git.kernel.org/pub/scm/linux/kernel/git/boqun/linux.git/commit/?h=locking&id=a5d84cafb3e253a11d2e078902c5b090be2f4227
+ trees <- new_spinlock!(KBox::pin_init(TreeContainer::new(), GFP_KERNEL)?),
+ cache_size,
+ cache_size_used: Atomic::new(0),
+ next_flush_sector: Atomic::new(0),
+ block_size
+ })
+ }
+
+ pub(crate) fn access<'a, 'b, 'c>(
+ &'a self,
+ tree_guard: &'a mut SpinLockGuard<'b, Pin<KBox<TreeContainer>>>,
+ hw_data_guard: &'a mut SpinLockGuard<'b, HwQueueContext>,
+ sheaf: Option<XArraySheaf<'c>>,
+ ) -> DiskStorageAccess<'a, 'b, 'c> {
+ DiskStorageAccess::new(self, tree_guard, hw_data_guard, sheaf)
+ }
+
+ pub(crate) fn lock(&self) -> SpinLockGuard<'_, Pin<KBox<TreeContainer>>> {
+ self.trees.lock()
+ }
+}
+
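+/// A short-lived view over the cache and disk trees, constructed with the
+/// tree and hardware queue locks held, through which the I/O paths read,
+/// write and free sectors.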
+pub(crate) struct DiskStorageAccess<'a, 'b, 'c> {
+ cache_guard: xarray::Guard<'a, TreeNode>,
+ disk_guard: xarray::Guard<'a, TreeNode>,
+ hw_data_guard: &'a mut SpinLockGuard<'b, HwQueueContext>,
+ disk_storage: &'a DiskStorage,
+ pub(crate) sheaf: Option<XArraySheaf<'c>>,
+}
+
+impl<'a, 'b, 'c> DiskStorageAccess<'a, 'b, 'c> {
+ fn new(
+ disk_storage: &'a DiskStorage,
+ tree_guard: &'a mut SpinLockGuard<'b, Pin<KBox<TreeContainer>>>,
+ hw_data_guard: &'a mut SpinLockGuard<'b, HwQueueContext>,
+ sheaf: Option<XArraySheaf<'c>>,
+ ) -> Self {
+ Self {
+ cache_guard: tree_guard.cache_tree.lock(),
+ disk_guard: tree_guard.disk_tree.lock(),
+ hw_data_guard,
+ disk_storage,
+ sheaf,
+ }
+ }
+ fn to_index(sector: u64) -> usize {
+ (sector >> block::PAGE_SECTORS_SHIFT) as usize
+ }
+
+ fn to_sector(index: usize) -> u64 {
+ (index << block::PAGE_SECTORS_SHIFT) as u64
+ }
+
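+    /// Evicts one page from the volatile cache into the disk tree, scanning
+    /// circularly from `next_flush_sector`, and returns a page the caller
+    /// can reuse. A fully occupied cache page replaces the disk page
+    /// wholesale; a partially occupied one is copied into the existing disk
+    /// page sector by sector.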
+ fn extract_cache_page(&mut self) -> Result<KBox<NullBlockPage>> {
+ let cache_entry = self
+ .cache_guard
+ .find_next_entry_circular(
+ self.disk_storage.next_flush_sector.load(ordering::Relaxed) as usize
+ )
+ .expect("Expected to find a page in the cache");
+
+ let index = cache_entry.index();
+
+ self.disk_storage
+ .next_flush_sector
+ .store(Self::to_sector(index).wrapping_add(1), ordering::Relaxed);
+
+ self.disk_storage.cache_size_used.store(
+ self.disk_storage.cache_size_used.load(ordering::Relaxed) - PAGE_SIZE as u64,
+ ordering::Relaxed,
+ );
+
+ let page = match self.disk_guard.entry(index) {
+ xarray::Entry::Vacant(disk_entry) => {
+ disk_entry
+ .insert(cache_entry.remove(), self.sheaf.as_mut())
+ .expect("Preload is set up to allow insert without failure");
+ self.hw_data_guard
+ .page
+ .take()
+ .expect("Preload has allocated for us")
+ }
+ xarray::Entry::Occupied(mut disk_entry) => {
+ let mut page = if cache_entry.is_full() {
+ disk_entry.insert(cache_entry.remove())
+ } else {
+ let mut src = cache_entry;
+ let mut offset = 0;
+ for _ in 0..PAGE_SECTORS {
+ src.page_mut().get_pin_mut().copy_to_page(
+ disk_entry.page_mut().get_pin_mut(),
+ offset,
+ block::SECTOR_SIZE as usize,
+ )?;
+ offset += block::SECTOR_SIZE as usize;
+ }
+ src.remove()
+ };
+ page.reset();
+ page
+ }
+ };
+
+ Ok(page)
+ }
+
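+    /// Returns the cache page covering `sector`, taking the preloaded page
+    /// while the cache is below `cache_size` and evicting an existing cache
+    /// page to the disk tree once the limit is reached.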
+ fn get_cache_page(&mut self, sector: u64) -> Result<&mut NullBlockPage> {
+ let index = Self::to_index(sector);
+
+ if self.cache_guard.contains_index(index) {
+ Ok(self.cache_guard.get_mut(index).expect("Index is present"))
+ } else {
+ let page = if self.disk_storage.cache_size_used.load(ordering::Relaxed)
+ < self.disk_storage.cache_size
+ {
+ self.hw_data_guard
+ .page
+ .take()
+ .expect("Expected to have a page available")
+ } else {
+ self.extract_cache_page()?
+ };
+ Ok(self
+ .cache_guard
+ .insert_entry(index, page, self.sheaf.as_mut())
+ .expect("Should be able to insert")
+ .into_mut())
+ }
+ }
+
+ fn get_disk_page(&mut self, sector: u64) -> Result<&mut NullBlockPage> {
+ let index = Self::to_index(sector);
+
+ let page = match self.disk_guard.entry(index) {
+ xarray::Entry::Vacant(e) => e.insert(
+ self.hw_data_guard
+ .page
+ .take()
+ .expect("Expected page to be available"),
+ self.sheaf.as_mut(),
+ )?,
+ xarray::Entry::Occupied(e) => e.into_mut(),
+ };
+
+ Ok(page)
+ }
+
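+    /// Writes target the volatile cache when cache emulation is enabled and
+    /// the disk tree otherwise.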
+ pub(crate) fn get_write_page(&mut self, sector: u64) -> Result<&mut NullBlockPage> {
+ let page = if self.disk_storage.cache_size > 0 {
+ self.get_cache_page(sector)?
+ } else {
+ self.get_disk_page(sector)?
+ };
+
+ Ok(page)
+ }
+
+ pub(crate) fn get_read_page(&self, sector: u64) -> Option<&NullBlockPage> {
+ let index = Self::to_index(sector);
+ if self.disk_storage.cache_size > 0 {
+ self.cache_guard
+ .get(index)
+ .or_else(|| self.disk_guard.get(index))
+ } else {
+ self.disk_guard.get(index)
+ }
+ }
+
+ fn free_sector_tree(tree_access: &mut xarray::Guard<'_, TreeNode>, sector: u64) {
+ let index = Self::to_index(sector);
+ if let Some(page) = tree_access.get_mut(index) {
+ page.set_free(sector);
+
+ if page.is_empty() {
+ tree_access.remove(index);
+ }
+ }
+ }
+
+ pub(crate) fn free_sector(&mut self, sector: u64) {
+ if self.disk_storage.cache_size > 0 {
+ Self::free_sector_tree(&mut self.cache_guard, sector);
+ }
+
+ Self::free_sector_tree(&mut self.disk_guard, sector);
+ }
+}
+
+type Tree = XArray<TreeNode>;
+type TreeNode = KBox<NullBlockPage>;
+
+#[pin_data]
+pub(crate) struct TreeContainer {
+ #[pin]
+ disk_tree: Tree,
+ #[pin]
+ cache_tree: Tree,
+}
+
+impl TreeContainer {
+ fn new() -> impl PinInit<Self> {
+ pin_init!(TreeContainer {
+ disk_tree <- new_xarray!(xarray::AllocKind::Alloc),
+ cache_tree <- new_xarray!(xarray::AllocKind::Alloc),
+ })
+ }
+}
diff --git a/drivers/block/rnull/disk_storage/page.rs b/drivers/block/rnull/disk_storage/page.rs
new file mode 100644
index 0000000000000..c2e18502cbdda
--- /dev/null
+++ b/drivers/block/rnull/disk_storage/page.rs
@@ -0,0 +1,75 @@
+// SPDX-License-Identifier: GPL-2.0
+
+use kernel::{
+ block::{
+ SECTOR_MASK,
+ SECTOR_SHIFT, //
+ },
+ page::{
+ SafePage,
+ PAGE_SIZE, //
+ },
+ prelude::*,
+ types::Owned,
+ uapi::PAGE_SECTORS, //
+};
+
+const _CHECK_STATUS_WIDTH: () = build_assert!((PAGE_SIZE >> SECTOR_SHIFT) <= 64);
+
+pub(crate) struct NullBlockPage {
+ page: Owned<SafePage>,
+ status: u64,
+ block_size: usize,
+}
+
+impl NullBlockPage {
+ pub(crate) fn new(block_size: usize) -> Result<KBox<Self>> {
+ Ok(KBox::new(
+ Self {
+ page: SafePage::alloc_page(GFP_NOIO | __GFP_ZERO)?,
+ status: 0,
+ block_size,
+ },
+ GFP_NOIO,
+ )?)
+ }
+
+ pub(crate) fn set_occupied(&mut self, sector: u64) {
+ let idx = sector & u64::from(SECTOR_MASK);
+ self.status |= 1 << idx;
+ }
+
+ pub(crate) fn set_free(&mut self, sector: u64) {
+ let idx = sector & u64::from(SECTOR_MASK);
+ self.status &= !(1 << idx);
+ }
+
+ pub(crate) fn is_empty(&self) -> bool {
+ self.status == 0
+ }
+
+ pub(crate) fn reset(&mut self) {
+ self.status = 0;
+ }
+
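+    /// Returns `true` when every block in the page holds data. `status`
+    /// carries one bit per sector; only the bit of the first sector of
+    /// each block is tested.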
+ pub(crate) fn is_full(&self) -> bool {
+ let blocks_per_page = PAGE_SIZE >> self.block_size.trailing_zeros();
+ let shift = PAGE_SECTORS as usize / blocks_per_page;
+
+ for i in 0..blocks_per_page {
+ if self.status & (1 << (i * shift)) == 0 {
+ return false;
+ }
+ }
+
+ true
+ }
+
+ pub(crate) fn page_mut(&mut self) -> &mut Owned<SafePage> {
+ &mut self.page
+ }
+
+ pub(crate) fn page(&self) -> &Owned<SafePage> {
+ &self.page
+ }
+}
diff --git a/drivers/block/rnull/rnull.rs b/drivers/block/rnull/rnull.rs
index 4e226186d2f36..cca497aef40df 100644
--- a/drivers/block/rnull/rnull.rs
+++ b/drivers/block/rnull/rnull.rs
@@ -3,13 +3,22 @@
//! This is a Rust implementation of the C null block driver.
mod configfs;
+mod disk_storage;
use configfs::IRQMode;
+use disk_storage::{
+ DiskStorage,
+ NullBlockPage,
+ TreeContainer, //
+};
use kernel::{
bindings,
block::{
self,
- badblocks::{self, BadBlocks},
+ badblocks::{
+ self,
+ BadBlocks, //
+ },
bio::Segment,
mq::{
self,
@@ -20,7 +29,7 @@
Operations,
TagSet, //
},
- SECTOR_MASK, SECTOR_SHIFT,
+ SECTOR_SHIFT,
},
error::{
code,
@@ -28,11 +37,7 @@
},
ffi,
new_mutex,
- new_xarray,
- page::{
- SafePage,
- PAGE_SIZE, //
- },
+ new_spinlock,
pr_info,
prelude::*,
str::CString,
@@ -41,9 +46,11 @@
atomic::{
ordering,
Atomic, //
- },
+ }, //
Arc,
- Mutex, //
+ Mutex,
+ SpinLock,
+ SpinLockGuard,
},
time::{
hrtimer::{
@@ -58,7 +65,7 @@
OwnableRefCounted,
Owned, //
},
- xarray::XArray, //
+ xarray::XArraySheaf, //
};
use pin_init::PinInit;
@@ -148,9 +155,11 @@ fn init(_module: &'static ThisModule) -> impl PinInit<Self, Error> {
} else {
*module_parameters::submit_queues.value()
};
+
+ let block_size = *module_parameters::bs.value();
let disk = NullBlkDevice::new(NullBlkOptions {
name: &name,
- block_size: *module_parameters::bs.value(),
+ block_size,
rotational: *module_parameters::rotational.value() != 0,
capacity_mib: *module_parameters::gb.value() * 1024,
irq_mode: (*module_parameters::irqmode.value()).try_into()?,
@@ -163,6 +172,7 @@ fn init(_module: &'static ThisModule) -> impl PinInit<Self, Error> {
bad_blocks: Arc::pin_init(BadBlocks::new(false), GFP_KERNEL)?,
bad_blocks_once: false,
bad_blocks_partial_io: false,
+ storage: Arc::pin_init(DiskStorage::new(0, block_size as usize), GFP_KERNEL)?,
})?;
disks.push(disk, GFP_KERNEL)?;
}
@@ -192,8 +202,20 @@ struct NullBlkOptions<'a> {
bad_blocks: Arc<BadBlocks>,
bad_blocks_once: bool,
bad_blocks_partial_io: bool,
+ storage: Arc<DiskStorage>,
+}
+
+#[pin_data]
+struct NullBlkDevice {
+ storage: Arc<DiskStorage>,
+ irq_mode: IRQMode,
+ completion_time: Delta,
+ memory_backed: bool,
+ block_size: usize,
+ bad_blocks: Arc<BadBlocks>,
+ bad_blocks_once: bool,
+ bad_blocks_partial_io: bool,
}
-struct NullBlkDevice;
impl NullBlkDevice {
fn new(options: NullBlkOptions<'_>) -> Result<GenDisk<Self>> {
@@ -212,10 +234,14 @@ fn new(options: NullBlkOptions<'_>) -> Result<GenDisk<Self>> {
bad_blocks,
bad_blocks_once,
bad_blocks_partial_io,
+ storage,
} = options;
let mut flags = mq::tag_set::Flags::default();
+ // TODO: lim.features |= BLK_FEAT_WRITE_CACHE;
+ // if (dev->fua)
+ // lim.features |= BLK_FEAT_FUA;
if memory_backed {
flags |= mq::tag_set::Flag::Blocking;
}
@@ -233,13 +259,13 @@ fn new(options: NullBlkOptions<'_>) -> Result<GenDisk<Self>> {
GFP_KERNEL,
)?;
- let queue_data = Box::pin_init(
- pin_init!(QueueData {
- tree <- new_xarray!(kernel::xarray::AllocKind::Alloc),
+ let queue_data = Box::try_pin_init(
+ try_pin_init!(Self {
+ storage,
irq_mode,
completion_time,
memory_backed,
- block_size: block_size.into(),
+ block_size: block_size as usize,
bad_blocks,
bad_blocks_once,
bad_blocks_partial_io,
@@ -262,68 +288,133 @@ fn new(options: NullBlkOptions<'_>) -> Result<GenDisk<Self>> {
builder.build(fmt!("{}", name.to_str()?), tagset, queue_data)
}
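+    /// Worst-case number of xarray nodes needed for one insert into each
+    /// of the two trees: the maximum tree depth, ceil(BITS_PER_LONG /
+    /// XA_CHUNK_SHIFT), doubled to cover both the cache and disk tree.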
+ fn sheaf_size() -> usize {
+ 2 * ((usize::BITS as usize / bindings::XA_CHUNK_SHIFT)
+ + if (usize::BITS as usize % bindings::XA_CHUNK_SHIFT) == 0 {
+ 0
+ } else {
+ 1
+ })
+ }
+
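+    /// Ensures a page is stashed in the per-hctx context for the next tree
+    /// insert, dropping both locks around the allocation so it may sleep.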
+ fn preload<'b, 'c>(
+ tree_guard: &'b mut SpinLockGuard<'c, Pin<KBox<TreeContainer>>>,
+ hw_data_guard: &'b mut SpinLockGuard<'c, HwQueueContext>,
+ block_size: usize,
+ ) -> Result {
+ if hw_data_guard.page.is_none() {
+ hw_data_guard.page =
+ Some(tree_guard.do_unlocked(|| {
+ hw_data_guard.do_unlocked(|| NullBlockPage::new(block_size))
+ })?);
+ }
+
+ Ok(())
+ }
+
#[inline(always)]
- fn write(tree: &Tree, mut sector: u64, mut segment: Segment<'_>) -> Result {
+ fn write<'a, 'b, 'c>(
+ &'a self,
+ tree_guard: &'b mut SpinLockGuard<'c, Pin<KBox<TreeContainer>>>,
+ hw_data_guard: &'b mut SpinLockGuard<'c, HwQueueContext>,
+ mut sector: u64,
+ mut segment: Segment<'_>,
+ ) -> Result {
+ let mut sheaf: Option<XArraySheaf<'_>> = None;
+
while !segment.is_empty() {
- let page = NullBlockPage::new()?;
- let mut tree = tree.lock();
+ Self::preload(tree_guard, hw_data_guard, self.block_size)?;
- let page_idx = sector >> block::PAGE_SECTORS_SHIFT;
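+            // Keep a sheaf of pre-allocated xarray nodes at hand for the
+            // inserts below. The initial allocation is attempted with
+            // GFP_NOWAIT; refills and the fallback drop both locks and
+            // allocate with GFP_KERNEL.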
+ match &mut sheaf {
+ Some(sheaf) => {
+ tree_guard.do_unlocked(|| {
+ hw_data_guard.do_unlocked(|| sheaf.refill(GFP_KERNEL, Self::sheaf_size()))
+ })?;
+ }
+ None => {
+ let _ = sheaf.insert(
+ kernel::xarray::xarray_kmem_cache()
+ .sheaf(Self::sheaf_size(), GFP_NOWAIT)
+ .or(tree_guard.do_unlocked(|| {
+ hw_data_guard.do_unlocked(|| -> Result<_> {
+ kernel::xarray::xarray_kmem_cache()
+ .sheaf(Self::sheaf_size(), GFP_KERNEL)
+ })
+ }))?,
+ );
+ }
+ }
- let page = if let Some(page) = tree.get_mut(page_idx as usize) {
- page
- } else {
- tree.store(page_idx as usize, page, GFP_NOIO)?;
- tree.get_mut(page_idx as usize).unwrap()
- };
+ let mut access = self.storage.access(tree_guard, hw_data_guard, sheaf);
+ let page = access.get_write_page(sector)?;
page.set_occupied(sector);
let page_offset = (sector & u64::from(block::SECTOR_MASK)) << block::SECTOR_SHIFT;
- sector += segment.copy_to_page(page.page.get_pin_mut(), page_offset as usize) as u64
+
+ sector += segment.copy_to_page(page.page_mut().get_pin_mut(), page_offset as usize)
+ as u64
>> block::SECTOR_SHIFT;
+
+ sheaf = access.sheaf;
+ }
+
+ if let Some(sheaf) = sheaf {
+ tree_guard.do_unlocked(|| {
+ hw_data_guard.do_unlocked(|| {
+ sheaf.return_refill(GFP_KERNEL);
+ })
+ });
}
+
Ok(())
}
#[inline(always)]
- fn read(tree: &Tree, mut sector: u64, mut segment: Segment<'_>) -> Result {
- let tree = tree.lock();
+ fn read<'a, 'b, 'c>(
+ &'a self,
+ tree_guard: &'b mut SpinLockGuard<'c, Pin<KBox<TreeContainer>>>,
+ hw_data_guard: &'b mut SpinLockGuard<'c, HwQueueContext>,
+ mut sector: u64,
+ mut segment: Segment<'_>,
+ ) -> Result {
+ let access = self.storage.access(tree_guard, hw_data_guard, None);
while !segment.is_empty() {
- let idx = sector >> block::PAGE_SECTORS_SHIFT;
-
- if let Some(page) = tree.get(idx as usize) {
- let page_offset = (sector & u64::from(block::SECTOR_MASK)) << block::SECTOR_SHIFT;
- sector += segment.copy_from_page(&page.page, page_offset as usize) as u64
- >> block::SECTOR_SHIFT;
- } else {
- sector += segment.zero_page() as u64 >> block::SECTOR_SHIFT;
+ let page = access.get_read_page(sector);
+
+ match page {
+ Some(page) => {
+ let page_offset =
+ (sector & u64::from(block::SECTOR_MASK)) << block::SECTOR_SHIFT;
+ sector += segment.copy_from_page(page.page(), page_offset as usize) as u64
+ >> block::SECTOR_SHIFT;
+ }
+ None => sector += segment.zero_page() as u64 >> block::SECTOR_SHIFT,
}
}
Ok(())
}
- fn discard(tree: &Tree, mut sector: u64, sectors: u64, block_size: u64) -> Result {
- let mut remaining_bytes = sectors << SECTOR_SHIFT;
- let mut tree = tree.lock();
+ fn discard(
+ &self,
+ hw_data: &Pin<&SpinLock<HwQueueContext>>,
+ mut sector: u64,
+ sectors: u32,
+ ) -> Result {
+ let mut tree_guard = self.storage.lock();
+ let mut hw_data_guard = hw_data.lock();
- while remaining_bytes > 0 {
- let page_idx = sector >> block::PAGE_SECTORS_SHIFT;
- let mut remove = false;
- if let Some(page) = tree.get_mut(page_idx as usize) {
- page.set_free(sector);
- if page.is_empty() {
- remove = true;
- }
- }
+ let mut access = self
+ .storage
+ .access(&mut tree_guard, &mut hw_data_guard, None);
- if remove {
- drop(tree.remove(page_idx as usize))
- }
+ let mut remaining_bytes = (sectors as usize) << SECTOR_SHIFT;
- let processed = remaining_bytes.min(block_size);
- sector += processed >> SECTOR_SHIFT;
+ while remaining_bytes > 0 {
+ access.free_sector(sector);
+ let processed = remaining_bytes.min(self.block_size);
+ sector += (processed >> SECTOR_SHIFT) as u64;
remaining_bytes -= processed;
}
@@ -331,21 +422,34 @@ fn discard(tree: &Tree, mut sector: u64, sectors: u64, block_size: u64) -> Resul
}
#[inline(never)]
- fn transfer(rq: &mut Owned<mq::Request<Self>>, tree: &Tree, sectors: u32) -> Result {
+ fn transfer(
+ &self,
+ hw_data: &Pin<&SpinLock<HwQueueContext>>,
+ rq: &mut Owned<mq::Request<Self>>,
+ sectors: u32,
+ ) -> Result {
let mut sector = rq.sector();
let end_sector = sector + <u32 as Into<u64>>::into(sectors);
let command = rq.command();
+ // TODO: Use `PerCpu` to get rid of this lock
+ let mut hw_data_guard = hw_data.lock();
+ let mut tree_guard = self.storage.lock();
+
for bio in rq.bio_iter_mut() {
let segment_iter = bio.segment_iter();
for segment in segment_iter {
// Length might be limited by bad blocks.
let length = segment
.len()
- .min((sector - end_sector) as u32 >> SECTOR_SHIFT);
+ .min((end_sector - sector) as u32 >> SECTOR_SHIFT);
match command {
- bindings::req_op_REQ_OP_WRITE => Self::write(tree, sector, segment)?,
- bindings::req_op_REQ_OP_READ => Self::read(tree, sector, segment)?,
+ bindings::req_op_REQ_OP_WRITE => {
+ self.write(&mut tree_guard, &mut hw_data_guard, sector, segment)?
+ }
+ bindings::req_op_REQ_OP_READ => {
+ self.read(&mut tree_guard, &mut hw_data_guard, sector, segment)?
+ }
_ => (),
}
sector += u64::from(length) >> SECTOR_SHIFT;
@@ -355,29 +459,26 @@ fn transfer(rq: &mut Owned<mq::Request<Self>>, tree: &Tree, sectors: u32) -> Res
}
}
}
+
Ok(())
}
- fn handle_bad_blocks(
- rq: &mut Owned<mq::Request<Self>>,
- queue_data: &QueueData,
- sectors: &mut u32,
- ) -> Result {
- if queue_data.bad_blocks.enabled() {
+ fn handle_bad_blocks(&self, rq: &mut Owned<mq::Request<Self>>, sectors: &mut u32) -> Result {
+ if self.bad_blocks.enabled() {
let start = rq.sector();
let end = start + u64::from(*sectors);
- match queue_data.bad_blocks.check(start..end) {
+ match self.bad_blocks.check(start..end) {
badblocks::BlockStatus::None => {}
badblocks::BlockStatus::Acknowledged(mut range)
| badblocks::BlockStatus::Unacknowledged(mut range) => {
rq.data_ref().error.store(1, ordering::Relaxed);
- if queue_data.bad_blocks_once {
- queue_data.bad_blocks.set_good(range.clone())?;
+ if self.bad_blocks_once {
+ self.bad_blocks.set_good(range.clone())?;
}
- if queue_data.bad_blocks_partial_io {
- let block_size_sectors = queue_data.block_size >> SECTOR_SHIFT;
+ if self.bad_blocks_partial_io {
+ let block_size_sectors = (self.block_size >> SECTOR_SHIFT) as u64;
range.start = align_down(range.start, block_size_sectors);
if start < range.start {
*sectors = (range.start - start) as u32;
@@ -402,53 +503,8 @@ fn end_request(rq: Owned<mq::Request<Self>>) {
}
}
-const _CHEKC_STATUS_WIDTH: () = build_assert!((PAGE_SIZE >> SECTOR_SHIFT) <= 64);
-
-struct NullBlockPage {
- page: Owned<SafePage>,
- status: u64,
-}
-
-impl NullBlockPage {
- fn new() -> Result<KBox<Self>> {
- Ok(KBox::new(
- Self {
- page: SafePage::alloc_page(GFP_NOIO | __GFP_ZERO)?,
- status: 0,
- },
- GFP_NOIO,
- )?)
- }
-
- fn set_occupied(&mut self, sector: u64) {
- let idx = sector & u64::from(SECTOR_MASK);
- self.status |= 1 << idx;
- }
-
- fn set_free(&mut self, sector: u64) {
- let idx = sector & u64::from(SECTOR_MASK);
- self.status &= !(1 << idx);
- }
-
- fn is_empty(&self) -> bool {
- self.status == 0
- }
-}
-
-type TreeNode = KBox<NullBlockPage>;
-type Tree = XArray<TreeNode>;
-
-#[pin_data]
-struct QueueData {
- #[pin]
- tree: Tree,
- irq_mode: IRQMode,
- completion_time: Delta,
- memory_backed: bool,
- block_size: u64,
- bad_blocks: Arc<BadBlocks>,
- bad_blocks_once: bool,
- bad_blocks_partial_io: bool,
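+/// Per-hardware-queue context holding the page preloaded by
+/// `NullBlkDevice::preload()`, so that tree inserts on the I/O path cannot
+/// fail for lack of memory.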
+struct HwQueueContext {
+ page: Option<KBox<disk_storage::NullBlockPage>>,
}
#[pin_data]
@@ -503,10 +559,10 @@ fn align_down<T>(value: T, to: T) -> T
#[vtable]
impl Operations for NullBlkDevice {
- type QueueData = Pin<KBox<QueueData>>;
+ type QueueData = Pin<KBox<Self>>;
type RequestData = Pdu;
type TagSetData = ();
- type HwData = ();
+ type HwData = Pin<KBox<SpinLock<HwQueueContext>>>;
fn new_request_data() -> impl PinInit<Self::RequestData> {
pin_init!(Pdu {
@@ -517,41 +573,39 @@ fn new_request_data() -> impl PinInit<Self::RequestData> {
#[inline(always)]
fn queue_rq(
- _hw_data: (),
- queue_data: Pin<&QueueData>,
+ hw_data: Pin<&SpinLock<HwQueueContext>>,
+ this: Pin<&Self>,
mut rq: Owned<mq::Request<Self>>,
_is_last: bool,
) -> Result {
let mut sectors = rq.sectors();
- Self::handle_bad_blocks(&mut rq, queue_data.get_ref(), &mut sectors)?;
-
- if queue_data.memory_backed {
- let tree = &queue_data.tree;
+ Self::handle_bad_blocks(this.get_ref(), &mut rq, &mut sectors)?;
+ if this.memory_backed {
if rq.command() == bindings::req_op_REQ_OP_DISCARD {
- Self::discard(tree, rq.sector(), sectors.into(), queue_data.block_size)?;
+ this.discard(&hw_data, rq.sector(), sectors)?;
} else {
- Self::transfer(&mut rq, tree, sectors)?;
+ this.transfer(&hw_data, &mut rq, sectors)?;
}
}
- match queue_data.irq_mode {
+ match this.irq_mode {
IRQMode::None => Self::end_request(rq),
IRQMode::Soft => mq::Request::complete(rq.into()),
IRQMode::Timer => {
OwnableRefCounted::into_shared(rq)
- .start(queue_data.completion_time)
+ .start(this.completion_time)
.dismiss();
}
}
Ok(())
}
- fn commit_rqs(_hw_data: (), _queue_data: Pin<&QueueData>) {}
+ fn commit_rqs(_hw_data: Pin<&SpinLock<HwQueueContext>>, _queue_data: Pin<&Self>) {}
- fn init_hctx(_tagset_data: (), _hctx_idx: u32) -> Result {
- Ok(())
+ fn init_hctx(_tagset_data: (), _hctx_idx: u32) -> Result<Self::HwData> {
+ KBox::pin_init(new_spinlock!(HwQueueContext { page: None }), GFP_KERNEL)
}
fn complete(rq: ARef<mq::Request<Self>>) {
--
2.51.2