* [RFC PATCH 0/2] introduce precised blk-throttle control
@ 2024-05-15 1:23 zhaoyang.huang
2024-05-15 1:23 ` [RFC PATCH 1/2] block: introduce helper function to calculate bps budgt zhaoyang.huang
2024-05-15 1:23 ` [RFC PATCH 2/2] mm: introduce budgt control in readahead zhaoyang.huang
0 siblings, 2 replies; 8+ messages in thread
From: zhaoyang.huang @ 2024-05-15 1:23 UTC (permalink / raw)
To: Andrew Morton, Matthew Wilcox, Jens Axboe, Tejun Heo,
Josef Bacik, Baolin Wang, linux-mm, linux-block, linux-kernel,
cgroups, Zhaoyang Huang, steve.kang
From: Zhaoyang Huang <zhaoyang.huang@unisoc.com>
There is always a gap between blk-throttle's configured limit and the
bandwidth actually achieved, which is introduced by over-sized bios, as
there is no control on ra->size during readahead. This patch series
introduces a helper function that provides the bytes budget ("budgt")
and applies it to readahead.
Please find below the fio test results on v6.6, which show a 2%-10%
improvement in BW and latency. In addition, we also observed a more
stable instantaneous BW and a lower stdev value during the test.
blkio.throttle.read_bps_device = 1MB/s
before: read: IOPS=223, BW=894KiB/s (915kB/s)(175MiB/200919msec)
after : read: IOPS=239, BW=960KiB/s (983kB/s)(153MiB/163105msec)
before: clat (usec): min=4, max=16795k, avg=4468.74, stdev=265746.14
after : clat (usec): min=11, max=209193, avg=4105.22, stdev=27188.04
before: lat (usec): min=6, max=16795k, avg=4470.57, stdev=265746.14
after : lat (usec): min=16, max=209197, avg=4120.03, stdev=27188.04
blkio.throttle.read_bps_device = 10MB/s
before: read: IOPS=2380, BW=9524KiB/s (9752kB/s)(1007MiB/108311msec)
after : read: IOPS=2438, BW=9754KiB/s (9989kB/s)(1680MiB/176405msec)
before: clat (usec): min=4, max=2494.6k, avg=412.72, stdev=25783.51
after : clat (usec): min=4, max=201817, avg=399.58, stdev=8268.85
before: lat (usec): min=6, max=2494.6k, avg=414.48, stdev=25783.51
after : lat (usec): min=6, max=201819, avg=402.10, stdev=8268.85
blkio.throttle.read_bps_device = 20MB/s
fio ... -numjobs=8 ...
before : IOPS=37.9k, BW=148MiB/s (155MB/s)(11.6GiB/80333msec)
after : IOPS=39.0k, BW=153MiB/s (160MB/s)(15.6GiB/104914msec)
before : clat (usec): min=4, max=1056.6k, avg=197.23, stdev=10080.69
after : clat (usec): min=4, max=193481, avg=188.83, stdev=4651.29
before : lat (usec): min=5, max=1056.6k, avg=200.48, stdev=10080.76
after : lat (usec): min=5, max=193483, avg=192.68, stdev=4651.87
blkio.throttle.read_bps_device = 30MB/s
fio ... -numjobs=8 ...
before : IOPS=57.2k, BW=224MiB/s (234MB/s)(15.6GiB/71561msec)
after : IOPS=58.5k, BW=229MiB/s (240MB/s)(15.6GiB/69996msec)
before : clat (usec): min=4, max=1105.5k, avg=126.20, stdev=6419.22
after : clat (usec): min=4, max=183956, avg=120.60, stdev=2957.28
before : lat (usec): min=5, max=1105.5k, avg=129.45, stdev=6419.29
after : lat (usec): min=5, max=183958, avg=124.40, stdev=2958.18
Zhaoyang Huang (2):
block: introduce helper function to calculate bps budgt
mm: introduce budgt control in readahead
block/blk-throttle.c | 44 ++++++++++++++++++++++++++++++++++++++
include/linux/blk-cgroup.h | 10 +++++++++
mm/readahead.c | 33 ++++++++++++++++++++--------
3 files changed, 78 insertions(+), 9 deletions(-)
--
2.25.1
^ permalink raw reply [flat|nested] 8+ messages in thread
* [RFC PATCH 1/2] block: introduce helper function to calculate bps budgt
2024-05-15 1:23 [RFC PATCH 0/2] introduce precised blk-throttle control zhaoyang.huang
@ 2024-05-15 1:23 ` zhaoyang.huang
2024-05-15 1:23 ` [RFC PATCH 2/2] mm: introduce budgt control in readahead zhaoyang.huang
1 sibling, 0 replies; 8+ messages in thread
From: zhaoyang.huang @ 2024-05-15 1:23 UTC (permalink / raw)
To: Andrew Morton, Matthew Wilcox, Jens Axboe, Tejun Heo,
Josef Bacik, Baolin Wang, linux-mm, linux-block, linux-kernel,
cgroups, Zhaoyang Huang, steve.kang
From: Zhaoyang Huang <zhaoyang.huang@unisoc.com>
An 'over-sized' bio under blk-throttle control is delayed before being
dispatched, which breaks the original IO timing and keeps the
instantaneous BW from staying within the bps limit. Introduce a helper
function to calculate the block device's budget ("budgt"), which
provides the bytes allowed for the current bio.
Signed-off-by: Zhaoyang Huang <zhaoyang.huang@unisoc.com>
---
block/blk-throttle.c | 44 ++++++++++++++++++++++++++++++++++++++
include/linux/blk-cgroup.h | 10 +++++++++
2 files changed, 54 insertions(+)
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index f4850a6f860b..41c75258183d 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -10,6 +10,7 @@
#include <linux/blkdev.h>
#include <linux/bio.h>
#include <linux/blktrace_api.h>
+#include <linux/cgroup.h>
#include "blk.h"
#include "blk-cgroup-rwstat.h"
#include "blk-stat.h"
@@ -2365,6 +2366,49 @@ void blk_throtl_bio_endio(struct bio *bio)
}
#endif
+unsigned long blk_throttle_budgt(struct block_device *bdev)
+{
+ struct request_queue *q = bdev_get_queue(bdev);
+ struct blkcg *blkcg;
+ struct blkcg_gq *blkg;
+ struct throtl_grp *tg;
+ long long bytes_allowed = 0;
+ unsigned long jiffy_elapsed, jiffy_elapsed_rnd;
+ u64 bps_limit;
+
+ if (!q)
+ return U64_MAX;
+
+ rcu_read_lock();
+ spin_lock_irq(&q->queue_lock);
+ blkcg = css_to_blkcg(task_css(current, io_cgrp_id));
+ if (!blkcg)
+ goto out;
+
+ blkg = blkg_lookup(blkcg, q);
+ if (!blkg || !blkg_tryget(blkg))
+ goto out;
+
+ tg = blkg_to_tg(blkg);
+ bps_limit = tg_bps_limit(tg, READ);
+ if (bps_limit == U64_MAX)
+ goto out;
+
+ jiffy_elapsed = jiffy_elapsed_rnd = jiffies - tg->slice_start[READ];
+ if (!jiffy_elapsed)
+ jiffy_elapsed_rnd = tg->td->throtl_slice;
+
+ jiffy_elapsed_rnd = roundup(jiffy_elapsed_rnd, tg->td->throtl_slice);
+ bytes_allowed = calculate_bytes_allowed(bps_limit, jiffy_elapsed_rnd) +
+ tg->carryover_bytes[READ];
+ blkg_put(blkg);
+out:
+ spin_unlock_irq(&q->queue_lock);
+ rcu_read_unlock();
+ return bytes_allowed;
+}
+
+
int blk_throtl_init(struct gendisk *disk)
{
struct request_queue *q = disk->queue;
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index dd5841a42c33..ba79fa464e0a 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -15,10 +15,12 @@
*/
#include <linux/types.h>
+#include <linux/limits.h>
struct bio;
struct cgroup_subsys_state;
struct gendisk;
+struct block_device;
#define FC_APPID_LEN 129
@@ -45,6 +47,14 @@ static inline struct cgroup_subsys_state *bio_blkcg_css(struct bio *bio)
}
#endif /* CONFIG_BLK_CGROUP */
+#ifdef CONFIG_BLK_DEV_THROTTLING
+unsigned long blk_throttle_budgt(struct block_device *bdev);
+#else
+static inline unsigned long blk_throttle_budgt(struct block_device *bdev)
+{
+ return U64_MAX;
+}
+#endif
int blkcg_set_fc_appid(char *app_id, u64 cgrp_id, size_t app_id_len);
char *blkcg_get_fc_appid(struct bio *bio);
--
2.25.1
^ permalink raw reply [flat|nested] 8+ messages in thread
* [RFC PATCH 2/2] mm: introduce budgt control in readahead
2024-05-15 1:23 [RFC PATCH 0/2] introduce precised blk-throttle control zhaoyang.huang
2024-05-15 1:23 ` [RFC PATCH 1/2] block: introduce helper function to calculate bps budgt zhaoyang.huang
@ 2024-05-15 1:23 ` zhaoyang.huang
2024-05-15 4:09 ` Matthew Wilcox
2024-05-15 7:40 ` Tejun Heo
1 sibling, 2 replies; 8+ messages in thread
From: zhaoyang.huang @ 2024-05-15 1:23 UTC (permalink / raw)
To: Andrew Morton, Matthew Wilcox, Jens Axboe, Tejun Heo,
Josef Bacik, Baolin Wang, linux-mm, linux-block, linux-kernel,
cgroups, Zhaoyang Huang, steve.kang
From: Zhaoyang Huang <zhaoyang.huang@unisoc.com>
Currently, the readahead size is decided mainly by the page cache's
status, such as hit/miss or hole size, which could lead to suspension
of a following bio that exceeds the size allowed by blk-throttle when
BLK_THROTTLING is on. Introduce the budget ("budgt") value here to keep
the bio's size within the allowed size.
Signed-off-by: Zhaoyang Huang <zhaoyang.huang@unisoc.com>
---
mm/readahead.c | 33 ++++++++++++++++++++++++---------
1 file changed, 24 insertions(+), 9 deletions(-)
diff --git a/mm/readahead.c b/mm/readahead.c
index 130c0e7df99f..2b6120ced6f9 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -128,6 +128,7 @@
#include <linux/blk-cgroup.h>
#include <linux/fadvise.h>
#include <linux/sched/mm.h>
+#include <linux/minmax.h>
#include "internal.h"
@@ -358,16 +359,23 @@ static unsigned long get_init_ra_size(unsigned long size, unsigned long max)
* Get the previous window size, ramp it up, and
* return it as the new window size.
*/
-static unsigned long get_next_ra_size(struct file_ra_state *ra,
+static unsigned long get_next_ra_size(struct readahead_control *ractl,
unsigned long max)
{
- unsigned long cur = ra->size;
+ unsigned long cur = ractl->ra->size;
+ struct inode *inode = ractl->mapping->host;
+ unsigned long budgt = inode->i_sb->s_bdev ?
+ blk_throttle_budgt(inode->i_sb->s_bdev) : 0;
+ unsigned long val = max;
if (cur < max / 16)
- return 4 * cur;
+ val = 4 * cur;
if (cur <= max / 2)
- return 2 * cur;
- return max;
+ val = 2 * cur;
+
+ val = budgt ? min(budgt / PAGE_SIZE, val) : val;
+
+ return val;
}
/*
@@ -437,6 +445,8 @@ static int try_context_readahead(struct address_space *mapping,
unsigned long max)
{
pgoff_t size;
+ unsigned long budgt = mapping->host->i_sb->s_bdev ?
+ blk_throttle_budgt(mapping->host->i_sb->s_bdev) : 0;
size = count_history_pages(mapping, index, max);
@@ -455,7 +465,7 @@ static int try_context_readahead(struct address_space *mapping,
size *= 2;
ra->start = index;
- ra->size = min(size + req_size, max);
+ ra->size = min3(budgt / PAGE_SIZE, size + req_size, max);
ra->async_size = 1;
return 1;
@@ -552,6 +562,8 @@ static void ondemand_readahead(struct readahead_control *ractl,
pgoff_t index = readahead_index(ractl);
pgoff_t expected, prev_index;
unsigned int order = folio ? folio_order(folio) : 0;
+ unsigned long budgt = ractl->mapping->host->i_sb->s_bdev ?
+ blk_throttle_budgt(ractl->mapping->host->i_sb->s_bdev) : 0;
/*
* If the request exceeds the readahead window, allow the read to
@@ -574,7 +586,7 @@ static void ondemand_readahead(struct readahead_control *ractl,
1UL << order);
if (index == expected || index == (ra->start + ra->size)) {
ra->start += ra->size;
- ra->size = get_next_ra_size(ra, max_pages);
+ ra->size = get_next_ra_size(ractl, max_pages);
ra->async_size = ra->size;
goto readit;
}
@@ -599,7 +611,7 @@ static void ondemand_readahead(struct readahead_control *ractl,
ra->start = start;
ra->size = start - index; /* old async_size */
ra->size += req_size;
- ra->size = get_next_ra_size(ra, max_pages);
+ ra->size = get_next_ra_size(ractl, max_pages);
ra->async_size = ra->size;
goto readit;
}
@@ -631,6 +643,9 @@ static void ondemand_readahead(struct readahead_control *ractl,
* standalone, small random read
* Read as is, and do not pollute the readahead state.
*/
+ if (budgt)
+ req_size = min(budgt / PAGE_SIZE, req_size);
+
do_page_cache_ra(ractl, req_size, 0);
return;
@@ -647,7 +662,7 @@ static void ondemand_readahead(struct readahead_control *ractl,
* Take care of maximum IO pages as above.
*/
if (index == ra->start && ra->size == ra->async_size) {
- add_pages = get_next_ra_size(ra, max_pages);
+ add_pages = get_next_ra_size(ractl, max_pages);
if (ra->size + add_pages <= max_pages) {
ra->async_size = add_pages;
ra->size += add_pages;
--
2.25.1
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [RFC PATCH 2/2] mm: introduce budgt control in readahead
2024-05-15 1:23 ` [RFC PATCH 2/2] mm: introduce budgt control in readahead zhaoyang.huang
@ 2024-05-15 4:09 ` Matthew Wilcox
2024-05-15 6:31 ` Zhaoyang Huang
2024-05-15 7:40 ` Tejun Heo
1 sibling, 1 reply; 8+ messages in thread
From: Matthew Wilcox @ 2024-05-15 4:09 UTC (permalink / raw)
To: zhaoyang.huang
Cc: Andrew Morton, Jens Axboe, Tejun Heo, Josef Bacik, Baolin Wang,
linux-mm, linux-block, linux-kernel, cgroups, Zhaoyang Huang,
steve.kang
On Wed, May 15, 2024 at 09:23:50AM +0800, zhaoyang.huang wrote:
> + unsigned long budgt = inode->i_sb->s_bdev ?
> + blk_throttle_budgt(inode->i_sb->s_bdev) : 0;
NAK as previously explained.
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [RFC PATCH 2/2] mm: introduce budgt control in readahead
2024-05-15 4:09 ` Matthew Wilcox
@ 2024-05-15 6:31 ` Zhaoyang Huang
0 siblings, 0 replies; 8+ messages in thread
From: Zhaoyang Huang @ 2024-05-15 6:31 UTC (permalink / raw)
To: Matthew Wilcox
Cc: zhaoyang.huang, Andrew Morton, Jens Axboe, Tejun Heo,
Josef Bacik, Baolin Wang, linux-mm, linux-block, linux-kernel,
cgroups, steve.kang
On Wed, May 15, 2024 at 12:09 PM Matthew Wilcox <willy@infradead.org> wrote:
>
> On Wed, May 15, 2024 at 09:23:50AM +0800, zhaoyang.huang wrote:
> > + unsigned long budgt = inode->i_sb->s_bdev ?
> > + blk_throttle_budgt(inode->i_sb->s_bdev) : 0;
>
> NAK as previously explained.
ok. But this commit could work by following the configuration of
blk-throttle as long as it works on btrfs with internal RAID on.
Furthermore, this will help the blkcg meet the desired BPS value
perfectly.
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [RFC PATCH 2/2] mm: introduce budgt control in readahead
2024-05-15 1:23 ` [RFC PATCH 2/2] mm: introduce budgt control in readahead zhaoyang.huang
2024-05-15 4:09 ` Matthew Wilcox
@ 2024-05-15 7:40 ` Tejun Heo
2024-05-15 8:17 ` Zhaoyang Huang
1 sibling, 1 reply; 8+ messages in thread
From: Tejun Heo @ 2024-05-15 7:40 UTC (permalink / raw)
To: zhaoyang.huang
Cc: Andrew Morton, Matthew Wilcox, Jens Axboe, Josef Bacik,
Baolin Wang, linux-mm, linux-block, linux-kernel, cgroups,
Zhaoyang Huang, steve.kang
Hello,
On Wed, May 15, 2024 at 09:23:50AM +0800, zhaoyang.huang wrote:
> +static unsigned long get_next_ra_size(struct readahead_control *ractl,
> unsigned long max)
> {
> + unsigned long cur = ractl->ra->size;
> + struct inode *inode = ractl->mapping->host;
> + unsigned long budgt = inode->i_sb->s_bdev ?
> + blk_throttle_budgt(inode->i_sb->s_bdev) : 0;
Technical correctness aside, I'm not convinced it's generally a good idea to
bubble up one specific IO control mechanism's detail all the way upto RA
layer. Besides what's the gain here? For continuous IO stream, whether some
RA bios are oversized or not shouldn't matter, no? Doesn't this just affect
the accuracy of the last RA IO of a finite read stream?
Thanks.
--
tejun
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [RFC PATCH 2/2] mm: introduce budgt control in readahead
2024-05-15 7:40 ` Tejun Heo
@ 2024-05-15 8:17 ` Zhaoyang Huang
0 siblings, 0 replies; 8+ messages in thread
From: Zhaoyang Huang @ 2024-05-15 8:17 UTC (permalink / raw)
To: Tejun Heo
Cc: zhaoyang.huang, Andrew Morton, Matthew Wilcox, Jens Axboe,
Josef Bacik, Baolin Wang, linux-mm, linux-block, linux-kernel,
cgroups, steve.kang
On Wed, May 15, 2024 at 3:40 PM Tejun Heo <tj@kernel.org> wrote:
>
> Hello,
>
> On Wed, May 15, 2024 at 09:23:50AM +0800, zhaoyang.huang wrote:
> > +static unsigned long get_next_ra_size(struct readahead_control *ractl,
> > unsigned long max)
> > {
> > + unsigned long cur = ractl->ra->size;
> > + struct inode *inode = ractl->mapping->host;
> > + unsigned long budgt = inode->i_sb->s_bdev ?
> > + blk_throttle_budgt(inode->i_sb->s_bdev) : 0;
>
> Technical correctness aside, I'm not convinced it's generally a good idea to
> bubble up one specific IO control mechanism's detail all the way upto RA
> layer. Besides what's the gain here? For continuous IO stream, whether some
> RA bios are oversized or not shouldn't matter, no? Doesn't this just affect
> the accuracy of the last RA IO of a finite read stream?
Thanks for the feedback. If I understand correctly, an oversized RA bio
of a finite read fails the dispatch check and is queued on the tg's
queue, which should be regarded as introducing a drop in IOPS.
submit_bio
blk_throtl_bio
if(!tg_may_dispatch) //failed, queue the bio to tg's queue
What we get here is a more precise BW for the throttled blkcg, as shown
below, from which we can see that the 'after' results can exactly meet
the configured bps value, with a slight improvement since there are
no longer any hung (oversized) bios.
blkio.throttle.read_bps_device = 20MB/s
fio ... -numjobs=8 ...
before : IOPS=37.9k, BW=148MiB/s (155MB/s)(11.6GiB/80333msec)
after : IOPS=39.0k, BW=153MiB/s (160MB/s)(15.6GiB/104914msec)
before : clat (usec): min=4, max=1056.6k, avg=197.23, stdev=10080.69
after : clat (usec): min=4, max=193481, avg=188.83, stdev=4651.29
before : lat (usec): min=5, max=1056.6k, avg=200.48, stdev=10080.76
after : lat (usec): min=5, max=193483, avg=192.68, stdev=4651.87
blkio.throttle.read_bps_device = 30MB/s
fio ... -numjobs=8 ...
before : IOPS=57.2k, BW=224MiB/s (234MB/s)(15.6GiB/71561msec)
after : IOPS=58.5k, BW=229MiB/s (240MB/s)(15.6GiB/69996msec)
before : clat (usec): min=4, max=1105.5k, avg=126.20, stdev=6419.22
after : clat (usec): min=4, max=183956, avg=120.60, stdev=2957.28
before : lat (usec): min=5, max=1105.5k, avg=129.45, stdev=6419.29
after : lat (usec): min=5, max=183958, avg=124.40, stdev=2958.18
>
> Thanks. blk_throttle_budgt
>
> --
> tejun
^ permalink raw reply [flat|nested] 8+ messages in thread
* [RFC PATCH 0/2] introduce budgt control in readahead
@ 2024-05-09 2:39 zhaoyang.huang
2024-05-09 2:39 ` [RFC PATCH 1/2] block: introduce helper function to calculate bps budgt zhaoyang.huang
0 siblings, 1 reply; 8+ messages in thread
From: zhaoyang.huang @ 2024-05-09 2:39 UTC (permalink / raw)
To: Andrew Morton, Matthew Wilcox, Jens Axboe, Tejun Heo,
Josef Bacik, Baolin Wang, linux-mm, linux-block, linux-kernel,
cgroups, Zhaoyang Huang, steve.kang
From: Zhaoyang Huang <zhaoyang.huang@unisoc.com>
An over-limit BW value is observed in the throttling group during fio
tests, which is caused by over-sized bios, as there is no control on
ra->size during readahead. This patch series introduces a helper
function that provides the bytes limit and applies it to readahead.
Please find below the fio test results on v6.6, which show a 2%-10%
improvement in BW and latency. In addition, we also observed a more
stable instantaneous BW value during the test.
blkio.throttle.read_bps_device = 1MB/s
before: read: IOPS=223, BW=894KiB/s (915kB/s)(175MiB/200919msec)
after : read: IOPS=239, BW=960KiB/s (983kB/s)(153MiB/163105msec)
before: clat (usec): min=4, max=16795k, avg=4468.74, stdev=265746.14
lat (usec): min=6, max=16795k, avg=4470.57, stdev=265746.14
after : clat (usec): min=11, max=209193, avg=4105.22, stdev=27188.04
lat (usec): min=16, max=209197, avg=4120.03, stdev=27188.04
blkio.throttle.read_bps_device = 10MB/s
before: read: IOPS=2380, BW=9524KiB/s (9752kB/s)(1007MiB/108311msec)
after : read: IOPS=2438, BW=9754KiB/s (9989kB/s)(1680MiB/176405msec)
before: clat (usec): min=4, max=2494.6k, avg=412.72, stdev=25783.51
lat (usec): min=6, max=2494.6k, avg=414.48, stdev=25783.51
after : clat (usec): min=4, max=201817, avg=399.58, stdev=8268.85
lat (usec): min=6, max=201819, avg=402.10, stdev=8268.85
Zhaoyang Huang (2):
block: introduce helper function to calculate bps budgt
mm: introduce budgt control in readahead
block/blk-throttle.c | 44 ++++++++++++++++++++++++++++++++++++++
include/linux/blk-cgroup.h | 10 +++++++++
mm/readahead.c | 33 ++++++++++++++++++++--------
3 files changed, 78 insertions(+), 9 deletions(-)
--
2.25.1
^ permalink raw reply [flat|nested] 8+ messages in thread
* [RFC PATCH 1/2] block: introduce helper function to calculate bps budgt
2024-05-09 2:39 [RFC PATCH 0/2] " zhaoyang.huang
@ 2024-05-09 2:39 ` zhaoyang.huang
0 siblings, 0 replies; 8+ messages in thread
From: zhaoyang.huang @ 2024-05-09 2:39 UTC (permalink / raw)
To: Andrew Morton, Matthew Wilcox, Jens Axboe, Tejun Heo,
Josef Bacik, Baolin Wang, linux-mm, linux-block, linux-kernel,
cgroups, Zhaoyang Huang, steve.kang
From: Zhaoyang Huang <zhaoyang.huang@unisoc.com>
An 'over-sized' bio under blk-throttle control is delayed before being
dispatched, which breaks the original IO timing and keeps the
instantaneous BW from staying within the bps limit. Introduce a helper
function to calculate the block device's budget ("budgt"), which
provides the bytes allowed for the current bio.
Signed-off-by: Zhaoyang Huang <zhaoyang.huang@unisoc.com>
---
block/blk-throttle.c | 44 ++++++++++++++++++++++++++++++++++++++
include/linux/blk-cgroup.h | 10 +++++++++
2 files changed, 54 insertions(+)
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index f4850a6f860b..41c75258183d 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -10,6 +10,7 @@
#include <linux/blkdev.h>
#include <linux/bio.h>
#include <linux/blktrace_api.h>
+#include <linux/cgroup.h>
#include "blk.h"
#include "blk-cgroup-rwstat.h"
#include "blk-stat.h"
@@ -2365,6 +2366,49 @@ void blk_throtl_bio_endio(struct bio *bio)
}
#endif
+unsigned long blk_throttle_budgt(struct block_device *bdev)
+{
+ struct request_queue *q = bdev_get_queue(bdev);
+ struct blkcg *blkcg;
+ struct blkcg_gq *blkg;
+ struct throtl_grp *tg;
+ long long bytes_allowed = 0;
+ unsigned long jiffy_elapsed, jiffy_elapsed_rnd;
+ u64 bps_limit;
+
+ if (!q)
+ return U64_MAX;
+
+ rcu_read_lock();
+ spin_lock_irq(&q->queue_lock);
+ blkcg = css_to_blkcg(task_css(current, io_cgrp_id));
+ if (!blkcg)
+ goto out;
+
+ blkg = blkg_lookup(blkcg, q);
+ if (!blkg || !blkg_tryget(blkg))
+ goto out;
+
+ tg = blkg_to_tg(blkg);
+ bps_limit = tg_bps_limit(tg, READ);
+ if (bps_limit == U64_MAX)
+ goto out;
+
+ jiffy_elapsed = jiffy_elapsed_rnd = jiffies - tg->slice_start[READ];
+ if (!jiffy_elapsed)
+ jiffy_elapsed_rnd = tg->td->throtl_slice;
+
+ jiffy_elapsed_rnd = roundup(jiffy_elapsed_rnd, tg->td->throtl_slice);
+ bytes_allowed = calculate_bytes_allowed(bps_limit, jiffy_elapsed_rnd) +
+ tg->carryover_bytes[READ];
+ blkg_put(blkg);
+out:
+ spin_unlock_irq(&q->queue_lock);
+ rcu_read_unlock();
+ return bytes_allowed;
+}
+
+
int blk_throtl_init(struct gendisk *disk)
{
struct request_queue *q = disk->queue;
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index dd5841a42c33..ba79fa464e0a 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -15,10 +15,12 @@
*/
#include <linux/types.h>
+#include <linux/limits.h>
struct bio;
struct cgroup_subsys_state;
struct gendisk;
+struct block_device;
#define FC_APPID_LEN 129
@@ -45,6 +47,14 @@ static inline struct cgroup_subsys_state *bio_blkcg_css(struct bio *bio)
}
#endif /* CONFIG_BLK_CGROUP */
+#ifdef CONFIG_BLK_DEV_THROTTLING
+unsigned long blk_throttle_budgt(struct block_device *bdev);
+#else
+static inline unsigned long blk_throttle_budgt(struct block_device *bdev)
+{
+ return U64_MAX;
+}
+#endif
int blkcg_set_fc_appid(char *app_id, u64 cgrp_id, size_t app_id_len);
char *blkcg_get_fc_appid(struct bio *bio);
--
2.25.1
^ permalink raw reply [flat|nested] 8+ messages in thread
end of thread, other threads:[~2024-05-15 8:17 UTC | newest]
Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-05-15 1:23 [RFC PATCH 0/2] introduce precised blk-throttle control zhaoyang.huang
2024-05-15 1:23 ` [RFC PATCH 1/2] block: introduce helper function to calculate bps budgt zhaoyang.huang
2024-05-15 1:23 ` [RFC PATCH 2/2] mm: introduce budgt control in readahead zhaoyang.huang
2024-05-15 4:09 ` Matthew Wilcox
2024-05-15 6:31 ` Zhaoyang Huang
2024-05-15 7:40 ` Tejun Heo
2024-05-15 8:17 ` Zhaoyang Huang
-- strict thread matches above, loose matches on Subject: below --
2024-05-09 2:39 [RFC PATCH 0/2] " zhaoyang.huang
2024-05-09 2:39 ` [RFC PATCH 1/2] block: introduce helper function to calculate bps budgt zhaoyang.huang
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox