From: Baoquan He <bhe@redhat.com>
To: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-mm@kvack.org, Andrew Morton <akpm@linux-foundation.org>,
Christoph Hellwig <hch@lst.de>,
Uladzislau Rezki <urezki@gmail.com>,
Lorenzo Stoakes <lstoakes@gmail.com>,
Peter Zijlstra <peterz@infradead.org>
Subject: Re: [patch 6/6] mm/vmalloc: Dont purge usable blocks unnecessarily
Date: Wed, 24 May 2023 18:34:00 +0800
Message-ID: <ZG3oGBMUueExFrro@MiWiFi-R3L-srv>
In-Reply-To: <20230523140002.852175941@linutronix.de>
On 05/23/23 at 04:02pm, Thomas Gleixner wrote:
> Purging fragmented blocks is done unconditionally in several contexts:
>
> 1) From drain_vmap_area_work(), when the number of lazily-freed
>    vmap_areas has reached the threshold
>
> 2) Reclaiming vmalloc address space from pcpu_get_vm_areas()
>
> 3) _unmap_aliases()
>
> #1 There is no reason to zap fragmented vmap blocks unconditionally, simply
> because reclaiming all lazy areas drains at least
>
> 32MB * fls(num_online_cpus())
>
> per invocation which is plenty.
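(For reference, that figure comes from lazy_max_pages(); a rough sketch of
how mm/vmalloc.c derives it, paraphrased from memory and not part of this
patch:

	static unsigned long lazy_max_pages(void)
	{
		unsigned int log = fls(num_online_cpus());

		/* 32MB worth of pages, scaled by log2 of the online CPU count */
		return log * (32UL * 1024 * 1024 / PAGE_SIZE);
	}

so one full reclaim already drains at least that much address space.)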
>
> #2 Reclaiming when running out of space or due to memory pressure makes a
> lot of sense
>
> #3 _unmap_aliases() requires touching everything because the caller has no
>    clue which vmap_area used a particular page last, and the vmap_area lost
>    that information too.
>
> Except for the vfree + VM_FLUSH_RESET_PERMS case, which removes the
> vmap area first and then cares about the flush. That in turn requires
> a full walk of _all_ vmap areas including the one which was just
> added to the purge list.
>
> But as this has to be flushed anyway, it is an opportunity to combine
> outstanding TLB flushes with the housekeeping of purging freed areas.
> Still, as in #1, there is no really good reason to zap usable vmap blocks
> unconditionally.
>
> Add a @force_purge argument to the relevant functions and, if it is false,
> only purge fragmented blocks which have less than 1/4 of their capacity
> left.
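(To make the cutoff concrete with my own numbers, assuming a config where
VMAP_BBMAP_BITS works out to 1024:

	VMAP_PURGE_THRESHOLD = VMAP_BBMAP_BITS / 4 = 256

i.e. a non-forced purge leaves a fragmented block alone as long as at least
256 of its 1024 slots are still free.)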
>
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> ---
> mm/vmalloc.c | 34 ++++++++++++++++++++++------------
> 1 file changed, 22 insertions(+), 12 deletions(-)
>
> --- a/mm/vmalloc.c
> +++ b/mm/vmalloc.c
> @@ -791,7 +791,7 @@ get_subtree_max_size(struct rb_node *nod
> RB_DECLARE_CALLBACKS_MAX(static, free_vmap_area_rb_augment_cb,
> struct vmap_area, rb_node, unsigned long, subtree_max_size, va_size)
>
> -static void purge_vmap_area_lazy(void);
> +static void purge_vmap_area_lazy(bool force_purge);
> static BLOCKING_NOTIFIER_HEAD(vmap_notify_list);
> static void drain_vmap_area_work(struct work_struct *work);
> static DECLARE_WORK(drain_vmap_work, drain_vmap_area_work);
> @@ -1649,7 +1649,7 @@ static struct vmap_area *alloc_vmap_area
>
> overflow:
> if (!purged) {
> - purge_vmap_area_lazy();
> + purge_vmap_area_lazy(true);
> purged = 1;
> goto retry;
> }
> @@ -1717,7 +1717,7 @@ static atomic_long_t vmap_lazy_nr = ATOM
> static DEFINE_MUTEX(vmap_purge_lock);
>
> /* for per-CPU blocks */
> -static void purge_fragmented_blocks_allcpus(void);
> +static void purge_fragmented_blocks_allcpus(bool force_purge);
>
> /*
> * Purges all lazily-freed vmap areas.
> @@ -1787,10 +1787,10 @@ static bool __purge_vmap_area_lazy(unsig
> /*
> * Kick off a purge of the outstanding lazy areas.
> */
> -static void purge_vmap_area_lazy(void)
> +static void purge_vmap_area_lazy(bool force_purge)
> {
> mutex_lock(&vmap_purge_lock);
> - purge_fragmented_blocks_allcpus();
> + purge_fragmented_blocks_allcpus(force_purge);
> __purge_vmap_area_lazy(ULONG_MAX, 0);
> mutex_unlock(&vmap_purge_lock);
> }
> @@ -1908,6 +1908,12 @@ static struct vmap_area *find_unlink_vma
>
> #define VMAP_BLOCK_SIZE (VMAP_BBMAP_BITS * PAGE_SIZE)
>
> +/*
> + * Purge threshold to prevent overeager purging of fragmented blocks for
> + * regular operations: Purge if vb->free is less than 1/4 of the capacity.
> + */
> +#define VMAP_PURGE_THRESHOLD (VMAP_BBMAP_BITS / 4)
> +
> #define VMAP_RAM 0x1 /* indicates vm_map_ram area*/
> #define VMAP_BLOCK 0x2 /* mark out the vmap_block sub-type*/
> #define VMAP_FLAGS_MASK 0x3
> @@ -2087,12 +2093,16 @@ static void free_vmap_block(struct vmap_
> }
>
> static bool purge_fragmented_block(struct vmap_block *vb, struct vmap_block_queue *vbq,
> - struct list_head *purge_list)
> + struct list_head *purge_list, bool force_purge)
> {
> if (!(vb->free + vb->dirty == VMAP_BBMAP_BITS && vb->dirty != VMAP_BBMAP_BITS))
> return false;
>
> - /* prevent further allocs after releasing lock */
> + /* Don't overeagerly purge usable blocks unless requested */
> + if (!force_purge && vb->free >= VMAP_PURGE_THRESHOLD)
> + return false;
> +
> + /* prevent further allocs after releasing lock */
> WRITE_ONCE(vb->free, 0);
> /* prevent purging it again */
> WRITE_ONCE(vb->dirty, VMAP_BBMAP_BITS);
> @@ -2115,7 +2125,7 @@ static void free_purged_blocks(struct li
> }
> }
>
> -static void purge_fragmented_blocks(int cpu)
> +static void purge_fragmented_blocks(int cpu, bool force_purge)
> {
> LIST_HEAD(purge);
> struct vmap_block *vb;
> @@ -2130,19 +2140,19 @@ static void purge_fragmented_blocks(int
> continue;
>
> spin_lock(&vb->lock);
> - purge_fragmented_block(vb, vbq, &purge);
> + purge_fragmented_block(vb, vbq, &purge, force_purge);
> spin_unlock(&vb->lock);
> }
> rcu_read_unlock();
> free_purged_blocks(&purge);
> }
>
> -static void purge_fragmented_blocks_allcpus(void)
> +static void purge_fragmented_blocks_allcpus(bool force_purge)
> {
> int cpu;
>
> for_each_possible_cpu(cpu)
> - purge_fragmented_blocks(cpu);
> + purge_fragmented_blocks(cpu, force_purge);
> }
>
> static void *vb_alloc(unsigned long size, gfp_t gfp_mask)
> @@ -4173,7 +4183,7 @@ struct vm_struct **pcpu_get_vm_areas(con
> overflow:
> spin_unlock(&free_vmap_area_lock);
> if (!purged) {
> - purge_vmap_area_lazy();
> + purge_vmap_area_lazy(true);
> purged = true;
>
> /* Before "retry", check if we recover. */
I am wondering why you bother adding 'force_purge' to purge_vmap_area_lazy()
and purge_fragmented_blocks_allcpus() when all their callers pass true. Can't
we just hard-code 'force_purge' as true in the purge_fragmented_block() call
inside purge_fragmented_blocks()?
alloc_vmap_area()
pcpu_get_vm_areas()
  -->purge_vmap_area_lazy(true)
     -->purge_fragmented_blocks_allcpus(force_purge=true)
        -->purge_fragmented_block(force_purge=true)
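Then purge_vmap_area_lazy() could keep its old signature; a sketch of what I
have in mind (untested, just to illustrate):

	static void purge_vmap_area_lazy(void)
	{
		mutex_lock(&vmap_purge_lock);
		/* this slow path always wants the aggressive purge */
		purge_fragmented_blocks_allcpus();
		__purge_vmap_area_lazy(ULONG_MAX, 0);
		mutex_unlock(&vmap_purge_lock);
	}

with purge_fragmented_blocks() hard-coding the flag at the innermost call: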
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 062f4a86b049..c812f8afa985 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -2140,7 +2140,7 @@ static void purge_fragmented_blocks(int cpu, bool force_purge)
continue;

spin_lock(&vb->lock);
- purge_fragmented_block(vb, vbq, &purge, force_purge);
+ purge_fragmented_block(vb, vbq, &purge, true);
spin_unlock(&vb->lock);
}
rcu_read_unlock();
Also, one call site is missing the change, which makes the build fail:
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 062f4a86b049..0453bc66812e 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -2277,7 +2277,7 @@ static void _vm_unmap_aliases(unsigned long start, unsigned long end, int flush)
* not purgeable, check whether there is dirty
* space to be flushed.
*/
- if (!purge_fragmented_block(vb, vbq, &purge_list) &&
+ if (!purge_fragmented_block(vb, vbq, &purge_list, false) &&
vb->dirty_max && vb->dirty != VMAP_BBMAP_BITS) {
unsigned long va_start = vb->va->va_start;
unsigned long s, e;