From: Greg Thelen <gthelen@google.com>
To: Glauber Costa <glommer@parallels.com>
Cc: linux-mm@kvack.org, cgroups@vger.kernel.org,
Dave Chinner <david@fromorbit.com>,
Serge Hallyn <serge.hallyn@canonical.com>,
kamezawa.hiroyu@jp.fujitsu.com, Michal Hocko <mhocko@suse.cz>,
Johannes Weiner <hannes@cmpxchg.org>,
Andrew Morton <akpm@linux-foundation.org>,
hughd@google.com, linux-fsdevel@vger.kernel.org,
containers@lists.linux-foundation.org,
Dave Chinner <dchinner@redhat.com>
Subject: Re: [PATCH v3 11/32] list_lru: per-node list infrastructure
Date: Sun, 14 Apr 2013 22:37:51 -0700
Message-ID: <xr93k3o48ic0.fsf@gthelen.mtv.corp.google.com>
In-Reply-To: <1365429659-22108-12-git-send-email-glommer@parallels.com>
On Mon, Apr 08 2013, Glauber Costa wrote:
> From: Dave Chinner <dchinner@redhat.com>
>
> Now that we have an LRU list API, we can start to enhance the
> implementation. This splits the single LRU list into per-node lists
> and locks to improve scalability. Items are placed on lists
> according to the node the memory belongs to. To make scanning the
> lists efficient, we also track which per-node lists have entries
> in an active nodemask.
>
> [ glommer: fixed warnings ]
> Signed-off-by: Dave Chinner <dchinner@redhat.com>
> Signed-off-by: Glauber Costa <glommer@parallels.com>
Reviewed-by: Greg Thelen <gthelen@google.com>
(one comment below regarding a potentially unnecessary spinlock)
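For anyone following along at home, here is a minimal (untested)
sketch of how a cache might drive this API; "my_obj", my_cache_put(),
my_isolate() and my_shrink() are invented for illustration:

struct my_obj {
	struct list_head lru;		/* links the object into the list_lru */
	/* ... payload ... */
};

static struct list_lru my_lru;		/* list_lru_init(&my_lru) at setup */

/* Object became unused: put it on the LRU (no-op if already queued). */
static void my_cache_put(struct my_obj *obj)
{
	list_lru_add(&my_lru, &obj->lru);
}

static enum lru_status
my_isolate(struct list_head *item, spinlock_t *lock, void *cb_arg)
{
	struct my_obj *obj = container_of(item, struct my_obj, lru);
	struct list_head *freeable = cb_arg;

	/*
	 * Called with the per-node lock held (a real callback may drop
	 * it and return LRU_RETRY).  Unlink the item and park it on a
	 * private list; LRU_REMOVED tells the walker to decrement
	 * nr_items for us.
	 */
	list_move(&obj->lru, freeable);
	return LRU_REMOVED;
}

static void my_shrink(long nr_to_walk)
{
	LIST_HEAD(freeable);
	struct my_obj *obj, *next;

	list_lru_walk(&my_lru, my_isolate, &freeable, nr_to_walk);
	list_for_each_entry_safe(obj, next, &freeable, lru)
		kfree(obj);
}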
> ---
> include/linux/list_lru.h | 14 ++--
> lib/list_lru.c | 162 +++++++++++++++++++++++++++++++++++------------
> 2 files changed, 130 insertions(+), 46 deletions(-)
>
> diff --git a/include/linux/list_lru.h b/include/linux/list_lru.h
> index 394c28c..9073f97 100644
> --- a/include/linux/list_lru.h
> +++ b/include/linux/list_lru.h
> @@ -8,6 +8,7 @@
> #define _LRU_LIST_H
>
> #include <linux/list.h>
> +#include <linux/nodemask.h>
>
> enum lru_status {
> LRU_REMOVED, /* item removed from list */
> @@ -16,20 +17,21 @@ enum lru_status {
> LRU_RETRY, /* item not freeable, lock dropped */
> };
>
> -struct list_lru {
> +struct list_lru_node {
> spinlock_t lock;
> struct list_head list;
> long nr_items;
> +} ____cacheline_aligned_in_smp;
> +
> +struct list_lru {
> + struct list_lru_node node[MAX_NUMNODES];
> + nodemask_t active_nodes;
> };
>
> int list_lru_init(struct list_lru *lru);
> int list_lru_add(struct list_lru *lru, struct list_head *item);
> int list_lru_del(struct list_lru *lru, struct list_head *item);
> -
> -static inline long list_lru_count(struct list_lru *lru)
> -{
> - return lru->nr_items;
> -}
> +long list_lru_count(struct list_lru *lru);
>
> typedef enum lru_status
> (*list_lru_walk_cb)(struct list_head *item, spinlock_t *lock, void *cb_arg);
> diff --git a/lib/list_lru.c b/lib/list_lru.c
> index 03bd984..0119af8 100644
> --- a/lib/list_lru.c
> +++ b/lib/list_lru.c
> @@ -6,6 +6,7 @@
> */
> #include <linux/kernel.h>
> #include <linux/module.h>
> +#include <linux/mm.h>
> #include <linux/list_lru.h>
>
> int
> @@ -13,14 +14,19 @@ list_lru_add(
> struct list_lru *lru,
> struct list_head *item)
> {
> - spin_lock(&lru->lock);
> + int nid = page_to_nid(virt_to_page(item));
> + struct list_lru_node *nlru = &lru->node[nid];
> +
> + spin_lock(&nlru->lock);
> + BUG_ON(nlru->nr_items < 0);
> if (list_empty(item)) {
> - list_add_tail(item, &lru->list);
> - lru->nr_items++;
> - spin_unlock(&lru->lock);
> + list_add_tail(item, &nlru->list);
> + if (nlru->nr_items++ == 0)
> + node_set(nid, lru->active_nodes);
> + spin_unlock(&nlru->lock);
> return 1;
> }
> - spin_unlock(&lru->lock);
> + spin_unlock(&nlru->lock);
> return 0;
> }
> EXPORT_SYMBOL_GPL(list_lru_add);
> @@ -30,43 +36,72 @@ list_lru_del(
> struct list_lru *lru,
> struct list_head *item)
> {
> - spin_lock(&lru->lock);
> + int nid = page_to_nid(virt_to_page(item));
> + struct list_lru_node *nlru = &lru->node[nid];
> +
> + spin_lock(&nlru->lock);
> if (!list_empty(item)) {
> list_del_init(item);
> - lru->nr_items--;
> - spin_unlock(&lru->lock);
> + if (--nlru->nr_items == 0)
> + node_clear(nid, lru->active_nodes);
> + BUG_ON(nlru->nr_items < 0);
> + spin_unlock(&nlru->lock);
> return 1;
> }
> - spin_unlock(&lru->lock);
> + spin_unlock(&nlru->lock);
> return 0;
> }
> EXPORT_SYMBOL_GPL(list_lru_del);
>
> long
> -list_lru_walk(
> - struct list_lru *lru,
> - list_lru_walk_cb isolate,
> - void *cb_arg,
> - long nr_to_walk)
> +list_lru_count(
> + struct list_lru *lru)
> {
> + long count = 0;
> + int nid;
> +
> + for_each_node_mask(nid, lru->active_nodes) {
> + struct list_lru_node *nlru = &lru->node[nid];
> +
> + spin_lock(&nlru->lock);
I'm not sure the spin_lock() is really needed here. It wasn't taken
before this patch: list_lru_count() simply read lru->nr_items, so a
racy per-node sum would be no worse than the old behaviour.
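If that racy total is acceptable, an unlocked version (untested
sketch, same signature as above) could be:

long
list_lru_count(
	struct list_lru *lru)
{
	long count = 0;
	int nid;

	/* The snapshot may be stale, but so was the pre-patch read. */
	for_each_node_mask(nid, lru->active_nodes)
		count += lru->node[nid].nr_items;

	return count;
}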
> + BUG_ON(nlru->nr_items < 0);
> + count += nlru->nr_items;
> + spin_unlock(&nlru->lock);
> + }
> +
> + return count;
> +}
> +EXPORT_SYMBOL_GPL(list_lru_count);
> +
> +static long
> +list_lru_walk_node(
> + struct list_lru *lru,
> + int nid,
> + list_lru_walk_cb isolate,
> + void *cb_arg,
> + long *nr_to_walk)
> +{
> + struct list_lru_node *nlru = &lru->node[nid];
> struct list_head *item, *n;
> - long removed = 0;
> + long isolated = 0;
> restart:
> - spin_lock(&lru->lock);
> - list_for_each_safe(item, n, &lru->list) {
> + spin_lock(&nlru->lock);
> + list_for_each_safe(item, n, &nlru->list) {
> int ret;
>
> - if (nr_to_walk-- < 0)
> + if ((*nr_to_walk)-- < 0)
> break;
>
> - ret = isolate(item, &lru->lock, cb_arg);
> + ret = isolate(item, &nlru->lock, cb_arg);
> switch (ret) {
> case LRU_REMOVED:
> - lru->nr_items--;
> - removed++;
> + if (--nlru->nr_items == 0)
> + node_clear(nid, lru->active_nodes);
> + BUG_ON(nlru->nr_items < 0);
> + isolated++;
> break;
> case LRU_ROTATE:
> - list_move_tail(item, &lru->list);
> + list_move_tail(item, &nlru->list);
> break;
> case LRU_SKIP:
> break;
> @@ -76,42 +111,89 @@ restart:
> BUG();
> }
> }
> - spin_unlock(&lru->lock);
> - return removed;
> + spin_unlock(&nlru->lock);
> + return isolated;
> }
> -EXPORT_SYMBOL_GPL(list_lru_walk);
>
> long
> -list_lru_dispose_all(
> - struct list_lru *lru,
> - list_lru_dispose_cb dispose)
> +list_lru_walk(
> + struct list_lru *lru,
> + list_lru_walk_cb isolate,
> + void *cb_arg,
> + long nr_to_walk)
> {
> - long disposed = 0;
> + long isolated = 0;
> + int nid;
> +
> + for_each_node_mask(nid, lru->active_nodes) {
> + isolated += list_lru_walk_node(lru, nid, isolate,
> + cb_arg, &nr_to_walk);
> + if (nr_to_walk <= 0)
> + break;
> + }
> + return isolated;
> +}
> +EXPORT_SYMBOL_GPL(list_lru_walk);
> +
> +static long
> +list_lru_dispose_all_node(
> + struct list_lru *lru,
> + int nid,
> + list_lru_dispose_cb dispose)
> +{
> + struct list_lru_node *nlru = &lru->node[nid];
> LIST_HEAD(dispose_list);
> + long disposed = 0;
>
> - spin_lock(&lru->lock);
> - while (!list_empty(&lru->list)) {
> - list_splice_init(&lru->list, &dispose_list);
> - disposed += lru->nr_items;
> - lru->nr_items = 0;
> - spin_unlock(&lru->lock);
> + spin_lock(&nlru->lock);
> + while (!list_empty(&nlru->list)) {
> + list_splice_init(&nlru->list, &dispose_list);
> + disposed += nlru->nr_items;
> + nlru->nr_items = 0;
> + node_clear(nid, lru->active_nodes);
> + spin_unlock(&nlru->lock);
>
> dispose(&dispose_list);
>
> - spin_lock(&lru->lock);
> + spin_lock(&nlru->lock);
> }
> - spin_unlock(&lru->lock);
> + spin_unlock(&nlru->lock);
> return disposed;
> }
>
> +long
> +list_lru_dispose_all(
> + struct list_lru *lru,
> + list_lru_dispose_cb dispose)
> +{
> + long disposed;
> + long total = 0;
> + int nid;
> +
> + do {
> + disposed = 0;
> + for_each_node_mask(nid, lru->active_nodes) {
> + disposed += list_lru_dispose_all_node(lru, nid,
> + dispose);
> + }
> + total += disposed;
> + } while (disposed != 0);
> +
> + return total;
> +}
> +
> int
> list_lru_init(
> struct list_lru *lru)
> {
> - spin_lock_init(&lru->lock);
> - INIT_LIST_HEAD(&lru->list);
> - lru->nr_items = 0;
> + int i;
>
> + nodes_clear(lru->active_nodes);
> + for (i = 0; i < MAX_NUMNODES; i++) {
> + spin_lock_init(&lru->node[i].lock);
> + INIT_LIST_HEAD(&lru->node[i].list);
> + lru->node[i].nr_items = 0;
> + }
> return 0;
> }
> EXPORT_SYMBOL_GPL(list_lru_init);
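One more observation, for the record rather than as an objection:
node[] is statically sized at MAX_NUMNODES and each list_lru_node is
padded out to a cache line by ____cacheline_aligned_in_smp, so these
structures get large on big-node configs. Back-of-envelope with my own
numbers (64-byte cache lines, CONFIG_NODES_SHIFT=10):

	sizeof(struct list_lru) ~= MAX_NUMNODES * L1_CACHE_BYTES
	                         = 1024 * 64 = 64KB per LRU

allocated up front, regardless of how many nodes are actually online.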