From: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
To: David Rientjes <rientjes@google.com>
Cc: linux-mm@kvack.org, linux-numa@vger.kernel.org,
Andrew Morton <akpm@linux-foundation.org>,
Mel Gorman <mel@csn.ul.ie>,
Randy Dunlap <randy.dunlap@oracle.com>,
Nishanth Aravamudan <nacc@us.ibm.com>,
Andi Kleen <andi@firstfloor.org>, Adam Litke <agl@us.ibm.com>,
Andy Whitcroft <apw@canonical.com>,
eric.whitney@hp.com
Subject: Re: [PATCH 7/12] hugetlb: add per node hstate attributes
Date: Fri, 09 Oct 2009 08:57:07 -0400 [thread overview]
Message-ID: <1255093027.14370.35.camel@useless.americas.hpqcorp.net> (raw)
In-Reply-To: <alpine.DEB.1.00.0910081339391.4765@chino.kir.corp.google.com>
On Thu, 2009-10-08 at 13:42 -0700, David Rientjes wrote:
> On Thu, 8 Oct 2009, Lee Schermerhorn wrote:
>
> > Index: linux-2.6.31-mmotm-090925-1435/mm/hugetlb.c
> > ===================================================================
> > --- linux-2.6.31-mmotm-090925-1435.orig/mm/hugetlb.c 2009-10-07 12:31:59.000000000 -0400
> > +++ linux-2.6.31-mmotm-090925-1435/mm/hugetlb.c 2009-10-07 12:32:01.000000000 -0400
> > @@ -24,6 +24,7 @@
> > #include <asm/io.h>
> >
> > #include <linux/hugetlb.h>
> > +#include <linux/node.h>
> > #include "internal.h"
> >
> > const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL;
> > @@ -1320,39 +1321,71 @@ out:
> > static struct kobject *hugepages_kobj;
> > static struct kobject *hstate_kobjs[HUGE_MAX_HSTATE];
> >
> > -static struct hstate *kobj_to_hstate(struct kobject *kobj)
> > +static struct hstate *kobj_to_node_hstate(struct kobject *kobj, int *nidp);
> > +
> > +static struct hstate *kobj_to_hstate(struct kobject *kobj, int *nidp)
> > {
> > int i;
> > +
> > for (i = 0; i < HUGE_MAX_HSTATE; i++)
> > - if (hstate_kobjs[i] == kobj)
> > + if (hstate_kobjs[i] == kobj) {
> > + if (nidp)
> > + *nidp = NUMA_NO_NODE;
> > return &hstates[i];
> > - BUG();
> > - return NULL;
> > + }
> > +
> > + return kobj_to_node_hstate(kobj, nidp);
> > }
> >
> > static ssize_t nr_hugepages_show_common(struct kobject *kobj,
> > struct kobj_attribute *attr, char *buf)
> > {
> > - struct hstate *h = kobj_to_hstate(kobj);
> > - return sprintf(buf, "%lu\n", h->nr_huge_pages);
> > + struct hstate *h;
> > + unsigned long nr_huge_pages;
> > + int nid;
> > +
> > + h = kobj_to_hstate(kobj, &nid);
> > + if (nid == NUMA_NO_NODE)
> > + nr_huge_pages = h->nr_huge_pages;
> > + else
> > + nr_huge_pages = h->nr_huge_pages_node[nid];
> > +
> > + return sprintf(buf, "%lu\n", nr_huge_pages);
> > }
> > static ssize_t nr_hugepages_store_common(bool obey_mempolicy,
> > struct kobject *kobj, struct kobj_attribute *attr,
> > const char *buf, size_t len)
> > {
> > int err;
> > + int nid;
> > unsigned long count;
> > - struct hstate *h = kobj_to_hstate(kobj);
> > + struct hstate *h;
> > NODEMASK_ALLOC(nodemask_t, nodes_allowed);
> >
> > err = strict_strtoul(buf, 10, &count);
> > if (err)
> > return 0;
> >
> > - if (!(obey_mempolicy && init_nodemask_of_mempolicy(nodes_allowed))) {
> > - NODEMASK_FREE(nodes_allowed);
> > - nodes_allowed = &node_online_map;
> > - }
> > + h = kobj_to_hstate(kobj, &nid);
> > + if (nid == NUMA_NO_NODE) {
> > + /*
> > + * global hstate attribute
> > + */
> > + if (!(obey_mempolicy &&
> > + init_nodemask_of_mempolicy(nodes_allowed))) {
> > + NODEMASK_FREE(nodes_allowed);
> > + nodes_allowed = &node_states[N_HIGH_MEMORY];
> > + }
> > + } else if (nodes_allowed) {
> > + /*
> > + * per node hstate attribute: adjust count to global,
> > + * but restrict alloc/free to the specified node.
> > + */
> > + count += h->nr_huge_pages - h->nr_huge_pages_node[nid];
> > + init_nodemask_of_node(nodes_allowed, nid);
> > + } else
> > + nodes_allowed = &node_states[N_HIGH_MEMORY];
> > +
> > h->max_huge_pages = set_max_huge_pages(h, count, nodes_allowed);
> >
> > if (nodes_allowed != &node_online_map)
> > @@ -1398,7 +1431,7 @@ HSTATE_ATTR(nr_hugepages_mempolicy);
> > static ssize_t nr_overcommit_hugepages_show(struct kobject *kobj,
> > struct kobj_attribute *attr, char *buf)
> > {
> > - struct hstate *h = kobj_to_hstate(kobj);
> > + struct hstate *h = kobj_to_hstate(kobj, NULL);
> > return sprintf(buf, "%lu\n", h->nr_overcommit_huge_pages);
> > }
> > static ssize_t nr_overcommit_hugepages_store(struct kobject *kobj,
> > @@ -1406,7 +1439,7 @@ static ssize_t nr_overcommit_hugepages_s
> > {
> > int err;
> > unsigned long input;
> > - struct hstate *h = kobj_to_hstate(kobj);
> > + struct hstate *h = kobj_to_hstate(kobj, NULL);
> >
> > err = strict_strtoul(buf, 10, &input);
> > if (err)
> > @@ -1423,15 +1456,24 @@ HSTATE_ATTR(nr_overcommit_hugepages);
> > static ssize_t free_hugepages_show(struct kobject *kobj,
> > struct kobj_attribute *attr, char *buf)
> > {
> > - struct hstate *h = kobj_to_hstate(kobj);
> > - return sprintf(buf, "%lu\n", h->free_huge_pages);
> > + struct hstate *h;
> > + unsigned long free_huge_pages;
> > + int nid;
> > +
> > + h = kobj_to_hstate(kobj, &nid);
> > + if (nid == NUMA_NO_NODE)
> > + free_huge_pages = h->free_huge_pages;
> > + else
> > + free_huge_pages = h->free_huge_pages_node[nid];
> > +
> > + return sprintf(buf, "%lu\n", free_huge_pages);
> > }
> > HSTATE_ATTR_RO(free_hugepages);
> >
> > static ssize_t resv_hugepages_show(struct kobject *kobj,
> > struct kobj_attribute *attr, char *buf)
> > {
> > - struct hstate *h = kobj_to_hstate(kobj);
> > + struct hstate *h = kobj_to_hstate(kobj, NULL);
> > return sprintf(buf, "%lu\n", h->resv_huge_pages);
> > }
> > HSTATE_ATTR_RO(resv_hugepages);
> > @@ -1439,8 +1481,17 @@ HSTATE_ATTR_RO(resv_hugepages);
> > static ssize_t surplus_hugepages_show(struct kobject *kobj,
> > struct kobj_attribute *attr, char *buf)
> > {
> > - struct hstate *h = kobj_to_hstate(kobj);
> > - return sprintf(buf, "%lu\n", h->surplus_huge_pages);
> > + struct hstate *h;
> > + unsigned long surplus_huge_pages;
> > + int nid;
> > +
> > + h = kobj_to_hstate(kobj, &nid);
> > + if (nid == NUMA_NO_NODE)
> > + surplus_huge_pages = h->surplus_huge_pages;
> > + else
> > + surplus_huge_pages = h->surplus_huge_pages_node[nid];
> > +
> > + return sprintf(buf, "%lu\n", surplus_huge_pages);
> > }
> > HSTATE_ATTR_RO(surplus_hugepages);
> >
> > @@ -1460,19 +1511,21 @@ static struct attribute_group hstate_att
> > .attrs = hstate_attrs,
> > };
> >
> > -static int __init hugetlb_sysfs_add_hstate(struct hstate *h)
> > +static int __init hugetlb_sysfs_add_hstate(struct hstate *h,
> > + struct kobject *parent,
> > + struct kobject **hstate_kobjs,
> > + struct attribute_group *hstate_attr_group)
> > {
> > int retval;
> > + int hi = h - hstates;
> >
> > - hstate_kobjs[h - hstates] = kobject_create_and_add(h->name,
> > - hugepages_kobj);
> > - if (!hstate_kobjs[h - hstates])
> > + hstate_kobjs[hi] = kobject_create_and_add(h->name, parent);
> > + if (!hstate_kobjs[hi])
> > return -ENOMEM;
> >
> > - retval = sysfs_create_group(hstate_kobjs[h - hstates],
> > - &hstate_attr_group);
> > + retval = sysfs_create_group(hstate_kobjs[hi], hstate_attr_group);
> > if (retval)
> > - kobject_put(hstate_kobjs[h - hstates]);
> > + kobject_put(hstate_kobjs[hi]);
> >
> > return retval;
> > }
> > @@ -1487,17 +1540,184 @@ static void __init hugetlb_sysfs_init(vo
> > return;
> >
> > for_each_hstate(h) {
> > - err = hugetlb_sysfs_add_hstate(h);
> > + err = hugetlb_sysfs_add_hstate(h, hugepages_kobj,
> > + hstate_kobjs, &hstate_attr_group);
> > if (err)
> > printk(KERN_ERR "Hugetlb: Unable to add hstate %s",
> > h->name);
> > }
> > }
> >
> > +#ifdef CONFIG_NUMA
> > +
> > +/*
> > + * node_hstate/s - associate per node hstate attributes, via their kobjects,
> > + * with node sysdevs in node_devices[] using a parallel array. The array
> > + * index of a node sysdev or _hstate == node id.
> > + * This is here to avoid any static dependency of the node sysdev driver, in
> > + * the base kernel, on the hugetlb module.
> > + */
> > +struct node_hstate {
> > + struct kobject *hugepages_kobj;
> > + struct kobject *hstate_kobjs[HUGE_MAX_HSTATE];
> > +};
> > +struct node_hstate node_hstates[MAX_NUMNODES];
> > +
> > +/*
> > + * A subset of global hstate attributes for node sysdevs
> > + */
> > +static struct attribute *per_node_hstate_attrs[] = {
> > + &nr_hugepages_attr.attr,
> > + &free_hugepages_attr.attr,
> > + &surplus_hugepages_attr.attr,
> > + NULL,
> > +};
> > +
> > +static struct attribute_group per_node_hstate_attr_group = {
> > + .attrs = per_node_hstate_attrs,
> > +};
> > +
> > +/*
> > + * kobj_to_node_hstate - lookup global hstate for node sysdev hstate attr kobj.
> > + * Returns node id via non-NULL nidp.
> > + */
> > +static struct hstate *kobj_to_node_hstate(struct kobject *kobj, int *nidp)
> > +{
> > + int nid;
> > +
> > + for (nid = 0; nid < nr_node_ids; nid++) {
>
> I previously asked if this should use for_each_node_mask() instead?
>
> > + struct node_hstate *nhs = &node_hstates[nid];
> > + int i;
> > + for (i = 0; i < HUGE_MAX_HSTATE; i++)
> > + if (nhs->hstate_kobjs[i] == kobj) {
> > + if (nidp)
> > + *nidp = nid;
> > + return &hstates[i];
> > + }
> > + }
> > +
> > + BUG();
> > + return NULL;
> > +}
> > +
> > +/*
> > + * Unregister hstate attributes from a single node sysdev.
> > + * No-op if no hstate attributes attached.
> > + */
> > +void hugetlb_unregister_node(struct node *node)
> > +{
> > + struct hstate *h;
> > + struct node_hstate *nhs = &node_hstates[node->sysdev.id];
> > +
> > + if (!nhs->hugepages_kobj)
> > + return;
> > +
> > + for_each_hstate(h)
> > + if (nhs->hstate_kobjs[h - hstates]) {
> > + kobject_put(nhs->hstate_kobjs[h - hstates]);
> > + nhs->hstate_kobjs[h - hstates] = NULL;
> > + }
> > +
> > + kobject_put(nhs->hugepages_kobj);
> > + nhs->hugepages_kobj = NULL;
> > +}
> > +
> > +/*
> > + * hugetlb module exit: unregister hstate attributes from node sysdevs
> > + * that have them.
> > + */
> > +static void hugetlb_unregister_all_nodes(void)
> > +{
> > + int nid;
> > +
> > + /*
> > + * disable node sysdev registrations.
> > + */
> > + register_hugetlbfs_with_node(NULL, NULL);
> > +
> > + /*
> > + * remove hstate attributes from any nodes that have them.
> > + */
> > + for (nid = 0; nid < nr_node_ids; nid++)
> > + hugetlb_unregister_node(&node_devices[nid]);
> > +}
> > +
> > +/*
> > + * Register hstate attributes for a single node sysdev.
> > + * No-op if attributes already registered.
> > + */
> > +void hugetlb_register_node(struct node *node)
> > +{
> > + struct hstate *h;
> > + struct node_hstate *nhs = &node_hstates[node->sysdev.id];
> > + int err;
> > +
> > + if (nhs->hugepages_kobj)
> > + return; /* already allocated */
> > +
> > + nhs->hugepages_kobj = kobject_create_and_add("hugepages",
> > + &node->sysdev.kobj);
> > + if (!nhs->hugepages_kobj)
> > + return;
> > +
> > + for_each_hstate(h) {
> > + err = hugetlb_sysfs_add_hstate(h, nhs->hugepages_kobj,
> > + nhs->hstate_kobjs,
> > + &per_node_hstate_attr_group);
> > + if (err) {
> > + printk(KERN_ERR "Hugetlb: Unable to add hstate %s"
> > + " for node %d\n",
> > + h->name, node->sysdev.id);
> > + hugetlb_unregister_node(node);
> > + break;
> > + }
> > + }
> > +}
> > +
> > +/*
> > + * hugetlb init time: register hstate attributes for all registered
> > + * node sysdevs. All on-line nodes should have registered their
> > + * associated sysdev by the time the hugetlb module initializes.
> > + */
> > +static void hugetlb_register_all_nodes(void)
> > +{
> > + int nid;
> > +
> > + for (nid = 0; nid < nr_node_ids; nid++) {
> > + struct node *node = &node_devices[nid];
> > + if (node->sysdev.id == nid)
> > + hugetlb_register_node(node);
> > + }
>
> This looks like another use of for_each_node_mask over N_HIGH_MEMORY. I
> previously asked if the check for node->sysdev.id == nid is still
> necessary at this point?
Sorry. The check for sysdev.id == nid is there to ensure that this node
sysdev has been registered when this function is called. nr_node_ids is
the maximum node id seen so far, but we can't assume that all nodes
0..nr_node_ids are present/on-line.
As for using for_each_node_mask: I think that would be OK. This code
works because hugetlb_register_node() filters out nodes w/o memory; so
only visiting nodes with memory should work as well. We can change this
[for consistency] with an incremental patch, if you like.
I'd hate to respin V11 for just this. But, if we have to for other
reasons, I'll [try to remember to] do this.
>
> > +
> > + /*
> > + * Let the node sysdev driver know we're here so it can
> > + * [un]register hstate attributes on node hotplug.
> > + */
> > + register_hugetlbfs_with_node(hugetlb_register_node,
> > + hugetlb_unregister_node);
> > +}
> > +#else /* !CONFIG_NUMA */
> > +
> > +static struct hstate *kobj_to_node_hstate(struct kobject *kobj, int *nidp)
> > +{
> > + BUG();
> > + if (nidp)
> > + *nidp = -1;
> > + return NULL;
> > +}
> > +
> > +static void hugetlb_unregister_all_nodes(void) { }
> > +
> > +static void hugetlb_register_all_nodes(void) { }
> > +
> > +#endif
> > +
> > static void __exit hugetlb_exit(void)
> > {
> > struct hstate *h;
> >
> > + hugetlb_unregister_all_nodes();
> > +
> > for_each_hstate(h) {
> > kobject_put(hstate_kobjs[h - hstates]);
> > }
> > @@ -1532,6 +1752,8 @@ static int __init hugetlb_init(void)
> >
> > hugetlb_sysfs_init();
> >
> > + hugetlb_register_all_nodes();
> > +
> > return 0;
> > }
> > module_init(hugetlb_init);
> > Index: linux-2.6.31-mmotm-090925-1435/include/linux/node.h
> > ===================================================================
> > --- linux-2.6.31-mmotm-090925-1435.orig/include/linux/node.h 2009-10-07 12:31:51.000000000 -0400
> > +++ linux-2.6.31-mmotm-090925-1435/include/linux/node.h 2009-10-07 12:32:01.000000000 -0400
> > @@ -28,6 +28,7 @@ struct node {
> >
> > struct memory_block;
> > extern struct node node_devices[];
> > +typedef void (*node_registration_func_t)(struct node *);
> >
> > extern int register_node(struct node *, int, struct node *);
> > extern void unregister_node(struct node *node);
>
> I previously suggested against the typedef unless this functionality (node
> hotplug notifiers) becomes more generic outside of the hugetlb use case.
> --
> To unsubscribe from this list: send the line "unsubscribe linux-numa" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
next prev parent reply other threads:[~2009-10-09 12:57 UTC|newest]
Thread overview: 31+ messages / expand[flat|nested] mbox.gz Atom feed top
2009-10-08 16:24 [PATCH 0/12] hugetlb: V10 numa control of persistent huge pages alloc/free Lee Schermerhorn
2009-10-08 16:25 ` [PATCH 1/12] nodemask: make NODEMASK_ALLOC more general Lee Schermerhorn
2009-10-08 20:17 ` David Rientjes
2009-10-08 16:25 ` [PATCH 2/12] hugetlb: rework hstate_next_node_* functions Lee Schermerhorn
2009-10-08 16:25 ` [PATCH 3/12] hugetlb: add nodemask arg to huge page alloc, free and surplus adjust fcns Lee Schermerhorn
2009-10-08 20:32 ` David Rientjes
2009-10-08 16:25 ` [PATCH 4/12] hugetlb: factor init_nodemask_of_node Lee Schermerhorn
2009-10-08 20:20 ` David Rientjes
2009-10-08 16:25 ` [PATCH 5/12] hugetlb: derive huge pages nodes allowed from task mempolicy Lee Schermerhorn
2009-10-08 21:22 ` [patch] mm: add gfp flags for NODEMASK_ALLOC slab allocations David Rientjes
2009-10-09 1:01 ` KAMEZAWA Hiroyuki
2009-10-08 16:25 ` [PATCH 6/12] hugetlb: add generic definition of NUMA_NO_NODE Lee Schermerhorn
2009-10-08 20:16 ` Christoph Lameter
2009-10-08 20:26 ` David Rientjes
2009-10-27 21:44 ` [patch -mm] acpi: remove NID_INVAL David Rientjes
2009-10-28 14:53 ` Cyrill Gorcunov
2009-10-29 18:40 ` Christoph Lameter
2009-10-08 16:25 ` [PATCH 7/12] hugetlb: add per node hstate attributes Lee Schermerhorn
2009-10-08 20:42 ` David Rientjes
2009-10-09 12:57 ` Lee Schermerhorn [this message]
2009-10-09 22:10 ` David Rientjes
2009-10-09 13:49 ` Lee Schermerhorn
2009-10-09 22:18 ` David Rientjes
2009-10-12 15:41 ` Lee Schermerhorn
2009-10-13 2:09 ` David Rientjes
2009-10-08 16:25 ` [PATCH 8/12] hugetlb: update hugetlb documentation for NUMA controls Lee Schermerhorn
2009-10-08 16:25 ` [PATCH 9/12] hugetlb: use only nodes with memory for huge pages Lee Schermerhorn
2009-10-08 16:26 ` [PATCH 10/12] mm: clear node in N_HIGH_MEMORY and stop kswapd when all memory is offlined Lee Schermerhorn
2009-10-08 20:19 ` David Rientjes
2009-10-08 16:26 ` [PATCH 11/12] hugetlb: handle memory hot-plug events Lee Schermerhorn
2009-10-08 16:26 ` [PATCH 12/12] hugetlb: offload per node attribute registrations Lee Schermerhorn
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1255093027.14370.35.camel@useless.americas.hpqcorp.net \
--to=lee.schermerhorn@hp.com \
--cc=agl@us.ibm.com \
--cc=akpm@linux-foundation.org \
--cc=andi@firstfloor.org \
--cc=apw@canonical.com \
--cc=eric.whitney@hp.com \
--cc=linux-mm@kvack.org \
--cc=linux-numa@vger.kernel.org \
--cc=mel@csn.ul.ie \
--cc=nacc@us.ibm.com \
--cc=randy.dunlap@oracle.com \
--cc=rientjes@google.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox