linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
To: David Rientjes <rientjes@google.com>
Cc: linux-mm@kvack.org, Andrew Morton <akpm@linux-foundation.org>,
	Mel Gorman <mel@csn.ul.ie>, Nishanth Aravamudan <nacc@us.ibm.com>,
	linux-numa@vger.kernel.org, Adam Litke <agl@us.ibm.com>,
	Andy Whitcroft <apw@canonical.com>,
	eric.whitney@hp.com
Subject: Re: [PATCH 5/6] hugetlb:  add per node hstate attributes
Date: Thu, 03 Sep 2009 16:41:25 -0400	[thread overview]
Message-ID: <1252010485.6029.180.camel@useless.americas.hpqcorp.net> (raw)
In-Reply-To: <alpine.DEB.1.00.0909031241160.24821@chino.kir.corp.google.com>

On Thu, 2009-09-03 at 12:52 -0700, David Rientjes wrote:
> On Fri, 28 Aug 2009, Lee Schermerhorn wrote:
> 
> > Index: linux-2.6.31-rc7-mmotm-090827-0057/mm/hugetlb.c
> > ===================================================================
> > --- linux-2.6.31-rc7-mmotm-090827-0057.orig/mm/hugetlb.c	2009-08-28 09:21:28.000000000 -0400
> > +++ linux-2.6.31-rc7-mmotm-090827-0057/mm/hugetlb.c	2009-08-28 09:21:31.000000000 -0400
> > @@ -24,6 +24,7 @@
> >  #include <asm/io.h>
> >  
> >  #include <linux/hugetlb.h>
> > +#include <linux/node.h>
> >  #include "internal.h"
> >  
> >  const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL;
> > @@ -1245,7 +1246,8 @@ static int adjust_pool_surplus(struct hs
> >  }
> >  
> >  #define persistent_huge_pages(h) (h->nr_huge_pages - h->surplus_huge_pages)
> > -static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count)
> > +static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count,
> > +								int nid)
> >  {
> >  	unsigned long min_count, ret;
> >  	nodemask_t *nodes_allowed;
> > @@ -1253,7 +1255,21 @@ static unsigned long set_max_huge_pages(
> >  	if (h->order >= MAX_ORDER)
> >  		return h->max_huge_pages;
> >  
> > -	nodes_allowed = huge_mpol_nodes_allowed();
> > +	if (nid == NO_NODEID_SPECIFIED)
> > +		nodes_allowed = huge_mpol_nodes_allowed();
> > +	else {
> > +		/*
> > +		 * incoming 'count' is for node 'nid' only, so
> > +		 * adjust count to global, but restrict alloc/free
> > +		 * to the specified node.
> > +		 */
> > +		count += h->nr_huge_pages - h->nr_huge_pages_node[nid];
> > +		nodes_allowed = alloc_nodemask_of_node(nid);
> > +		if (!nodes_allowed)
> > +			printk(KERN_WARNING "%s unable to allocate allowed "
> > +			       "nodes mask for huge page allocation/free.  "
> > +			       "Falling back to default.\n", current->comm);
> > +	}
> >  
> >  	/*
> >  	 * Increase the pool size
> > @@ -1329,51 +1345,71 @@ out:
> >  static struct kobject *hugepages_kobj;
> >  static struct kobject *hstate_kobjs[HUGE_MAX_HSTATE];
> >  
> > -static struct hstate *kobj_to_hstate(struct kobject *kobj)
> > +static struct hstate *kobj_to_node_hstate(struct kobject *kobj, int *nidp);
> > +
> > +static struct hstate *kobj_to_hstate(struct kobject *kobj, int *nidp)
> >  {
> >  	int i;
> > +
> >  	for (i = 0; i < HUGE_MAX_HSTATE; i++)
> > -		if (hstate_kobjs[i] == kobj)
> > +		if (hstate_kobjs[i] == kobj) {
> > +			if (nidp)
> > +				*nidp = NO_NODEID_SPECIFIED;
> >  			return &hstates[i];
> > -	BUG();
> > -	return NULL;
> > +		}
> > +
> > +	return kobj_to_node_hstate(kobj, nidp);
> >  }
> >  
> >  static ssize_t nr_hugepages_show(struct kobject *kobj,
> >  					struct kobj_attribute *attr, char *buf)
> >  {
> > -	struct hstate *h = kobj_to_hstate(kobj);
> > -	return sprintf(buf, "%lu\n", h->nr_huge_pages);
> > +	struct hstate *h;
> > +	unsigned long nr_huge_pages;
> > +	int nid;
> > +
> > +	h = kobj_to_hstate(kobj, &nid);
> > +	if (nid == NO_NODEID_SPECIFIED)
> > +		nr_huge_pages = h->nr_huge_pages;
> > +	else
> > +		nr_huge_pages = h->nr_huge_pages_node[nid];
> > +
> > +	return sprintf(buf, "%lu\n", nr_huge_pages);
> >  }
> > +
> >  static ssize_t nr_hugepages_store(struct kobject *kobj,
> > -		struct kobj_attribute *attr, const char *buf, size_t count)
> > +		struct kobj_attribute *attr, const char *buf, size_t len)
> >  {
> > +	unsigned long count;
> > +	struct hstate *h;
> > +	int nid;
> >  	int err;
> > -	unsigned long input;
> > -	struct hstate *h = kobj_to_hstate(kobj);
> >  
> > -	err = strict_strtoul(buf, 10, &input);
> > +	err = strict_strtoul(buf, 10, &count);
> >  	if (err)
> >  		return 0;
> >  
> > -	h->max_huge_pages = set_max_huge_pages(h, input);
> > +	h = kobj_to_hstate(kobj, &nid);
> > +	h->max_huge_pages = set_max_huge_pages(h, count, nid);
> >  
> > -	return count;
> > +	return len;
> >  }
> >  HSTATE_ATTR(nr_hugepages);
> >  
> >  static ssize_t nr_overcommit_hugepages_show(struct kobject *kobj,
> >  					struct kobj_attribute *attr, char *buf)
> >  {
> > -	struct hstate *h = kobj_to_hstate(kobj);
> > +	struct hstate *h = kobj_to_hstate(kobj, NULL);
> > +
> >  	return sprintf(buf, "%lu\n", h->nr_overcommit_huge_pages);
> >  }
> > +
> >  static ssize_t nr_overcommit_hugepages_store(struct kobject *kobj,
> >  		struct kobj_attribute *attr, const char *buf, size_t count)
> >  {
> >  	int err;
> >  	unsigned long input;
> > -	struct hstate *h = kobj_to_hstate(kobj);
> > +	struct hstate *h = kobj_to_hstate(kobj, NULL);
> >  
> >  	err = strict_strtoul(buf, 10, &input);
> >  	if (err)
> > @@ -1390,15 +1426,24 @@ HSTATE_ATTR(nr_overcommit_hugepages);
> >  static ssize_t free_hugepages_show(struct kobject *kobj,
> >  					struct kobj_attribute *attr, char *buf)
> >  {
> > -	struct hstate *h = kobj_to_hstate(kobj);
> > -	return sprintf(buf, "%lu\n", h->free_huge_pages);
> > +	struct hstate *h;
> > +	unsigned long free_huge_pages;
> > +	int nid;
> > +
> > +	h = kobj_to_hstate(kobj, &nid);
> > +	if (nid == NO_NODEID_SPECIFIED)
> > +		free_huge_pages = h->free_huge_pages;
> > +	else
> > +		free_huge_pages = h->free_huge_pages_node[nid];
> > +
> > +	return sprintf(buf, "%lu\n", free_huge_pages);
> >  }
> >  HSTATE_ATTR_RO(free_hugepages);
> >  
> >  static ssize_t resv_hugepages_show(struct kobject *kobj,
> >  					struct kobj_attribute *attr, char *buf)
> >  {
> > -	struct hstate *h = kobj_to_hstate(kobj);
> > +	struct hstate *h = kobj_to_hstate(kobj, NULL);
> >  	return sprintf(buf, "%lu\n", h->resv_huge_pages);
> >  }
> >  HSTATE_ATTR_RO(resv_hugepages);
> > @@ -1406,8 +1451,17 @@ HSTATE_ATTR_RO(resv_hugepages);
> >  static ssize_t surplus_hugepages_show(struct kobject *kobj,
> >  					struct kobj_attribute *attr, char *buf)
> >  {
> > -	struct hstate *h = kobj_to_hstate(kobj);
> > -	return sprintf(buf, "%lu\n", h->surplus_huge_pages);
> > +	struct hstate *h;
> > +	unsigned long surplus_huge_pages;
> > +	int nid;
> > +
> > +	h = kobj_to_hstate(kobj, &nid);
> > +	if (nid == NO_NODEID_SPECIFIED)
> > +		surplus_huge_pages = h->surplus_huge_pages;
> > +	else
> > +		surplus_huge_pages = h->surplus_huge_pages_node[nid];
> > +
> > +	return sprintf(buf, "%lu\n", surplus_huge_pages);
> >  }
> >  HSTATE_ATTR_RO(surplus_hugepages);
> >  
> > @@ -1424,19 +1478,21 @@ static struct attribute_group hstate_att
> >  	.attrs = hstate_attrs,
> >  };
> >  
> > -static int __init hugetlb_sysfs_add_hstate(struct hstate *h)
> > +static int __init hugetlb_sysfs_add_hstate(struct hstate *h,
> > +				struct kobject *parent,
> > +				struct kobject **hstate_kobjs,
> > +				struct attribute_group *hstate_attr_group)
> >  {
> >  	int retval;
> > +	int hi = h - hstates;
> >  
> > -	hstate_kobjs[h - hstates] = kobject_create_and_add(h->name,
> > -							hugepages_kobj);
> > -	if (!hstate_kobjs[h - hstates])
> > +	hstate_kobjs[hi] = kobject_create_and_add(h->name, parent);
> > +	if (!hstate_kobjs[hi])
> >  		return -ENOMEM;
> >  
> > -	retval = sysfs_create_group(hstate_kobjs[h - hstates],
> > -							&hstate_attr_group);
> > +	retval = sysfs_create_group(hstate_kobjs[hi], hstate_attr_group);
> >  	if (retval)
> > -		kobject_put(hstate_kobjs[h - hstates]);
> > +		kobject_put(hstate_kobjs[hi]);
> >  
> >  	return retval;
> >  }
> > @@ -1451,17 +1507,143 @@ static void __init hugetlb_sysfs_init(vo
> >  		return;
> >  
> >  	for_each_hstate(h) {
> > -		err = hugetlb_sysfs_add_hstate(h);
> > +		err = hugetlb_sysfs_add_hstate(h, hugepages_kobj,
> > +					 hstate_kobjs, &hstate_attr_group);
> >  		if (err)
> >  			printk(KERN_ERR "Hugetlb: Unable to add hstate %s",
> >  								h->name);
> >  	}
> >  }
> >  
> > +#ifdef CONFIG_NUMA
> > +
> > +struct node_hstate {
> > +	struct kobject		*hugepages_kobj;
> > +	struct kobject		*hstate_kobjs[HUGE_MAX_HSTATE];
> > +};
> > +struct node_hstate node_hstates[MAX_NUMNODES];
> > +
> > +static struct attribute *per_node_hstate_attrs[] = {
> > +	&nr_hugepages_attr.attr,
> > +	&free_hugepages_attr.attr,
> > +	&surplus_hugepages_attr.attr,
> > +	NULL,
> > +};
> > +
> > +static struct attribute_group per_node_hstate_attr_group = {
> > +	.attrs = per_node_hstate_attrs,
> > +};
> > +
> > +static struct hstate *kobj_to_node_hstate(struct kobject *kobj, int *nidp)
> > +{
> > +	int nid;
> > +
> > +	for (nid = 0; nid < nr_node_ids; nid++) {
> > +		struct node_hstate *nhs = &node_hstates[nid];
> > +		int i;
> > +		for (i = 0; i < HUGE_MAX_HSTATE; i++)
> > +			if (nhs->hstate_kobjs[i] == kobj) {
> > +				if (nidp)
> > +					*nidp = nid;
> > +				return &hstates[i];
> > +			}
> > +	}
> > +
> > +	BUG();
> > +	return NULL;
> > +}
> > +
> > +void hugetlb_unregister_node(struct node *node)
> > +{
> > +	struct hstate *h;
> > +	struct node_hstate *nhs = &node_hstates[node->sysdev.id];
> > +
> > +	if (!nhs->hugepages_kobj)
> > +		return;
> > +
> > +	for_each_hstate(h)
> > +		if (nhs->hstate_kobjs[h - hstates]) {
> > +			kobject_put(nhs->hstate_kobjs[h - hstates]);
> > +			nhs->hstate_kobjs[h - hstates] = NULL;
> > +		}
> > +
> > +	kobject_put(nhs->hugepages_kobj);
> > +	nhs->hugepages_kobj = NULL;
> > +}
> > +
> > +static void hugetlb_unregister_all_nodes(void)
> > +{
> > +	int nid;
> > +
> > +	for (nid = 0; nid < nr_node_ids; nid++)
> > +		hugetlb_unregister_node(&node_devices[nid]);
> > +
> > +	register_hugetlbfs_with_node(NULL, NULL);
> > +}
> > +
> > +void hugetlb_register_node(struct node *node)
> > +{
> > +	struct hstate *h;
> > +	struct node_hstate *nhs = &node_hstates[node->sysdev.id];
> > +	int err;
> > +
> > +	if (nhs->hugepages_kobj)
> > +		return;		/* already allocated */
> > +
> > +	nhs->hugepages_kobj = kobject_create_and_add("hugepages",
> > +							&node->sysdev.kobj);
> > +	if (!nhs->hugepages_kobj)
> > +		return;
> > +
> > +	for_each_hstate(h) {
> > +		err = hugetlb_sysfs_add_hstate(h, nhs->hugepages_kobj,
> > +						nhs->hstate_kobjs,
> > +						&per_node_hstate_attr_group);
> > +		if (err) {
> > +			printk(KERN_ERR "Hugetlb: Unable to add hstate %s"
> > +					" for node %d\n",
> > +						h->name, node->sysdev.id);
> 
> Maybe add `err' to the printk so we know whether it was an -ENOMEM 
> condition or sysfs problem?

Just the raw negative number?

> 
> > +			hugetlb_unregister_node(node);
> > +			break;
> > +		}
> > +	}
> > +}
> > +
> > +static void hugetlb_register_all_nodes(void)
> > +{
> > +	int nid;
> > +
> > +	for (nid = 0; nid < nr_node_ids; nid++) {
> 
> Don't you want to do this for all nodes in N_HIGH_MEMORY?  I don't think 
> we should be adding attributes for memoryless nodes.


Well, I wondered about that.  The persistent huge page allocation code
is careful to skip over nodes where it can't allow a huge page there,
whether or not the node has [any] memory.  So, it's safe to leave it
this way.  And, I was worried about the interaction with memory hotplug,
as separate from node hotplug.  The current code handles node hot plug,
bug I wasn't sure about memory hot-plug within a node.  I.e., would the
node driver get called to register the attributes in this case?   Maybe
that case doesn't exist, so I don't have to worry about it.   I think
this is somewhat similar to the top cpuset mems_allowed being set to all
possible to cover any subsequently added nodes/memory.

It's easy to change to use node_state[N_HIGH_MEMORY], but then the
memory hotplug guys might jump in and say that, now it doesn't work for
memory hot add/remove.  I really don't want to go there...

But, I can understand the confusion about providing an explicit control
like this for a node without memory.  It is somewhat different from just
visiting all online nodes for the default mask, as hugetlb.c has always
done.
  
> 
> > +		struct node *node = &node_devices[nid];
> > +		if (node->sysdev.id == nid)
> > +			hugetlb_register_node(node);
> > +	}
> > +
> > +	register_hugetlbfs_with_node(hugetlb_register_node,
> > +                                     hugetlb_unregister_node);
> > +}
> > +#else	/* !CONFIG_NUMA */
> > +
> > +static struct hstate *kobj_to_node_hstate(struct kobject *kobj, int *nidp)
> > +{
> > +	BUG();
> > +	if (nidp)
> > +		*nidp = -1;
> > +	return NULL;
> > +}
> > +
> > +static void hugetlb_unregister_all_nodes(void) { }
> > +
> > +static void hugetlb_register_all_nodes(void) { }
> > +
> > +#endif
> > +
> >  static void __exit hugetlb_exit(void)
> >  {
> >  	struct hstate *h;
> >  
> > +	hugetlb_unregister_all_nodes();
> > +
> >  	for_each_hstate(h) {
> >  		kobject_put(hstate_kobjs[h - hstates]);
> >  	}
> > @@ -1496,6 +1678,8 @@ static int __init hugetlb_init(void)
> >  
> >  	hugetlb_sysfs_init();
> >  
> > +	hugetlb_register_all_nodes();
> > +
> >  	return 0;
> >  }
> >  module_init(hugetlb_init);
> > @@ -1598,7 +1782,8 @@ int hugetlb_sysctl_handler(struct ctl_ta
> >  	proc_doulongvec_minmax(table, write, buffer, length, ppos);
> >  
> >  	if (write)
> > -		h->max_huge_pages = set_max_huge_pages(h, tmp);
> > +		h->max_huge_pages = set_max_huge_pages(h, tmp,
> > +		                                       NO_NODEID_SPECIFIED);
> >  
> >  	return 0;
> >  }
> > Index: linux-2.6.31-rc7-mmotm-090827-0057/include/linux/numa.h
> > ===================================================================
> > --- linux-2.6.31-rc7-mmotm-090827-0057.orig/include/linux/numa.h	2009-08-28 09:21:17.000000000 -0400
> > +++ linux-2.6.31-rc7-mmotm-090827-0057/include/linux/numa.h	2009-08-28 09:21:31.000000000 -0400
> > @@ -10,4 +10,6 @@
> >  
> >  #define MAX_NUMNODES    (1 << NODES_SHIFT)
> >  
> > +#define NO_NODEID_SPECIFIED	(-1)
> > +
> >  #endif /* _LINUX_NUMA_H */
> 
> Hmm, so we already have NUMA_NO_NODE in the ia64 and x86_64 code and 
> NID_INVAL in the ACPI code, both of which are defined to -1.  Maybe rename 
> your addition here in favor of NUMA_NO_NODE, remove it from the ia64 and 
> x86 arch headers, and convert the ACPI code?

OK, replacing 'NO_NODEID_SPECIFIED' with 'NUMA_NO_NODE,' works w/o
decending into header dependency hell.  The symbol is already visible in
hugetlb.c  I'll fix that.  But, ACPI?  Not today, thanks :).  

> 
> Thanks for doing this!

Well, we do need this, as well.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  reply	other threads:[~2009-09-03 20:42 UTC|newest]

Thread overview: 52+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-08-28 16:03 [PATCH 0/6] hugetlb: V5 constrain allocation/free based on task mempolicy Lee Schermerhorn
2009-08-28 16:03 ` [PATCH 1/6] hugetlb: rework hstate_next_node_* functions Lee Schermerhorn
2009-08-28 16:03 ` [PATCH 2/6] hugetlb: add nodemask arg to huge page alloc, free and surplus adjust fcns Lee Schermerhorn
2009-09-03 18:39   ` David Rientjes
2009-08-28 16:03 ` [PATCH 3/6] hugetlb: derive huge pages nodes allowed from task mempolicy Lee Schermerhorn
2009-09-01 14:47   ` Mel Gorman
2009-09-03 19:22   ` David Rientjes
2009-09-03 20:15     ` Lee Schermerhorn
2009-09-03 20:49       ` David Rientjes
2009-08-28 16:03 ` [PATCH 4/6] hugetlb: introduce alloc_nodemask_of_node Lee Schermerhorn
2009-09-01 14:49   ` Mel Gorman
2009-09-01 16:42     ` Lee Schermerhorn
2009-09-03 18:34       ` David Rientjes
2009-09-03 20:49         ` Lee Schermerhorn
2009-09-03 21:03           ` David Rientjes
2009-08-28 16:03 ` [PATCH 5/6] hugetlb: add per node hstate attributes Lee Schermerhorn
2009-09-01 15:20   ` Mel Gorman
2009-09-03 19:52   ` David Rientjes
2009-09-03 20:41     ` Lee Schermerhorn [this message]
2009-09-03 21:02       ` David Rientjes
2009-09-04 14:30         ` Lee Schermerhorn
2009-08-28 16:03 ` [PATCH 6/6] hugetlb: update hugetlb documentation for mempolicy based management Lee Schermerhorn
2009-09-03 20:07   ` David Rientjes
2009-09-03 21:09     ` Lee Schermerhorn
2009-09-03 21:25       ` David Rientjes
2009-09-08 10:44         ` Mel Gorman
2009-09-08 19:51           ` David Rientjes
2009-09-08 20:04             ` Mel Gorman
2009-09-08 20:18               ` David Rientjes
2009-09-08 21:41                 ` Mel Gorman
2009-09-08 22:54                   ` David Rientjes
2009-09-09  8:16                     ` Mel Gorman
2009-09-09 20:44                       ` David Rientjes
2009-09-10 12:26                         ` Mel Gorman
2009-09-11 22:27                           ` David Rientjes
2009-09-14 13:33                             ` Mel Gorman
2009-09-14 14:15                               ` Lee Schermerhorn
2009-09-14 15:41                                 ` Mel Gorman
2009-09-14 19:15                                   ` David Rientjes
2009-09-15 11:48                                     ` Mel Gorman
2009-09-14 19:14                               ` David Rientjes
2009-09-14 21:28                                 ` David Rientjes
2009-09-16 10:21                                   ` Mel Gorman
2009-09-03 20:42   ` Randy Dunlap
2009-09-04 15:23     ` Lee Schermerhorn
2009-09-09 16:31 [PATCH 0/6] hugetlb: V6 constrain allocation/free based on task mempolicy Lee Schermerhorn
2009-09-09 16:31 ` [PATCH 5/6] hugetlb: add per node hstate attributes Lee Schermerhorn
2009-09-10 12:32   ` Mel Gorman
2009-09-10 14:26     ` Lee Schermerhorn
2009-09-10 19:50       ` David Rientjes
2009-09-10 19:58         ` Lee Schermerhorn
2009-09-10 23:31   ` Andrew Morton
2009-09-11 13:12     ` Lee Schermerhorn

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1252010485.6029.180.camel@useless.americas.hpqcorp.net \
    --to=lee.schermerhorn@hp.com \
    --cc=agl@us.ibm.com \
    --cc=akpm@linux-foundation.org \
    --cc=apw@canonical.com \
    --cc=eric.whitney@hp.com \
    --cc=linux-mm@kvack.org \
    --cc=linux-numa@vger.kernel.org \
    --cc=mel@csn.ul.ie \
    --cc=nacc@us.ibm.com \
    --cc=rientjes@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox