From: David Rientjes <rientjes@google.com>
To: Lee Schermerhorn <lee.schermerhorn@hp.com>
Cc: linux-mm@kvack.org, linux-numa@vger.kernel.org,
	akpm@linux-foundation.org, Mel Gorman <mel@csn.ul.ie>,
	Randy Dunlap <randy.dunlap@oracle.com>,
	Nishanth Aravamudan <nacc@us.ibm.com>,
	Adam Litke <agl@us.ibm.com>, Andy Whitcroft <apw@canonical.com>,
	eric.whitney@hp.com
Subject: Re: [PATCH 2/11] hugetlb: add nodemask arg to huge page alloc, free and surplus adjust fcns
Date: Tue, 6 Oct 2009 20:26:32 -0700 (PDT)
Message-ID: <alpine.DEB.1.00.0910062018070.3099@chino.kir.corp.google.com>
In-Reply-To: <20091006031751.22576.23355.sendpatchset@localhost.localdomain>

On Mon, 5 Oct 2009, Lee Schermerhorn wrote:

> Index: linux-2.6.31-mmotm-090925-1435/mm/hugetlb.c
> ===================================================================
> --- linux-2.6.31-mmotm-090925-1435.orig/mm/hugetlb.c	2009-09-28 10:12:20.000000000 -0400
> +++ linux-2.6.31-mmotm-090925-1435/mm/hugetlb.c	2009-09-30 11:41:36.000000000 -0400
> @@ -622,48 +622,56 @@ static struct page *alloc_fresh_huge_pag
>  }
>  
>  /*
> - * common helper function for hstate_next_node_to_{alloc|free}.
> - * return next node in node_online_map, wrapping at end.
> + * common helper functions for hstate_next_node_to_{alloc|free}.
> + * We may have allocated or freed a huge page based on a different
> + * nodes_allowed previously, so h->next_node_to_{alloc|free} might
> + * be outside of *nodes_allowed.  Ensure that we use an allowed
> + * node for alloc or free.
>   */
> -static int next_node_allowed(int nid)
> +static int next_node_allowed(int nid, nodemask_t *nodes_allowed)
>  {
> -	nid = next_node(nid, node_online_map);
> +	nid = next_node(nid, *nodes_allowed);
>  	if (nid == MAX_NUMNODES)
> -		nid = first_node(node_online_map);
> +		nid = first_node(*nodes_allowed);
>  	VM_BUG_ON(nid >= MAX_NUMNODES);
>  
>  	return nid;
>  }
>  
> +static int get_valid_node_allowed(int nid, nodemask_t *nodes_allowed)
> +{
> +	if (!node_isset(nid, *nodes_allowed))
> +		nid = next_node_allowed(nid, nodes_allowed);
> +	return nid;
> +}
> +
>  /*
> - * Use a helper variable to find the next node and then
> - * copy it back to next_nid_to_alloc afterwards:
> - * otherwise there's a window in which a racer might
> - * pass invalid nid MAX_NUMNODES to alloc_pages_exact_node.
> - * But we don't need to use a spin_lock here: it really
> - * doesn't matter if occasionally a racer chooses the
> - * same nid as we do.  Move nid forward in the mask even
> - * if we just successfully allocated a hugepage so that
> - * the next caller gets hugepages on the next node.
> + * returns the previously saved node ["this node"] from which to
> + * allocate a persistent huge page for the pool and advance the
> + * next node from which to allocate, handling wrap at end of node
> + * mask.
>   */
> -static int hstate_next_node_to_alloc(struct hstate *h)
> +static int hstate_next_node_to_alloc(struct hstate *h,
> +					nodemask_t *nodes_allowed)
>  {
> -	int nid, next_nid;
> +	int nid;
> +
> +	VM_BUG_ON(!nodes_allowed);
> +
> +	nid = get_valid_node_allowed(h->next_nid_to_alloc, nodes_allowed);
> +	h->next_nid_to_alloc = next_node_allowed(nid, nodes_allowed);
>  
> -	nid = h->next_nid_to_alloc;
> -	next_nid = next_node_allowed(nid);
> -	h->next_nid_to_alloc = next_nid;
>  	return nid;
>  }
>  
> -static int alloc_fresh_huge_page(struct hstate *h)
> +static int alloc_fresh_huge_page(struct hstate *h, nodemask_t *nodes_allowed)
>  {
>  	struct page *page;
>  	int start_nid;
>  	int next_nid;
>  	int ret = 0;
>  
> -	start_nid = hstate_next_node_to_alloc(h);
> +	start_nid = hstate_next_node_to_alloc(h, nodes_allowed);
>  	next_nid = start_nid;
>  
>  	do {
> @@ -672,7 +680,7 @@ static int alloc_fresh_huge_page(struct
>  			ret = 1;
>  			break;
>  		}
> -		next_nid = hstate_next_node_to_alloc(h);
> +		next_nid = hstate_next_node_to_alloc(h, nodes_allowed);
>  	} while (next_nid != start_nid);
>  
>  	if (ret)
> @@ -684,18 +692,20 @@ static int alloc_fresh_huge_page(struct
>  }
>  
>  /*
> - * helper for free_pool_huge_page() - return the next node
> - * from which to free a huge page.  Advance the next node id
> - * whether or not we find a free huge page to free so that the
> - * next attempt to free addresses the next node.
> + * helper for free_pool_huge_page() - return the previously saved
> + * node ["this node"] from which to free a huge page.  Advance the
> + * next node id whether or not we find a free huge page to free so
> + * that the next attempt to free addresses the next node.
>   */
> -static int hstate_next_node_to_free(struct hstate *h)
> +static int hstate_next_node_to_free(struct hstate *h, nodemask_t *nodes_allowed)
>  {
> -	int nid, next_nid;
> +	int nid;
> +
> +	VM_BUG_ON(!nodes_allowed);
> +
> +	nid = get_valid_node_allowed(h->next_nid_to_free, nodes_allowed);
> +	h->next_nid_to_free = next_node_allowed(nid, nodes_allowed);
>  
> -	nid = h->next_nid_to_free;
> -	next_nid = next_node_allowed(nid);
> -	h->next_nid_to_free = next_nid;
>  	return nid;
>  }
>  
> @@ -705,13 +715,14 @@ static int hstate_next_node_to_free(stru
>   * balanced over allowed nodes.
>   * Called with hugetlb_lock locked.
>   */
> -static int free_pool_huge_page(struct hstate *h, bool acct_surplus)
> +static int free_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed,
> +							 bool acct_surplus)
>  {
>  	int start_nid;
>  	int next_nid;
>  	int ret = 0;
>  
> -	start_nid = hstate_next_node_to_free(h);
> +	start_nid = hstate_next_node_to_free(h, nodes_allowed);
>  	next_nid = start_nid;
>  
>  	do {
> @@ -735,7 +746,7 @@ static int free_pool_huge_page(struct hs
>  			ret = 1;
>  			break;
>  		}
> -		next_nid = hstate_next_node_to_free(h);
> +		next_nid = hstate_next_node_to_free(h, nodes_allowed);
>  	} while (next_nid != start_nid);
>  
>  	return ret;
> @@ -937,7 +948,7 @@ static void return_unused_surplus_pages(
>  	 * on-line nodes for us and will handle the hstate accounting.
>  	 */
>  	while (nr_pages--) {
> -		if (!free_pool_huge_page(h, 1))
> +		if (!free_pool_huge_page(h, &node_online_map, 1))
>  			break;
>  	}
>  }
> @@ -1047,7 +1058,7 @@ int __weak alloc_bootmem_huge_page(struc
>  		void *addr;
>  
>  		addr = __alloc_bootmem_node_nopanic(
> -				NODE_DATA(hstate_next_node_to_alloc(h)),
> +				NODE_DATA(hstate_next_node_to_alloc(h, NULL)),
>  				huge_page_size(h), huge_page_size(h), 0);
>  
>  		if (addr) {
> @@ -1102,7 +1113,7 @@ static void __init hugetlb_hstate_alloc_
>  		if (h->order >= MAX_ORDER) {
>  			if (!alloc_bootmem_huge_page(h))
>  				break;
> -		} else if (!alloc_fresh_huge_page(h))
> +		} else if (!alloc_fresh_huge_page(h, &node_online_map))
>  			break;
>  	}
>  	h->max_huge_pages = i;
> @@ -1144,16 +1155,22 @@ static void __init report_hugepages(void
>  }
>  
>  #ifdef CONFIG_HIGHMEM
> -static void try_to_free_low(struct hstate *h, unsigned long count)
> +static void try_to_free_low(struct hstate *h, unsigned long count,
> +						nodemask_t *nodes_allowed)
>  {
>  	int i;
>  
>  	if (h->order >= MAX_ORDER)
>  		return;
>  
> +	if (!nodes_allowed)
> +		nodes_allowed = &node_online_map;
> +
>  	for (i = 0; i < MAX_NUMNODES; ++i) {
>  		struct page *page, *next;
>  		struct list_head *freel = &h->hugepage_freelists[i];
> +		if (!node_isset(i, *nodes_allowed))
> +			continue;
>  		list_for_each_entry_safe(page, next, freel, lru) {
>  			if (count >= h->nr_huge_pages)
>  				return;

Converting the iteration to use for_each_node_mask(i, *nodes_allowed)
would be cleaner than open-coding the walk over all MAX_NUMNODES ids
and skipping the disallowed ones with node_isset().
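
Something along these lines (untested sketch; the body of the inner
loop is elided here just as it is in the hunk above):

	for_each_node_mask(i, *nodes_allowed) {
		struct page *page, *next;
		struct list_head *freel = &h->hugepage_freelists[i];

		/* walk this node's free list, as in the original loop */
		list_for_each_entry_safe(page, next, freel, lru) {
			if (count >= h->nr_huge_pages)
				return;
			...
		}
	}

That drops the explicit node_isset() test entirely and only ever
touches freelists of nodes that are actually in the mask.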
