Re: [PATCH v12 01/11] mm: deferred_init_memmap improvements

linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed

From: Pavel Tatashin <pasha.tatashin@oracle.com>
To: Michal Hocko <mhocko@kernel.org>
Cc: linux-kernel@vger.kernel.org, sparclinux@vger.kernel.org,
	linux-mm@kvack.org, linuxppc-dev@lists.ozlabs.org,
	linux-s390@vger.kernel.org, linux-arm-kernel@lists.infradead.org,
	x86@kernel.org, kasan-dev@googlegroups.com,
	borntraeger@de.ibm.com, heiko.carstens@de.ibm.com,
	davem@davemloft.net, willy@infradead.org,
	ard.biesheuvel@linaro.org, mark.rutland@arm.com,
	will.deacon@arm.com, catalin.marinas@arm.com, sam@ravnborg.org,
	mgorman@techsingularity.net, akpm@linux-foundation.org,
	steven.sistare@oracle.com, daniel.m.jordan@oracle.com,
	bob.picco@oracle.com
Subject: Re: [PATCH v12 01/11] mm: deferred_init_memmap improvements
Date: Tue, 17 Oct 2017 11:13:19 -0400	[thread overview]
Message-ID: <dc43ac9d-8dff-d655-afd5-cb035a9f1a1a@oracle.com> (raw)
In-Reply-To: <20171017114028.uyt63277md5tuc4j@dhcp22.suse.cz>

> This really begs to have two patches... I will not insist though. I also
> suspect the code can be further simplified but again this is nothing to
> block this to go.

Perhaps the "page" variable can be avoided in deferred_init_range(), since
deferred_free_range() already converts pfn to page, but I have not studied it.

>   
>> Signed-off-by: Pavel Tatashin <pasha.tatashin@oracle.com>
>> Reviewed-by: Steven Sistare <steven.sistare@oracle.com>
>> Reviewed-by: Daniel Jordan <daniel.m.jordan@oracle.com>
>> Reviewed-by: Bob Picco <bob.picco@oracle.com>
> 
> I do not see any obvious issues in the patch
> 
> Acked-by: Michal Hocko <mhocko@suse.com>

Thank you very much!

Pavel

> 
>> ---
>>   mm/page_alloc.c | 168 ++++++++++++++++++++++++++++----------------------------
>>   1 file changed, 85 insertions(+), 83 deletions(-)
>>
>> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
>> index 77e4d3c5c57b..cdbd14829fd3 100644
>> --- a/mm/page_alloc.c
>> +++ b/mm/page_alloc.c
>> @@ -1410,14 +1410,17 @@ void clear_zone_contiguous(struct zone *zone)
>>   }
>>   
>>   #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
>> -static void __init deferred_free_range(struct page *page,
>> -					unsigned long pfn, int nr_pages)
>> +static void __init deferred_free_range(unsigned long pfn,
>> +				       unsigned long nr_pages)
>>   {
>> -	int i;
>> +	struct page *page;
>> +	unsigned long i;
>>   
>> -	if (!page)
>> +	if (!nr_pages)
>>   		return;
>>   
>> +	page = pfn_to_page(pfn);
>> +
>>   	/* Free a large naturally-aligned chunk if possible */
>>   	if (nr_pages == pageblock_nr_pages &&
>>   	    (pfn & (pageblock_nr_pages - 1)) == 0) {
>> @@ -1443,19 +1446,89 @@ static inline void __init pgdat_init_report_one_done(void)
>>   		complete(&pgdat_init_all_done_comp);
>>   }
>>   
>> +/*
>> + * Helper for deferred_init_range, free the given range, reset the counters, and
>> + * return number of pages freed.
>> + */
>> +static inline unsigned long __def_free(unsigned long *nr_free,
>> +				       unsigned long *free_base_pfn,
>> +				       struct page **page)
>> +{
>> +	unsigned long nr = *nr_free;
>> +
>> +	deferred_free_range(*free_base_pfn, nr);
>> +	*free_base_pfn = 0;
>> +	*nr_free = 0;
>> +	*page = NULL;
>> +
>> +	return nr;
>> +}
>> +
>> +static unsigned long deferred_init_range(int nid, int zid, unsigned long pfn,
>> +					 unsigned long end_pfn)
>> +{
>> +	struct mminit_pfnnid_cache nid_init_state = { };
>> +	unsigned long nr_pgmask = pageblock_nr_pages - 1;
>> +	unsigned long free_base_pfn = 0;
>> +	unsigned long nr_pages = 0;
>> +	unsigned long nr_free = 0;
>> +	struct page *page = NULL;
>> +
>> +	for (; pfn < end_pfn; pfn++) {
>> +		/*
>> +		 * First we check if pfn is valid on architectures where it is
>> +		 * possible to have holes within pageblock_nr_pages. On systems
>> +		 * where it is not possible, this function is optimized out.
>> +		 *
>> +		 * Then, we check if a current large page is valid by only
>> +		 * checking the validity of the head pfn.
>> +		 *
>> +		 * meminit_pfn_in_nid is checked on systems where pfns can
>> +		 * interleave within a node: a pfn is between start and end
>> +		 * of a node, but does not belong to this memory node.
>> +		 *
>> +		 * Finally, we minimize pfn page lookups and scheduler checks by
>> +		 * performing it only once every pageblock_nr_pages.
>> +		 */
>> +		if (!pfn_valid_within(pfn)) {
>> +			nr_pages += __def_free(&nr_free, &free_base_pfn, &page);
>> +		} else if (!(pfn & nr_pgmask) && !pfn_valid(pfn)) {
>> +			nr_pages += __def_free(&nr_free, &free_base_pfn, &page);
>> +		} else if (!meminit_pfn_in_nid(pfn, nid, &nid_init_state)) {
>> +			nr_pages += __def_free(&nr_free, &free_base_pfn, &page);
>> +		} else if (page && (pfn & nr_pgmask)) {
>> +			page++;
>> +			__init_single_page(page, pfn, zid, nid);
>> +			nr_free++;
>> +		} else {
>> +			nr_pages += __def_free(&nr_free, &free_base_pfn, &page);
>> +			page = pfn_to_page(pfn);
>> +			__init_single_page(page, pfn, zid, nid);
>> +			free_base_pfn = pfn;
>> +			nr_free = 1;
>> +			cond_resched();
>> +		}
>> +	}
>> +	/* Free the last block of pages to allocator */
>> +	nr_pages += __def_free(&nr_free, &free_base_pfn, &page);
>> +
>> +	return nr_pages;
>> +}
>> +
>>   /* Initialise remaining memory on a node */
>>   static int __init deferred_init_memmap(void *data)
>>   {
>>   	pg_data_t *pgdat = data;
>>   	int nid = pgdat->node_id;
>> -	struct mminit_pfnnid_cache nid_init_state = { };
>>   	unsigned long start = jiffies;
>>   	unsigned long nr_pages = 0;
>> -	unsigned long walk_start, walk_end;
>> -	int i, zid;
>> +	unsigned long spfn, epfn;
>> +	phys_addr_t spa, epa;
>> +	int zid;
>>   	struct zone *zone;
>>   	unsigned long first_init_pfn = pgdat->first_deferred_pfn;
>>   	const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id);
>> +	u64 i;
>>   
>>   	if (first_init_pfn == ULONG_MAX) {
>>   		pgdat_init_report_one_done();
>> @@ -1477,83 +1550,12 @@ static int __init deferred_init_memmap(void *data)
>>   		if (first_init_pfn < zone_end_pfn(zone))
>>   			break;
>>   	}
>> +	first_init_pfn = max(zone->zone_start_pfn, first_init_pfn);
>>   
>> -	for_each_mem_pfn_range(i, nid, &walk_start, &walk_end, NULL) {
>> -		unsigned long pfn, end_pfn;
>> -		struct page *page = NULL;
>> -		struct page *free_base_page = NULL;
>> -		unsigned long free_base_pfn = 0;
>> -		int nr_to_free = 0;
>> -
>> -		end_pfn = min(walk_end, zone_end_pfn(zone));
>> -		pfn = first_init_pfn;
>> -		if (pfn < walk_start)
>> -			pfn = walk_start;
>> -		if (pfn < zone->zone_start_pfn)
>> -			pfn = zone->zone_start_pfn;
>> -
>> -		for (; pfn < end_pfn; pfn++) {
>> -			if (!pfn_valid_within(pfn))
>> -				goto free_range;
>> -
>> -			/*
>> -			 * Ensure pfn_valid is checked every
>> -			 * pageblock_nr_pages for memory holes
>> -			 */
>> -			if ((pfn & (pageblock_nr_pages - 1)) == 0) {
>> -				if (!pfn_valid(pfn)) {
>> -					page = NULL;
>> -					goto free_range;
>> -				}
>> -			}
>> -
>> -			if (!meminit_pfn_in_nid(pfn, nid, &nid_init_state)) {
>> -				page = NULL;
>> -				goto free_range;
>> -			}
>> -
>> -			/* Minimise pfn page lookups and scheduler checks */
>> -			if (page && (pfn & (pageblock_nr_pages - 1)) != 0) {
>> -				page++;
>> -			} else {
>> -				nr_pages += nr_to_free;
>> -				deferred_free_range(free_base_page,
>> -						free_base_pfn, nr_to_free);
>> -				free_base_page = NULL;
>> -				free_base_pfn = nr_to_free = 0;
>> -
>> -				page = pfn_to_page(pfn);
>> -				cond_resched();
>> -			}
>> -
>> -			if (page->flags) {
>> -				VM_BUG_ON(page_zone(page) != zone);
>> -				goto free_range;
>> -			}
>> -
>> -			__init_single_page(page, pfn, zid, nid);
>> -			if (!free_base_page) {
>> -				free_base_page = page;
>> -				free_base_pfn = pfn;
>> -				nr_to_free = 0;
>> -			}
>> -			nr_to_free++;
>> -
>> -			/* Where possible, batch up pages for a single free */
>> -			continue;
>> -free_range:
>> -			/* Free the current block of pages to allocator */
>> -			nr_pages += nr_to_free;
>> -			deferred_free_range(free_base_page, free_base_pfn,
>> -								nr_to_free);
>> -			free_base_page = NULL;
>> -			free_base_pfn = nr_to_free = 0;
>> -		}
>> -		/* Free the last block of pages to allocator */
>> -		nr_pages += nr_to_free;
>> -		deferred_free_range(free_base_page, free_base_pfn, nr_to_free);
>> -
>> -		first_init_pfn = max(end_pfn, first_init_pfn);
>> +	for_each_free_mem_range(i, nid, MEMBLOCK_NONE, &spa, &epa, NULL) {
>> +		spfn = max_t(unsigned long, first_init_pfn, PFN_UP(spa));
>> +		epfn = min_t(unsigned long, zone_end_pfn(zone), PFN_DOWN(epa));
>> +		nr_pages += deferred_init_range(nid, zid, spfn, epfn);
>>   	}
>>   
>>   	/* Sanity check that the next zone really is unpopulated */
>> -- 
>> 2.14.2
> 

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

next prev parent reply	other threads:[~2017-10-17 15:14 UTC|newest]

Thread overview: 28+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-10-13 17:32 [PATCH v12 00/11] complete deferred page initialization Pavel Tatashin
2017-10-13 17:32 ` [PATCH v12 01/11] mm: deferred_init_memmap improvements Pavel Tatashin
2017-10-17 11:40   ` Michal Hocko
2017-10-17 15:13     ` Pavel Tatashin [this message]
2017-10-13 17:32 ` [PATCH v12 02/11] x86/mm: setting fields in deferred pages Pavel Tatashin
2017-10-13 17:32 ` [PATCH v12 03/11] sparc64/mm: " Pavel Tatashin
2017-10-13 17:32 ` [PATCH v12 04/11] sparc64: simplify vmemmap_populate Pavel Tatashin
2017-10-13 17:32 ` [PATCH v12 05/11] mm: defining memblock_virt_alloc_try_nid_raw Pavel Tatashin
2017-10-13 17:32 ` [PATCH v12 06/11] mm: zero reserved and unavailable struct pages Pavel Tatashin
2017-10-13 17:32 ` [PATCH v12 07/11] x86/kasan: add and use kasan_map_populate() Pavel Tatashin
2017-10-18 17:11   ` Andrey Ryabinin
2017-10-18 17:14     ` Pavel Tatashin
2017-10-18 17:20       ` Andrey Ryabinin
2017-10-13 17:32 ` [PATCH v12 08/11] arm64/kasan: " Pavel Tatashin
2017-10-18 16:55   ` Andrey Ryabinin
2017-10-18 17:03     ` Pavel Tatashin
2017-10-18 17:06       ` Will Deacon
2017-10-18 17:08         ` Pavel Tatashin
2017-10-18 17:18           ` Andrey Ryabinin
2017-10-18 17:23             ` Pavel Tatashin
2017-11-03 15:40               ` Andrey Ryabinin
2017-11-03 15:50                 ` Pavel Tatashin
2017-10-13 17:32 ` [PATCH v12 09/11] mm: stop zeroing memory during allocation in vmemmap Pavel Tatashin
2017-10-19 23:59   ` Andrew Morton
2017-10-20  1:13     ` Pavel Tatashin
2017-10-13 17:32 ` [PATCH v12 10/11] sparc64: optimized struct page zeroing Pavel Tatashin
2017-10-13 17:32 ` [PATCH v12 11/11] arm64: kasan: Avoid using vmemmap_populate to initialise shadow Pavel Tatashin
2017-10-13 18:23 ` [PATCH v12 00/11] complete deferred page initialization Bob Picco

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=dc43ac9d-8dff-d655-afd5-cb035a9f1a1a@oracle.com \
    --to=pasha.tatashin@oracle.com \
    --cc=akpm@linux-foundation.org \
    --cc=ard.biesheuvel@linaro.org \
    --cc=bob.picco@oracle.com \
    --cc=borntraeger@de.ibm.com \
    --cc=catalin.marinas@arm.com \
    --cc=daniel.m.jordan@oracle.com \
    --cc=davem@davemloft.net \
    --cc=heiko.carstens@de.ibm.com \
    --cc=kasan-dev@googlegroups.com \
    --cc=linux-arm-kernel@lists.infradead.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux-s390@vger.kernel.org \
    --cc=linuxppc-dev@lists.ozlabs.org \
    --cc=mark.rutland@arm.com \
    --cc=mgorman@techsingularity.net \
    --cc=mhocko@kernel.org \
    --cc=sam@ravnborg.org \
    --cc=sparclinux@vger.kernel.org \
    --cc=steven.sistare@oracle.com \
    --cc=will.deacon@arm.com \
    --cc=willy@infradead.org \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox