linux-mm.kvack.org archive mirror
* [PATCH] mm, page_alloc: enable pcpu_drain with zone capability
@ 2018-12-12  0:29 Wei Yang
  2018-12-12 12:52 ` Michal Hocko
  2018-12-12 14:25 ` [PATCH v2] " Wei Yang
  0 siblings, 2 replies; 8+ messages in thread
From: Wei Yang @ 2018-12-12  0:29 UTC
  To: linux-mm; +Cc: akpm, mhocko, osalvador, david, Wei Yang

Currently pcpu_drain is defined as a work_struct, which is not capable
of carrying the zone information needed to drain pages. During
__offline_pages(), the code knows the exact zone to drain. This leads
__offline_pages() to drain other zones which we don't want to touch,
and to some extent increases contention in the system.

This patch enables pcpu_drain with zone information, so that we can
drain pages on the exact zone.
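
The change is the classic container_of() pattern: embed the work_struct
in a wrapper that carries the extra context, then recover the wrapper
inside the callback. A minimal userspace sketch of the same pattern
(container_of() spelled out by hand; all names are illustrative, not
taken from the kernel):

#include <stddef.h>
#include <stdio.h>

/*
 * Same trick as the kernel's container_of(): given a pointer to a
 * member, step back to the enclosing structure.
 */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct work_struct { int pending; };	/* stand-in for the kernel type */
struct zone { const char *name; };

struct pcpu_drain {
	struct zone *zone;
	struct work_struct work;
};

/* The callback receives only the embedded member... */
static void drain_local_pages_wq(struct work_struct *work)
{
	/* ...and climbs back up to the wrapper to find its context. */
	struct pcpu_drain *drain = container_of(work, struct pcpu_drain, work);

	printf("draining zone %s\n", drain->zone->name);
}

int main(void)
{
	struct zone normal = { .name = "Normal" };
	struct pcpu_drain drain = { .zone = &normal };

	drain_local_pages_wq(&drain.work);	/* -> "draining zone Normal" */
	return 0;
}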

Signed-off-by: Wei Yang <richard.weiyang@gmail.com>
---
 mm/page_alloc.c | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 65db26995466..eb4df3f63f5e 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -96,8 +96,12 @@ int _node_numa_mem_[MAX_NUMNODES];
 #endif
 
 /* work_structs for global per-cpu drains */
+struct pcpu_drain {
+	struct zone *zone;
+	struct work_struct work;
+};
 DEFINE_MUTEX(pcpu_drain_mutex);
-DEFINE_PER_CPU(struct work_struct, pcpu_drain);
+DEFINE_PER_CPU(struct pcpu_drain, pcpu_drain);
 
 #ifdef CONFIG_GCC_PLUGIN_LATENT_ENTROPY
 volatile unsigned long latent_entropy __latent_entropy;
@@ -2596,6 +2600,8 @@ void drain_local_pages(struct zone *zone)
 
 static void drain_local_pages_wq(struct work_struct *work)
 {
+	struct pcpu_drain *drain =
+		container_of(work, struct pcpu_drain, work);
 	/*
 	 * drain_all_pages doesn't use proper cpu hotplug protection so
 	 * we can race with cpu offline when the WQ can move this from
@@ -2604,7 +2610,7 @@ static void drain_local_pages_wq(struct work_struct *work)
 	 * a different one.
 	 */
 	preempt_disable();
-	drain_local_pages(NULL);
+	drain_local_pages(drain->zone);
 	preempt_enable();
 }
 
@@ -2675,12 +2681,14 @@ void drain_all_pages(struct zone *zone)
 	}
 
 	for_each_cpu(cpu, &cpus_with_pcps) {
-		struct work_struct *work = per_cpu_ptr(&pcpu_drain, cpu);
-		INIT_WORK(work, drain_local_pages_wq);
-		queue_work_on(cpu, mm_percpu_wq, work);
+		struct pcpu_drain *drain = per_cpu_ptr(&pcpu_drain, cpu);
+
+		drain->zone = zone;
+		INIT_WORK(&drain->work, drain_local_pages_wq);
+		queue_work_on(cpu, mm_percpu_wq, &drain->work);
 	}
 	for_each_cpu(cpu, &cpus_with_pcps)
-		flush_work(per_cpu_ptr(&pcpu_drain, cpu));
+		flush_work(&per_cpu_ptr(&pcpu_drain, cpu)->work);
 
 	mutex_unlock(&pcpu_drain_mutex);
 }
-- 
2.15.1
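
The two loops at the end of drain_all_pages() are deliberate: every
work item is queued first so the per-cpu drains run concurrently, and
only then does the caller wait on each of them. A self-contained toy
module showing the same queue-then-flush shape (system_wq stands in for
mm_percpu_wq here; the tag field is a made-up placeholder for real
context such as the zone pointer):

#include <linux/cpumask.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/preempt.h>
#include <linux/smp.h>
#include <linux/workqueue.h>

struct toy_drain {
	int tag;			/* placeholder for context like ->zone */
	struct work_struct work;
};

static DEFINE_PER_CPU(struct toy_drain, toy_drain);

static void toy_drain_wq(struct work_struct *work)
{
	struct toy_drain *d = container_of(work, struct toy_drain, work);

	preempt_disable();	/* pin the cpu, as the real worker does */
	pr_info("cpu %d: draining, tag=%d\n", smp_processor_id(), d->tag);
	preempt_enable();
}

static int __init toy_init(void)
{
	int cpu;

	/* First pass: queue one work item per cpu so they run in parallel. */
	for_each_online_cpu(cpu) {
		struct toy_drain *d = per_cpu_ptr(&toy_drain, cpu);

		d->tag = cpu;
		INIT_WORK(&d->work, toy_drain_wq);
		queue_work_on(cpu, system_wq, &d->work);
	}
	/* Second pass: wait for all of them before returning. */
	for_each_online_cpu(cpu)
		flush_work(&per_cpu_ptr(&toy_drain, cpu)->work);

	return 0;
}

static void __exit toy_exit(void)
{
}

module_init(toy_init);
module_exit(toy_exit);
MODULE_LICENSE("GPL");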

* Re: [PATCH] mm, page_alloc: enable pcpu_drain with zone capability
  2018-12-12  0:29 [PATCH] mm, page_alloc: enable pcpu_drain with zone capability Wei Yang
@ 2018-12-12 12:52 ` Michal Hocko
  2018-12-12 14:22   ` Wei Yang
  2018-12-12 14:25 ` [PATCH v2] " Wei Yang
  1 sibling, 1 reply; 8+ messages in thread
From: Michal Hocko @ 2018-12-12 12:52 UTC
  To: Wei Yang; +Cc: linux-mm, akpm, osalvador, david

On Wed 12-12-18 08:29:33, Wei Yang wrote:
> Currently pcpu_drain is defined as a work_struct, which is not capable
> of carrying the zone information needed to drain pages. During
> __offline_pages(), the code knows the exact zone to drain. This leads
> __offline_pages() to drain other zones which we don't want to touch,
> and to some extent increases contention in the system.

I think the above is quite vague and imprecise. I would formulate it as
follows. Feel free to take it or parts that you find useful.
"
drain_all_pages is documented to drain per-cpu pages for a given zone
(if non-NULL). The current implementation doesn't match the description
though. It will drain all pcp pages for all zones that happen to have
cached pages on the same cpu as the given zone. This will lead to
premature pcp cache draining for zones that are not of interest to
the caller - e.g. compaction, hwpoison or memory offline.

This would force the page allocator to take locks, with potential lock
contention as a result.

There is no real reason for this sub-optimal implementation. Replace
per-cpu work item with a dedicated structure which contains a pointer
to zone and pass it over to the worker. This will get the zone
information all the way down to the worker function and do the right
job.
"
 
> This patch enables pcpu_drain with zone information, so that we can
> drain pages on the exact zone.
> 
> Signed-off-by: Wei Yang <richard.weiyang@gmail.com>

Other than that this makes sense to me
Acked-by: Michal Hocko <mhocko@suse.com>

Thanks!

> ---
>  mm/page_alloc.c | 20 ++++++++++++++------
>  1 file changed, 14 insertions(+), 6 deletions(-)
> 
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 65db26995466..eb4df3f63f5e 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -96,8 +96,12 @@ int _node_numa_mem_[MAX_NUMNODES];
>  #endif
>  
>  /* work_structs for global per-cpu drains */
> +struct pcpu_drain {
> +	struct zone *zone;
> +	struct work_struct work;
> +};
>  DEFINE_MUTEX(pcpu_drain_mutex);
> -DEFINE_PER_CPU(struct work_struct, pcpu_drain);
> +DEFINE_PER_CPU(struct pcpu_drain, pcpu_drain);
>  
>  #ifdef CONFIG_GCC_PLUGIN_LATENT_ENTROPY
>  volatile unsigned long latent_entropy __latent_entropy;
> @@ -2596,6 +2600,8 @@ void drain_local_pages(struct zone *zone)
>  
>  static void drain_local_pages_wq(struct work_struct *work)
>  {
> +	struct pcpu_drain *drain =
> +		container_of(work, struct pcpu_drain, work);
>  	/*
>  	 * drain_all_pages doesn't use proper cpu hotplug protection so
>  	 * we can race with cpu offline when the WQ can move this from
> @@ -2604,7 +2610,7 @@ static void drain_local_pages_wq(struct work_struct *work)
>  	 * a different one.
>  	 */
>  	preempt_disable();
> -	drain_local_pages(NULL);
> +	drain_local_pages(drain->zone);
>  	preempt_enable();
>  }
>  
> @@ -2675,12 +2681,14 @@ void drain_all_pages(struct zone *zone)
>  	}
>  
>  	for_each_cpu(cpu, &cpus_with_pcps) {
> -		struct work_struct *work = per_cpu_ptr(&pcpu_drain, cpu);
> -		INIT_WORK(work, drain_local_pages_wq);
> -		queue_work_on(cpu, mm_percpu_wq, work);
> +		struct pcpu_drain *drain = per_cpu_ptr(&pcpu_drain, cpu);
> +
> +		drain->zone = zone;
> +		INIT_WORK(&drain->work, drain_local_pages_wq);
> +		queue_work_on(cpu, mm_percpu_wq, &drain->work);
>  	}
>  	for_each_cpu(cpu, &cpus_with_pcps)
> -		flush_work(per_cpu_ptr(&pcpu_drain, cpu));
> +		flush_work(&per_cpu_ptr(&pcpu_drain, cpu)->work);
>  
>  	mutex_unlock(&pcpu_drain_mutex);
>  }
> -- 
> 2.15.1
> 

-- 
Michal Hocko
SUSE Labs

* Re: [PATCH] mm, page_alloc: enable pcpu_drain with zone capability
  2018-12-12 12:52 ` Michal Hocko
@ 2018-12-12 14:22   ` Wei Yang
  0 siblings, 0 replies; 8+ messages in thread
From: Wei Yang @ 2018-12-12 14:22 UTC
  To: Michal Hocko; +Cc: Wei Yang, linux-mm, akpm, osalvador, david

On Wed, Dec 12, 2018 at 01:52:38PM +0100, Michal Hocko wrote:
>On Wed 12-12-18 08:29:33, Wei Yang wrote:
>> Currently pcpu_drain is defined as a work_struct, which is not capable
>> of carrying the zone information needed to drain pages. During
>> __offline_pages(), the code knows the exact zone to drain. This leads
>> __offline_pages() to drain other zones which we don't want to touch,
>> and to some extent increases contention in the system.
>
>I think the above is quite vague and imprecise. I would formulate it as
>follows. Feel free to take it or parts that you find useful.
>"
>drain_all_pages is documented to drain per-cpu pages for a given zone
>(if non-NULL). The current implementation doesn't match the description
>though. It will drain all pcp pages for all zones that happen to have
>cached pages on the same cpu as the given zone. This will lead to
>premature pcp cache draining for zones that are not of interest to
>the caller - e.g. compaction, hwpoison or memory offline.
>
>This would force the page allocator to take locks, with potential lock
>contention as a result.
>
>There is no real reason for this sub-optimal implementation. Replace
>per-cpu work item with a dedicated structure which contains a pointer
>to zone and pass it over to the worker. This will get the zone
>information all the way down to the worker function and do the right
>job.
>"
> 

Thanks, this looks much better :-)

>> This patch enables pcpu_drain with zone information, so that we can
>> drain pages on the exact zone.
>> 
>> Signed-off-by: Wei Yang <richard.weiyang@gmail.com>
>
>Other than that this makes sense to me
>Acked-by: Michal Hocko <mhocko@suse.com>
>
>Thanks!
>
>> ---
>>  mm/page_alloc.c | 20 ++++++++++++++------
>>  1 file changed, 14 insertions(+), 6 deletions(-)
>> 
>> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
>> index 65db26995466..eb4df3f63f5e 100644
>> --- a/mm/page_alloc.c
>> +++ b/mm/page_alloc.c
>> @@ -96,8 +96,12 @@ int _node_numa_mem_[MAX_NUMNODES];
>>  #endif
>>  
>>  /* work_structs for global per-cpu drains */
>> +struct pcpu_drain {
>> +	struct zone *zone;
>> +	struct work_struct work;
>> +};
>>  DEFINE_MUTEX(pcpu_drain_mutex);
>> -DEFINE_PER_CPU(struct work_struct, pcpu_drain);
>> +DEFINE_PER_CPU(struct pcpu_drain, pcpu_drain);
>>  
>>  #ifdef CONFIG_GCC_PLUGIN_LATENT_ENTROPY
>>  volatile unsigned long latent_entropy __latent_entropy;
>> @@ -2596,6 +2600,8 @@ void drain_local_pages(struct zone *zone)
>>  
>>  static void drain_local_pages_wq(struct work_struct *work)
>>  {
>> +	struct pcpu_drain *drain =
>> +		container_of(work, struct pcpu_drain, work);
>>  	/*
>>  	 * drain_all_pages doesn't use proper cpu hotplug protection so
>>  	 * we can race with cpu offline when the WQ can move this from
>> @@ -2604,7 +2610,7 @@ static void drain_local_pages_wq(struct work_struct *work)
>>  	 * a different one.
>>  	 */
>>  	preempt_disable();
>> -	drain_local_pages(NULL);
>> +	drain_local_pages(drain->zone);
>>  	preempt_enable();
>>  }
>>  
>> @@ -2675,12 +2681,14 @@ void drain_all_pages(struct zone *zone)
>>  	}
>>  
>>  	for_each_cpu(cpu, &cpus_with_pcps) {
>> -		struct work_struct *work = per_cpu_ptr(&pcpu_drain, cpu);
>> -		INIT_WORK(work, drain_local_pages_wq);
>> -		queue_work_on(cpu, mm_percpu_wq, work);
>> +		struct pcpu_drain *drain = per_cpu_ptr(&pcpu_drain, cpu);
>> +
>> +		drain->zone = zone;
>> +		INIT_WORK(&drain->work, drain_local_pages_wq);
>> +		queue_work_on(cpu, mm_percpu_wq, &drain->work);
>>  	}
>>  	for_each_cpu(cpu, &cpus_with_pcps)
>> -		flush_work(per_cpu_ptr(&pcpu_drain, cpu));
>> +		flush_work(&per_cpu_ptr(&pcpu_drain, cpu)->work);
>>  
>>  	mutex_unlock(&pcpu_drain_mutex);
>>  }
>> -- 
>> 2.15.1
>> 
>
>-- 
>Michal Hocko
>SUSE Labs

-- 
Wei Yang
Help you, Help me

* [PATCH v2] mm, page_alloc: enable pcpu_drain with zone capability
  2018-12-12  0:29 [PATCH] mm, page_alloc: enable pcpu_drain with zone capability Wei Yang
  2018-12-12 12:52 ` Michal Hocko
@ 2018-12-12 14:25 ` Wei Yang
  2018-12-12 14:47   ` Oscar Salvador
  2018-12-12 17:00   ` David Hildenbrand
  1 sibling, 2 replies; 8+ messages in thread
From: Wei Yang @ 2018-12-12 14:25 UTC
  To: linux-mm; +Cc: akpm, mhocko, osalvador, david, Wei Yang

drain_all_pages is documented to drain per-cpu pages for a given zone (if
non-NULL). The current implementation doesn't match the description though.
It will drain all pcp pages for all zones that happen to have cached pages
on the same cpu as the given zone. This will lead to premature pcp cache
draining for zones that are not of interest to the caller - e.g.
compaction, hwpoison or memory offline.

This would force the page allocator to take locks, with potential lock
contention as a result.

There is no real reason for this sub-optimal implementation. Replace
per-cpu work item with a dedicated structure which contains a pointer to
zone and pass it over to the worker. This will get the zone information all
the way down to the worker function and do the right job.
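
Why passing the zone all the way down matters is visible in the callee:
drain_local_pages() treats a NULL zone as "drain everything on this
cpu". Roughly, paraphrasing the shape of the code in mm/page_alloc.c
(not a verbatim quote):

void drain_local_pages(struct zone *zone)
{
	int cpu = smp_processor_id();

	if (zone)
		drain_pages_zone(cpu, zone);	/* targeted: one zone's pcp lists */
	else
		drain_pages(cpu);		/* drains every zone on this cpu */
}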

[mhocko@suse.com: refactor the whole changelog]

Signed-off-by: Wei Yang <richard.weiyang@gmail.com>
Acked-by: Michal Hocko <mhocko@suse.com>
---
v2:
   * refactor changelog per Michal's suggestion
---
 mm/page_alloc.c | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 65db26995466..eb4df3f63f5e 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -96,8 +96,12 @@ int _node_numa_mem_[MAX_NUMNODES];
 #endif
 
 /* work_structs for global per-cpu drains */
+struct pcpu_drain {
+	struct zone *zone;
+	struct work_struct work;
+};
 DEFINE_MUTEX(pcpu_drain_mutex);
-DEFINE_PER_CPU(struct work_struct, pcpu_drain);
+DEFINE_PER_CPU(struct pcpu_drain, pcpu_drain);
 
 #ifdef CONFIG_GCC_PLUGIN_LATENT_ENTROPY
 volatile unsigned long latent_entropy __latent_entropy;
@@ -2596,6 +2600,8 @@ void drain_local_pages(struct zone *zone)
 
 static void drain_local_pages_wq(struct work_struct *work)
 {
+	struct pcpu_drain *drain =
+		container_of(work, struct pcpu_drain, work);
 	/*
 	 * drain_all_pages doesn't use proper cpu hotplug protection so
 	 * we can race with cpu offline when the WQ can move this from
@@ -2604,7 +2610,7 @@ static void drain_local_pages_wq(struct work_struct *work)
 	 * a different one.
 	 */
 	preempt_disable();
-	drain_local_pages(NULL);
+	drain_local_pages(drain->zone);
 	preempt_enable();
 }
 
@@ -2675,12 +2681,14 @@ void drain_all_pages(struct zone *zone)
 	}
 
 	for_each_cpu(cpu, &cpus_with_pcps) {
-		struct work_struct *work = per_cpu_ptr(&pcpu_drain, cpu);
-		INIT_WORK(work, drain_local_pages_wq);
-		queue_work_on(cpu, mm_percpu_wq, work);
+		struct pcpu_drain *drain = per_cpu_ptr(&pcpu_drain, cpu);
+
+		drain->zone = zone;
+		INIT_WORK(&drain->work, drain_local_pages_wq);
+		queue_work_on(cpu, mm_percpu_wq, &drain->work);
 	}
 	for_each_cpu(cpu, &cpus_with_pcps)
-		flush_work(per_cpu_ptr(&pcpu_drain, cpu));
+		flush_work(&per_cpu_ptr(&pcpu_drain, cpu)->work);
 
 	mutex_unlock(&pcpu_drain_mutex);
 }
-- 
2.15.1

* Re: [PATCH v2] mm, page_alloc: enable pcpu_drain with zone capability
  2018-12-12 14:25 ` [PATCH v2] " Wei Yang
@ 2018-12-12 14:47   ` Oscar Salvador
  2018-12-12 14:57     ` Wei Yang
  2018-12-12 17:00   ` David Hildenbrand
  1 sibling, 1 reply; 8+ messages in thread
From: Oscar Salvador @ 2018-12-12 14:47 UTC
  To: Wei Yang; +Cc: linux-mm, akpm, mhocko, david

On Wed, Dec 12, 2018 at 10:25:50PM +0800, Wei Yang wrote:
> drain_all_pages is documented to drain per-cpu pages for a given zone (if
> non-NULL). The current implementation doesn't match the description though.
> It will drain all pcp pages for all zones that happen to have cached pages
> on the same cpu as the given zone. This will lead to premature pcp cache
> draining for zones that are not of interest to the caller - e.g.
> compaction, hwpoison or memory offline.
> 
> This would force the page allocator to take locks, with potential lock
> contention as a result.
> 
> There is no real reason for this sub-optimal implementation. Replace
> per-cpu work item with a dedicated structure which contains a pointer to
> zone and pass it over to the worker. This will get the zone information all
> the way down to the worker function and do the right job.
> 
> [mhocko@suse.com: refactor the whole changelog]
> 
> Signed-off-by: Wei Yang <richard.weiyang@gmail.com>
> Acked-by: Michal Hocko <mhocko@suse.com>

Looks good to me

Reviewed-by: Oscar Salvador <osalvador@suse.de>

thanks

-- 
Oscar Salvador
SUSE L3

* Re: [PATCH v2] mm, page_alloc: enable pcpu_drain with zone capability
  2018-12-12 14:47   ` Oscar Salvador
@ 2018-12-12 14:57     ` Wei Yang
  0 siblings, 0 replies; 8+ messages in thread
From: Wei Yang @ 2018-12-12 14:57 UTC
  To: Oscar Salvador; +Cc: Wei Yang, linux-mm, akpm, mhocko, david

On Wed, Dec 12, 2018 at 03:47:02PM +0100, Oscar Salvador wrote:
>On Wed, Dec 12, 2018 at 10:25:50PM +0800, Wei Yang wrote:
>> drain_all_pages is documented to drain per-cpu pages for a given zone (if
>> non-NULL). The current implementation doesn't match the description though.
>> It will drain all pcp pages for all zones that happen to have cached pages
>> on the same cpu as the given zone. This will lead to premature pcp cache
>> draining for zones that are not of interest to the caller - e.g.
>> compaction, hwpoison or memory offline.
>> 
>> This would force the page allocator to take locks, with potential lock
>> contention as a result.
>> 
>> There is no real reason for this sub-optimal implementation. Replace
>> per-cpu work item with a dedicated structure which contains a pointer to
>> zone and pass it over to the worker. This will get the zone information all
>> the way down to the worker function and do the right job.
>> 
>> [mhocko@suse.com: refactor the whole changelog]
>> 
>> Signed-off-by: Wei Yang <richard.weiyang@gmail.com>
>> Acked-by: Michal Hocko <mhocko@suse.com>
>
>Looks good to me
>
>Reviewed-by: Oscar Salvador <osalvador@suse.de>
>
>thanks

Thanks :-)

>
>-- 
>Oscar Salvador
>SUSE L3

-- 
Wei Yang
Help you, Help me

* Re: [PATCH v2] mm, page_alloc: enable pcpu_drain with zone capability
  2018-12-12 14:25 ` [PATCH v2] " Wei Yang
  2018-12-12 14:47   ` Oscar Salvador
@ 2018-12-12 17:00   ` David Hildenbrand
  2018-12-13  1:18     ` Wei Yang
  1 sibling, 1 reply; 8+ messages in thread
From: David Hildenbrand @ 2018-12-12 17:00 UTC
  To: Wei Yang, linux-mm; +Cc: akpm, mhocko, osalvador

On 12.12.18 15:25, Wei Yang wrote:
> drain_all_pages is documented to drain per-cpu pages for a given zone (if
> non-NULL). The current implementation doesn't match the description though.
> It will drain all pcp pages for all zones that happen to have cached pages
> on the same cpu as the given zone. This will lead to premature pcp cache
> draining for zones that are not of interest to the caller - e.g.
> compaction, hwpoison or memory offline.
> 
> This would force the page allocator to take locks, with potential lock
> contention as a result.
> 
> There is no real reason for this sub-optimal implementation. Replace
> per-cpu work item with a dedicated structure which contains a pointer to
> zone and pass it over to the worker. This will get the zone information all
> the way down to the worker function and do the right job.
> 
> [mhocko@suse.com: refactor the whole changelog]
> 
> Signed-off-by: Wei Yang <richard.weiyang@gmail.com>
> Acked-by: Michal Hocko <mhocko@suse.com>
> ---
> v2:
>    * refactor changelog per Michal's suggestion
> ---
>  mm/page_alloc.c | 20 ++++++++++++++------
>  1 file changed, 14 insertions(+), 6 deletions(-)
> 
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 65db26995466..eb4df3f63f5e 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -96,8 +96,12 @@ int _node_numa_mem_[MAX_NUMNODES];
>  #endif
>  
>  /* work_structs for global per-cpu drains */

s/work_structs/work_struct/ ?

> +struct pcpu_drain {
> +	struct zone *zone;
> +	struct work_struct work;
> +};
>  DEFINE_MUTEX(pcpu_drain_mutex);
> -DEFINE_PER_CPU(struct work_struct, pcpu_drain);
> +DEFINE_PER_CPU(struct pcpu_drain, pcpu_drain);
>  
>  #ifdef CONFIG_GCC_PLUGIN_LATENT_ENTROPY
>  volatile unsigned long latent_entropy __latent_entropy;
> @@ -2596,6 +2600,8 @@ void drain_local_pages(struct zone *zone)
>  
>  static void drain_local_pages_wq(struct work_struct *work)
>  {
> +	struct pcpu_drain *drain =
> +		container_of(work, struct pcpu_drain, work);
>  	/*
>  	 * drain_all_pages doesn't use proper cpu hotplug protection so
>  	 * we can race with cpu offline when the WQ can move this from
> @@ -2604,7 +2610,7 @@ static void drain_local_pages_wq(struct work_struct *work)
>  	 * a different one.
>  	 */
>  	preempt_disable();
> -	drain_local_pages(NULL);
> +	drain_local_pages(drain->zone);
>  	preempt_enable();
>  }
>  
> @@ -2675,12 +2681,14 @@ void drain_all_pages(struct zone *zone)
>  	}
>  
>  	for_each_cpu(cpu, &cpus_with_pcps) {
> -		struct work_struct *work = per_cpu_ptr(&pcpu_drain, cpu);
> -		INIT_WORK(work, drain_local_pages_wq);
> -		queue_work_on(cpu, mm_percpu_wq, work);
> +		struct pcpu_drain *drain = per_cpu_ptr(&pcpu_drain, cpu);
> +
> +		drain->zone = zone;
> +		INIT_WORK(&drain->work, drain_local_pages_wq);
> +		queue_work_on(cpu, mm_percpu_wq, &drain->work);
>  	}
>  	for_each_cpu(cpu, &cpus_with_pcps)
> -		flush_work(per_cpu_ptr(&pcpu_drain, cpu));
> +		flush_work(&per_cpu_ptr(&pcpu_drain, cpu)->work);
>  
>  	mutex_unlock(&pcpu_drain_mutex);
>  }
> 

Looks good to me!

Reviewed-by: David Hildenbrand <david@redhat.com>


-- 

Thanks,

David / dhildenb

* Re: [PATCH v2] mm, page_alloc: enable pcpu_drain with zone capability
  2018-12-12 17:00   ` David Hildenbrand
@ 2018-12-13  1:18     ` Wei Yang
  0 siblings, 0 replies; 8+ messages in thread
From: Wei Yang @ 2018-12-13  1:18 UTC
  To: David Hildenbrand; +Cc: Wei Yang, linux-mm, akpm, mhocko, osalvador

On Wed, Dec 12, 2018 at 06:00:49PM +0100, David Hildenbrand wrote:
>On 12.12.18 15:25, Wei Yang wrote:
>> drain_all_pages is documented to drain per-cpu pages for a given zone (if
>> non-NULL). The current implementation doesn't match the description though.
>> It will drain all pcp pages for all zones that happen to have cached pages
>> on the same cpu as the given zone. This will lead to premature pcp cache
>> draining for zones that are not of interest to the caller - e.g.
>> compaction, hwpoison or memory offline.
>> 
>> This would force the page allocator to take locks, with potential lock
>> contention as a result.
>> 
>> There is no real reason for this sub-optimal implementation. Replace
>> per-cpu work item with a dedicated structure which contains a pointer to
>> zone and pass it over to the worker. This will get the zone information all
>> the way down to the worker function and do the right job.
>> 
>> [mhocko@suse.com: refactor the whole changelog]
>> 
>> Signed-off-by: Wei Yang <richard.weiyang@gmail.com>
>> Acked-by: Michal Hocko <mhocko@suse.com>
>> ---
>> v2:
>>    * refactor changelog per Michal's suggestion
>> ---
>>  mm/page_alloc.c | 20 ++++++++++++++------
>>  1 file changed, 14 insertions(+), 6 deletions(-)
>> 
>> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
>> index 65db26995466..eb4df3f63f5e 100644
>> --- a/mm/page_alloc.c
>> +++ b/mm/page_alloc.c
>> @@ -96,8 +96,12 @@ int _node_numa_mem_[MAX_NUMNODES];
>>  #endif
>>  
>>  /* work_structs for global per-cpu drains */
>
>s/work_structs/work_struct/ ?

Maybe the original comment uses the plural form deliberately: there are
several work_structs in total, since each cpu gets one.
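
To illustrate (a toy snippet, not part of the patch; pcpu_drain_demo
and its fields are made up for the example) - DEFINE_PER_CPU
instantiates one copy per possible cpu, so the plural matches what is
actually laid out in memory:

#include <linux/cpumask.h>
#include <linux/percpu.h>
#include <linux/printk.h>
#include <linux/workqueue.h>

struct pcpu_drain_demo {
	void *context;
	struct work_struct work;
};

static DEFINE_PER_CPU(struct pcpu_drain_demo, pcpu_drain_demo);

/* Every possible cpu owns a distinct instance. */
static void show_pcpu_instances(void)
{
	int cpu;

	for_each_possible_cpu(cpu)
		pr_info("cpu %d: instance at %p\n", cpu,
			per_cpu_ptr(&pcpu_drain_demo, cpu));
}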

>
>> +struct pcpu_drain {
>> +	struct zone *zone;
>> +	struct work_struct work;
>> +};
>>  DEFINE_MUTEX(pcpu_drain_mutex);
>> -DEFINE_PER_CPU(struct work_struct, pcpu_drain);
>> +DEFINE_PER_CPU(struct pcpu_drain, pcpu_drain);
>>  
>>  #ifdef CONFIG_GCC_PLUGIN_LATENT_ENTROPY
>>  volatile unsigned long latent_entropy __latent_entropy;
>> @@ -2596,6 +2600,8 @@ void drain_local_pages(struct zone *zone)
>>  
>>  static void drain_local_pages_wq(struct work_struct *work)
>>  {
>> +	struct pcpu_drain *drain =
>> +		container_of(work, struct pcpu_drain, work);
>>  	/*
>>  	 * drain_all_pages doesn't use proper cpu hotplug protection so
>>  	 * we can race with cpu offline when the WQ can move this from
>> @@ -2604,7 +2610,7 @@ static void drain_local_pages_wq(struct work_struct *work)
>>  	 * a different one.
>>  	 */
>>  	preempt_disable();
>> -	drain_local_pages(NULL);
>> +	drain_local_pages(drain->zone);
>>  	preempt_enable();
>>  }
>>  
>> @@ -2675,12 +2681,14 @@ void drain_all_pages(struct zone *zone)
>>  	}
>>  
>>  	for_each_cpu(cpu, &cpus_with_pcps) {
>> -		struct work_struct *work = per_cpu_ptr(&pcpu_drain, cpu);
>> -		INIT_WORK(work, drain_local_pages_wq);
>> -		queue_work_on(cpu, mm_percpu_wq, work);
>> +		struct pcpu_drain *drain = per_cpu_ptr(&pcpu_drain, cpu);
>> +
>> +		drain->zone = zone;
>> +		INIT_WORK(&drain->work, drain_local_pages_wq);
>> +		queue_work_on(cpu, mm_percpu_wq, &drain->work);
>>  	}
>>  	for_each_cpu(cpu, &cpus_with_pcps)
>> -		flush_work(per_cpu_ptr(&pcpu_drain, cpu));
>> +		flush_work(&per_cpu_ptr(&pcpu_drain, cpu)->work);
>>  
>>  	mutex_unlock(&pcpu_drain_mutex);
>>  }
>> 
>
>Looks good to me!
>
>Reviewed-by: David Hildenbrand <david@redhat.com>
>
>
>-- 
>
>Thanks,
>
>David / dhildenb

-- 
Wei Yang
Help you, Help me

end of thread

Thread overview: 8+ messages
2018-12-12  0:29 [PATCH] mm, page_alloc: enable pcpu_drain with zone capability Wei Yang
2018-12-12 12:52 ` Michal Hocko
2018-12-12 14:22   ` Wei Yang
2018-12-12 14:25 ` [PATCH v2] " Wei Yang
2018-12-12 14:47   ` Oscar Salvador
2018-12-12 14:57     ` Wei Yang
2018-12-12 17:00   ` David Hildenbrand
2018-12-13  1:18     ` Wei Yang
