* [PATCH] mm: migrate: record the mlocked page status to remove unnecessary lru drain
@ 2023-10-18 13:04 Baolin Wang
2023-10-18 14:00 ` Zi Yan
0 siblings, 1 reply; 18+ messages in thread
From: Baolin Wang @ 2023-10-18 13:04 UTC (permalink / raw)
To: akpm
Cc: mgorman, hughd, vbabka, ying.huang, ziy, baolin.wang, linux-mm,
linux-kernel
When doing compaction, I found the lru_add_drain() is an obvious hotspot
when migrating pages. The distribution of this hotspot is as follows:
- 18.75% compact_zone
   - 17.39% migrate_pages
      - 13.79% migrate_pages_batch
         - 11.66% migrate_folio_move
            - 7.02% lru_add_drain
               + 7.02% lru_add_drain_cpu
            + 3.00% move_to_new_folio
              1.23% rmap_walk
         + 1.92% migrate_folio_unmap
      + 3.20% migrate_pages_sync
   + 0.90% isolate_migratepages
The lru_add_drain() was added by commit c3096e6782b7 ("mm/migrate:
__unmap_and_move() push good newpage to LRU") to drain the newpage to the LRU
immediately, which helps build up the correct newpage->mlock_count in
remove_migration_ptes() for mlocked pages. However, if no mlocked pages
are being migrated, we can avoid this lru drain operation, especially
in heavily concurrent scenarios.
So we can record the source page's mlocked status in migrate_folio_unmap(),
and only drain the lru list when that status is set in migrate_folio_move().
In addition, the page has already been isolated from the lru when migrating,
so the mlocked status read via folio_test_mlocked() in migrate_folio_unmap()
is stable.

After this patch, I can see the hotspot of the lru_add_drain() is gone:
- 9.41% migrate_pages_batch
   - 6.15% migrate_folio_move
      - 3.64% move_to_new_folio
         + 1.80% migrate_folio_extra
         + 1.70% buffer_migrate_folio
      + 1.41% rmap_walk
      + 0.62% folio_add_lru
   + 3.07% migrate_folio_unmap
Meanwhile, the compaction latency shows some improvements when running
thpscale:
base patched
Amean fault-both-1 1131.22 ( 0.00%) 1112.55 * 1.65%*
Amean fault-both-3 2489.75 ( 0.00%) 2324.15 * 6.65%*
Amean fault-both-5 3257.37 ( 0.00%) 3183.18 * 2.28%*
Amean fault-both-7 4257.99 ( 0.00%) 4079.04 * 4.20%*
Amean fault-both-12 6614.02 ( 0.00%) 6075.60 * 8.14%*
Amean fault-both-18 10607.78 ( 0.00%) 8978.86 * 15.36%*
Amean fault-both-24 14911.65 ( 0.00%) 11619.55 * 22.08%*
Amean fault-both-30 14954.67 ( 0.00%) 14925.66 * 0.19%*
Amean fault-both-32 16654.87 ( 0.00%) 15580.31 * 6.45%*
Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com>
---
mm/migrate.c | 50 ++++++++++++++++++++++++++++++++++++++------------
1 file changed, 38 insertions(+), 12 deletions(-)
diff --git a/mm/migrate.c b/mm/migrate.c
index 4caf405b6504..32c96f89710f 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1027,22 +1027,32 @@ union migration_ptr {
struct anon_vma *anon_vma;
struct address_space *mapping;
};
+
+enum {
+ PAGE_WAS_MAPPED = 1 << 0,
+ PAGE_WAS_MLOCKED = 1 << 1,
+};
+
static void __migrate_folio_record(struct folio *dst,
- unsigned long page_was_mapped,
+ unsigned long page_flags,
struct anon_vma *anon_vma)
{
union migration_ptr ptr = { .anon_vma = anon_vma };
dst->mapping = ptr.mapping;
- dst->private = (void *)page_was_mapped;
+ dst->private = (void *)page_flags;
}
static void __migrate_folio_extract(struct folio *dst,
int *page_was_mappedp,
+ int *page_was_mlocked,
struct anon_vma **anon_vmap)
{
union migration_ptr ptr = { .mapping = dst->mapping };
+ unsigned long page_flags = (unsigned long)dst->private;
+
*anon_vmap = ptr.anon_vma;
- *page_was_mappedp = (unsigned long)dst->private;
+ *page_was_mappedp = page_flags & PAGE_WAS_MAPPED ? 1 : 0;
+ *page_was_mlocked = page_flags & PAGE_WAS_MLOCKED ? 1 : 0;
dst->mapping = NULL;
dst->private = NULL;
}
@@ -1103,7 +1113,7 @@ static int migrate_folio_unmap(new_folio_t get_new_folio,
{
struct folio *dst;
int rc = -EAGAIN;
- int page_was_mapped = 0;
+ int page_was_mapped = 0, page_was_mlocked = 0;
struct anon_vma *anon_vma = NULL;
bool is_lru = !__folio_test_movable(src);
bool locked = false;
@@ -1157,6 +1167,7 @@ static int migrate_folio_unmap(new_folio_t get_new_folio,
folio_lock(src);
}
locked = true;
+ page_was_mlocked = folio_test_mlocked(src);
if (folio_test_writeback(src)) {
/*
@@ -1206,7 +1217,7 @@ static int migrate_folio_unmap(new_folio_t get_new_folio,
dst_locked = true;
if (unlikely(!is_lru)) {
- __migrate_folio_record(dst, page_was_mapped, anon_vma);
+ __migrate_folio_record(dst, 0, anon_vma);
return MIGRATEPAGE_UNMAP;
}
@@ -1236,7 +1247,13 @@ static int migrate_folio_unmap(new_folio_t get_new_folio,
}
if (!folio_mapped(src)) {
- __migrate_folio_record(dst, page_was_mapped, anon_vma);
+ unsigned int page_flags = 0;
+
+ if (page_was_mapped)
+ page_flags |= PAGE_WAS_MAPPED;
+ if (page_was_mlocked)
+ page_flags |= PAGE_WAS_MLOCKED;
+ __migrate_folio_record(dst, page_flags, anon_vma);
return MIGRATEPAGE_UNMAP;
}
@@ -1261,12 +1278,13 @@ static int migrate_folio_move(free_folio_t put_new_folio, unsigned long private,
struct list_head *ret)
{
int rc;
- int page_was_mapped = 0;
+ int page_was_mapped = 0, page_was_mlocked = 0;
struct anon_vma *anon_vma = NULL;
bool is_lru = !__folio_test_movable(src);
struct list_head *prev;
- __migrate_folio_extract(dst, &page_was_mapped, &anon_vma);
+ __migrate_folio_extract(dst, &page_was_mapped,
+ &page_was_mlocked, &anon_vma);
prev = dst->lru.prev;
list_del(&dst->lru);
@@ -1287,7 +1305,7 @@ static int migrate_folio_move(free_folio_t put_new_folio, unsigned long private,
* isolated from the unevictable LRU: but this case is the easiest.
*/
folio_add_lru(dst);
- if (page_was_mapped)
+ if (page_was_mlocked)
lru_add_drain();
if (page_was_mapped)
@@ -1321,8 +1339,15 @@ static int migrate_folio_move(free_folio_t put_new_folio, unsigned long private,
* right list unless we want to retry.
*/
if (rc == -EAGAIN) {
+ unsigned int page_flags = 0;
+
+ if (page_was_mapped)
+ page_flags |= PAGE_WAS_MAPPED;
+ if (page_was_mlocked)
+ page_flags |= PAGE_WAS_MLOCKED;
+
list_add(&dst->lru, prev);
- __migrate_folio_record(dst, page_was_mapped, anon_vma);
+ __migrate_folio_record(dst, page_flags, anon_vma);
return rc;
}
@@ -1799,10 +1824,11 @@ static int migrate_pages_batch(struct list_head *from,
dst = list_first_entry(&dst_folios, struct folio, lru);
dst2 = list_next_entry(dst, lru);
list_for_each_entry_safe(folio, folio2, &unmap_folios, lru) {
- int page_was_mapped = 0;
+ int page_was_mapped = 0, page_was_mlocked = 0;
struct anon_vma *anon_vma = NULL;
- __migrate_folio_extract(dst, &page_was_mapped, &anon_vma);
+ __migrate_folio_extract(dst, &page_was_mapped,
+ &page_was_mlocked, &anon_vma);
migrate_folio_undo_src(folio, page_was_mapped, anon_vma,
true, ret_folios);
list_del(&dst->lru);
--
2.39.3
* Re: [PATCH] mm: migrate: record the mlocked page status to remove unnecessary lru drain
2023-10-18 13:04 [PATCH] mm: migrate: record the mlocked page status to remove unnecessary lru drain Baolin Wang
@ 2023-10-18 14:00 ` Zi Yan
2023-10-19 6:09 ` Huang, Ying
0 siblings, 1 reply; 18+ messages in thread
From: Zi Yan @ 2023-10-18 14:00 UTC (permalink / raw)
To: Baolin Wang
Cc: akpm, mgorman, hughd, vbabka, ying.huang, linux-mm, linux-kernel
On 18 Oct 2023, at 9:04, Baolin Wang wrote:
> When doing compaction, I found the lru_add_drain() is an obvious hotspot
> when migrating pages. The distribution of this hotspot is as follows:
> - 18.75% compact_zone
> - 17.39% migrate_pages
> - 13.79% migrate_pages_batch
> - 11.66% migrate_folio_move
> - 7.02% lru_add_drain
> + 7.02% lru_add_drain_cpu
> + 3.00% move_to_new_folio
> 1.23% rmap_walk
> + 1.92% migrate_folio_unmap
> + 3.20% migrate_pages_sync
> + 0.90% isolate_migratepages
>
> The lru_add_drain() was added by commit c3096e6782b7 ("mm/migrate:
> __unmap_and_move() push good newpage to LRU") to drain the newpage to LRU
> immediately, to help to build up the correct newpage->mlock_count in
> remove_migration_ptes() for mlocked pages. However, if there are no mlocked
> pages are migrating, then we can avoid this lru drain operation, especailly
> for the heavy concurrent scenarios.
lru_add_drain() is also used to drain pages out of folio_batch. Pages in folio_batch
have an additional pin to prevent migration. See folio_get(folio); in folio_add_lru().
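For illustration, a minimal sketch of the pin I mean (simplified, not the exact
mm/swap.c code):

	/*
	 * folio_add_lru() grabs a reference before parking the folio in a
	 * per-CPU batch; that extra reference is only dropped once the batch
	 * is drained onto the real LRU list, e.g. by lru_add_drain().
	 */
	folio_get(folio);
	folio_batch_add(fbatch, folio);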
>
> So we can record the source pages' mlocked status in migrate_folio_unmap(),
> and only drain the lru list when the mlocked status is set in migrate_folio_move().
> In addition, the page was already isolated from lru when migrating, so we
> check the mlocked status is stable by folio_test_mlocked() in migrate_folio_unmap().
>
> After this patch, I can see the hotpot of the lru_add_drain() is gone:
> - 9.41% migrate_pages_batch
> - 6.15% migrate_folio_move
> - 3.64% move_to_new_folio
> + 1.80% migrate_folio_extra
> + 1.70% buffer_migrate_folio
> + 1.41% rmap_walk
> + 0.62% folio_add_lru
> + 3.07% migrate_folio_unmap
>
> Meanwhile, the compaction latency shows some improvements when running
> thpscale:
> base patched
> Amean fault-both-1 1131.22 ( 0.00%) 1112.55 * 1.65%*
> Amean fault-both-3 2489.75 ( 0.00%) 2324.15 * 6.65%*
> Amean fault-both-5 3257.37 ( 0.00%) 3183.18 * 2.28%*
> Amean fault-both-7 4257.99 ( 0.00%) 4079.04 * 4.20%*
> Amean fault-both-12 6614.02 ( 0.00%) 6075.60 * 8.14%*
> Amean fault-both-18 10607.78 ( 0.00%) 8978.86 * 15.36%*
> Amean fault-both-24 14911.65 ( 0.00%) 11619.55 * 22.08%*
> Amean fault-both-30 14954.67 ( 0.00%) 14925.66 * 0.19%*
> Amean fault-both-32 16654.87 ( 0.00%) 15580.31 * 6.45%*
>
> Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com>
> ---
> mm/migrate.c | 50 ++++++++++++++++++++++++++++++++++++++------------
> 1 file changed, 38 insertions(+), 12 deletions(-)
>
> diff --git a/mm/migrate.c b/mm/migrate.c
> index 4caf405b6504..32c96f89710f 100644
> --- a/mm/migrate.c
> +++ b/mm/migrate.c
> @@ -1027,22 +1027,32 @@ union migration_ptr {
> struct anon_vma *anon_vma;
> struct address_space *mapping;
> };
> +
> +enum {
> + PAGE_WAS_MAPPED = 1 << 0,
> + PAGE_WAS_MLOCKED = 1 << 1,
> +};
> +
> static void __migrate_folio_record(struct folio *dst,
> - unsigned long page_was_mapped,
> + unsigned long page_flags,
> struct anon_vma *anon_vma)
> {
> union migration_ptr ptr = { .anon_vma = anon_vma };
> dst->mapping = ptr.mapping;
> - dst->private = (void *)page_was_mapped;
> + dst->private = (void *)page_flags;
> }
>
> static void __migrate_folio_extract(struct folio *dst,
> int *page_was_mappedp,
> + int *page_was_mlocked,
> struct anon_vma **anon_vmap)
> {
> union migration_ptr ptr = { .mapping = dst->mapping };
> + unsigned long page_flags = (unsigned long)dst->private;
> +
> *anon_vmap = ptr.anon_vma;
> - *page_was_mappedp = (unsigned long)dst->private;
> + *page_was_mappedp = page_flags & PAGE_WAS_MAPPED ? 1 : 0;
> + *page_was_mlocked = page_flags & PAGE_WAS_MLOCKED ? 1 : 0;
> dst->mapping = NULL;
> dst->private = NULL;
> }
> @@ -1103,7 +1113,7 @@ static int migrate_folio_unmap(new_folio_t get_new_folio,
> {
> struct folio *dst;
> int rc = -EAGAIN;
> - int page_was_mapped = 0;
> + int page_was_mapped = 0, page_was_mlocked = 0;
> struct anon_vma *anon_vma = NULL;
> bool is_lru = !__folio_test_movable(src);
> bool locked = false;
> @@ -1157,6 +1167,7 @@ static int migrate_folio_unmap(new_folio_t get_new_folio,
> folio_lock(src);
> }
> locked = true;
> + page_was_mlocked = folio_test_mlocked(src);
>
> if (folio_test_writeback(src)) {
> /*
> @@ -1206,7 +1217,7 @@ static int migrate_folio_unmap(new_folio_t get_new_folio,
> dst_locked = true;
>
> if (unlikely(!is_lru)) {
> - __migrate_folio_record(dst, page_was_mapped, anon_vma);
> + __migrate_folio_record(dst, 0, anon_vma);
> return MIGRATEPAGE_UNMAP;
> }
>
> @@ -1236,7 +1247,13 @@ static int migrate_folio_unmap(new_folio_t get_new_folio,
> }
>
> if (!folio_mapped(src)) {
> - __migrate_folio_record(dst, page_was_mapped, anon_vma);
> + unsigned int page_flags = 0;
> +
> + if (page_was_mapped)
> + page_flags |= PAGE_WAS_MAPPED;
> + if (page_was_mlocked)
> + page_flags |= PAGE_WAS_MLOCKED;
> + __migrate_folio_record(dst, page_flags, anon_vma);
> return MIGRATEPAGE_UNMAP;
> }
>
> @@ -1261,12 +1278,13 @@ static int migrate_folio_move(free_folio_t put_new_folio, unsigned long private,
> struct list_head *ret)
> {
> int rc;
> - int page_was_mapped = 0;
> + int page_was_mapped = 0, page_was_mlocked = 0;
> struct anon_vma *anon_vma = NULL;
> bool is_lru = !__folio_test_movable(src);
> struct list_head *prev;
>
> - __migrate_folio_extract(dst, &page_was_mapped, &anon_vma);
> + __migrate_folio_extract(dst, &page_was_mapped,
> + &page_was_mlocked, &anon_vma);
It is better to read out the flags and then check page_was_mapped and
page_was_mlocked from them, to avoid future __migrate_folio_extract() interface churn.
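For example, one possible shape of that interface (a hypothetical sketch, not
part of this patch; the name old_page_state is only assumed here):

	int old_page_state;
	struct anon_vma *anon_vma;

	__migrate_folio_extract(dst, &old_page_state, &anon_vma);
	...
	if (old_page_state & PAGE_WAS_MLOCKED)
		lru_add_drain();
	if (old_page_state & PAGE_WAS_MAPPED)
		remove_migration_ptes(src, dst, false);

That way a future PAGE_WAS_* bit would not change the function signature again.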
> prev = dst->lru.prev;
> list_del(&dst->lru);
>
> @@ -1287,7 +1305,7 @@ static int migrate_folio_move(free_folio_t put_new_folio, unsigned long private,
> * isolated from the unevictable LRU: but this case is the easiest.
> */
> folio_add_lru(dst);
> - if (page_was_mapped)
> + if (page_was_mlocked)
> lru_add_drain();
Like I said at the top, this would be if (page_was_mapped || page_was_mlocked).
>
> if (page_was_mapped)
> @@ -1321,8 +1339,15 @@ static int migrate_folio_move(free_folio_t put_new_folio, unsigned long private,
> * right list unless we want to retry.
> */
> if (rc == -EAGAIN) {
> + unsigned int page_flags = 0;
> +
> + if (page_was_mapped)
> + page_flags |= PAGE_WAS_MAPPED;
> + if (page_was_mlocked)
> + page_flags |= PAGE_WAS_MLOCKED;
> +
> list_add(&dst->lru, prev);
> - __migrate_folio_record(dst, page_was_mapped, anon_vma);
> + __migrate_folio_record(dst, page_flags, anon_vma);
> return rc;
> }
>
> @@ -1799,10 +1824,11 @@ static int migrate_pages_batch(struct list_head *from,
> dst = list_first_entry(&dst_folios, struct folio, lru);
> dst2 = list_next_entry(dst, lru);
> list_for_each_entry_safe(folio, folio2, &unmap_folios, lru) {
> - int page_was_mapped = 0;
> + int page_was_mapped = 0, page_was_mlocked = 0;
> struct anon_vma *anon_vma = NULL;
>
> - __migrate_folio_extract(dst, &page_was_mapped, &anon_vma);
> + __migrate_folio_extract(dst, &page_was_mapped,
> + &page_was_mlocked, &anon_vma);
> migrate_folio_undo_src(folio, page_was_mapped, anon_vma,
> true, ret_folios);
> list_del(&dst->lru);
> --
> 2.39.3
--
Best Regards,
Yan, Zi
* Re: [PATCH] mm: migrate: record the mlocked page status to remove unnecessary lru drain
2023-10-18 14:00 ` Zi Yan
@ 2023-10-19 6:09 ` Huang, Ying
2023-10-19 7:25 ` Baolin Wang
2023-10-19 13:23 ` Zi Yan
0 siblings, 2 replies; 18+ messages in thread
From: Huang, Ying @ 2023-10-19 6:09 UTC (permalink / raw)
To: Zi Yan
Cc: Baolin Wang, akpm, mgorman, hughd, vbabka, linux-mm,
linux-kernel, Fengwei Yin
Zi Yan <ziy@nvidia.com> writes:
> On 18 Oct 2023, at 9:04, Baolin Wang wrote:
>
>> When doing compaction, I found the lru_add_drain() is an obvious hotspot
>> when migrating pages. The distribution of this hotspot is as follows:
>> - 18.75% compact_zone
>> - 17.39% migrate_pages
>> - 13.79% migrate_pages_batch
>> - 11.66% migrate_folio_move
>> - 7.02% lru_add_drain
>> + 7.02% lru_add_drain_cpu
>> + 3.00% move_to_new_folio
>> 1.23% rmap_walk
>> + 1.92% migrate_folio_unmap
>> + 3.20% migrate_pages_sync
>> + 0.90% isolate_migratepages
>>
>> The lru_add_drain() was added by commit c3096e6782b7 ("mm/migrate:
>> __unmap_and_move() push good newpage to LRU") to drain the newpage to LRU
>> immediately, to help to build up the correct newpage->mlock_count in
>> remove_migration_ptes() for mlocked pages. However, if there are no mlocked
>> pages are migrating, then we can avoid this lru drain operation, especailly
>> for the heavy concurrent scenarios.
>
> lru_add_drain() is also used to drain pages out of folio_batch. Pages in folio_batch
> have an additional pin to prevent migration. See folio_get(folio); in folio_add_lru().
lru_add_drain() is called after the page reference count checking in
move_to_new_folio(). So, I don't think this is an issue.
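In other words, the relevant ordering in migrate_folio_move() is roughly
(simplified sketch based on the diff above):

	rc = move_to_new_folio(dst, src, mode);	/* folio refcount is checked in here */
	if (rc != MIGRATEPAGE_SUCCESS)
		goto out;			/* no drain on failure */
	...
	folio_add_lru(dst);
	if (page_was_mlocked)			/* with this patch applied */
		lru_add_drain();

so the drain only runs after the copy has already succeeded.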
>>
>> So we can record the source pages' mlocked status in migrate_folio_unmap(),
>> and only drain the lru list when the mlocked status is set in migrate_folio_move().
>> In addition, the page was already isolated from lru when migrating, so we
>> check the mlocked status is stable by folio_test_mlocked() in migrate_folio_unmap().
>>
>> After this patch, I can see the hotpot of the lru_add_drain() is gone:
>> - 9.41% migrate_pages_batch
>> - 6.15% migrate_folio_move
>> - 3.64% move_to_new_folio
>> + 1.80% migrate_folio_extra
>> + 1.70% buffer_migrate_folio
>> + 1.41% rmap_walk
>> + 0.62% folio_add_lru
>> + 3.07% migrate_folio_unmap
>>
>> Meanwhile, the compaction latency shows some improvements when running
>> thpscale:
>> base patched
>> Amean fault-both-1 1131.22 ( 0.00%) 1112.55 * 1.65%*
>> Amean fault-both-3 2489.75 ( 0.00%) 2324.15 * 6.65%*
>> Amean fault-both-5 3257.37 ( 0.00%) 3183.18 * 2.28%*
>> Amean fault-both-7 4257.99 ( 0.00%) 4079.04 * 4.20%*
>> Amean fault-both-12 6614.02 ( 0.00%) 6075.60 * 8.14%*
>> Amean fault-both-18 10607.78 ( 0.00%) 8978.86 * 15.36%*
>> Amean fault-both-24 14911.65 ( 0.00%) 11619.55 * 22.08%*
>> Amean fault-both-30 14954.67 ( 0.00%) 14925.66 * 0.19%*
>> Amean fault-both-32 16654.87 ( 0.00%) 15580.31 * 6.45%*
>>
>> Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com>
>> ---
>> mm/migrate.c | 50 ++++++++++++++++++++++++++++++++++++++------------
>> 1 file changed, 38 insertions(+), 12 deletions(-)
>>
>> diff --git a/mm/migrate.c b/mm/migrate.c
>> index 4caf405b6504..32c96f89710f 100644
>> --- a/mm/migrate.c
>> +++ b/mm/migrate.c
>> @@ -1027,22 +1027,32 @@ union migration_ptr {
>> struct anon_vma *anon_vma;
>> struct address_space *mapping;
>> };
>> +
>> +enum {
>> + PAGE_WAS_MAPPED = 1 << 0,
>> + PAGE_WAS_MLOCKED = 1 << 1,
>> +};
>> +
>> static void __migrate_folio_record(struct folio *dst,
>> - unsigned long page_was_mapped,
>> + unsigned long page_flags,
>> struct anon_vma *anon_vma)
>> {
>> union migration_ptr ptr = { .anon_vma = anon_vma };
>> dst->mapping = ptr.mapping;
>> - dst->private = (void *)page_was_mapped;
>> + dst->private = (void *)page_flags;
>> }
>>
>> static void __migrate_folio_extract(struct folio *dst,
>> int *page_was_mappedp,
>> + int *page_was_mlocked,
>> struct anon_vma **anon_vmap)
>> {
>> union migration_ptr ptr = { .mapping = dst->mapping };
>> + unsigned long page_flags = (unsigned long)dst->private;
>> +
>> *anon_vmap = ptr.anon_vma;
>> - *page_was_mappedp = (unsigned long)dst->private;
>> + *page_was_mappedp = page_flags & PAGE_WAS_MAPPED ? 1 : 0;
>> + *page_was_mlocked = page_flags & PAGE_WAS_MLOCKED ? 1 : 0;
>> dst->mapping = NULL;
>> dst->private = NULL;
>> }
>> @@ -1103,7 +1113,7 @@ static int migrate_folio_unmap(new_folio_t get_new_folio,
>> {
>> struct folio *dst;
>> int rc = -EAGAIN;
>> - int page_was_mapped = 0;
>> + int page_was_mapped = 0, page_was_mlocked = 0;
>> struct anon_vma *anon_vma = NULL;
>> bool is_lru = !__folio_test_movable(src);
>> bool locked = false;
>> @@ -1157,6 +1167,7 @@ static int migrate_folio_unmap(new_folio_t get_new_folio,
>> folio_lock(src);
>> }
>> locked = true;
>> + page_was_mlocked = folio_test_mlocked(src);
>>
>> if (folio_test_writeback(src)) {
>> /*
>> @@ -1206,7 +1217,7 @@ static int migrate_folio_unmap(new_folio_t get_new_folio,
>> dst_locked = true;
>>
>> if (unlikely(!is_lru)) {
>> - __migrate_folio_record(dst, page_was_mapped, anon_vma);
>> + __migrate_folio_record(dst, 0, anon_vma);
>> return MIGRATEPAGE_UNMAP;
>> }
>>
>> @@ -1236,7 +1247,13 @@ static int migrate_folio_unmap(new_folio_t get_new_folio,
>> }
>>
>> if (!folio_mapped(src)) {
>> - __migrate_folio_record(dst, page_was_mapped, anon_vma);
>> + unsigned int page_flags = 0;
>> +
>> + if (page_was_mapped)
>> + page_flags |= PAGE_WAS_MAPPED;
>> + if (page_was_mlocked)
>> + page_flags |= PAGE_WAS_MLOCKED;
>> + __migrate_folio_record(dst, page_flags, anon_vma);
>> return MIGRATEPAGE_UNMAP;
>> }
>>
>> @@ -1261,12 +1278,13 @@ static int migrate_folio_move(free_folio_t put_new_folio, unsigned long private,
>> struct list_head *ret)
>> {
>> int rc;
>> - int page_was_mapped = 0;
>> + int page_was_mapped = 0, page_was_mlocked = 0;
>> struct anon_vma *anon_vma = NULL;
>> bool is_lru = !__folio_test_movable(src);
>> struct list_head *prev;
>>
>> - __migrate_folio_extract(dst, &page_was_mapped, &anon_vma);
>> + __migrate_folio_extract(dst, &page_was_mapped,
>> + &page_was_mlocked, &anon_vma);
>
> It is better to read out the flag, then check page_was_mapped and page_was_mlocked
> to avoid future __migrate_folio_extract() interface churns.
IMHO, in contrast, it's better to use separate flags in
__migrate_folio_record() too, to avoid packing the flags at each call site.
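A sketch of what that symmetric variant could look like (hypothetical, only to
illustrate the suggestion):

	static void __migrate_folio_record(struct folio *dst, int page_was_mapped,
					   int page_was_mlocked,
					   struct anon_vma *anon_vma)
	{
		union migration_ptr ptr = { .anon_vma = anon_vma };
		unsigned long page_flags = 0;

		if (page_was_mapped)
			page_flags |= PAGE_WAS_MAPPED;
		if (page_was_mlocked)
			page_flags |= PAGE_WAS_MLOCKED;

		dst->mapping = ptr.mapping;
		dst->private = (void *)page_flags;	/* packed in one place */
	}

so the packing happens once here instead of at every call site.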
>> prev = dst->lru.prev;
>> list_del(&dst->lru);
>>
>> @@ -1287,7 +1305,7 @@ static int migrate_folio_move(free_folio_t put_new_folio, unsigned long private,
>> * isolated from the unevictable LRU: but this case is the easiest.
>> */
>> folio_add_lru(dst);
>> - if (page_was_mapped)
>> + if (page_was_mlocked)
>> lru_add_drain();
>
> Like I said at the top, this would be if (page_was_mapped || page_was_mlocked).
>
>>
>> if (page_was_mapped)
>> @@ -1321,8 +1339,15 @@ static int migrate_folio_move(free_folio_t put_new_folio, unsigned long private,
>> * right list unless we want to retry.
>> */
>> if (rc == -EAGAIN) {
>> + unsigned int page_flags = 0;
>> +
>> + if (page_was_mapped)
>> + page_flags |= PAGE_WAS_MAPPED;
>> + if (page_was_mlocked)
>> + page_flags |= PAGE_WAS_MLOCKED;
>> +
>> list_add(&dst->lru, prev);
>> - __migrate_folio_record(dst, page_was_mapped, anon_vma);
>> + __migrate_folio_record(dst, page_flags, anon_vma);
>> return rc;
>> }
>>
>> @@ -1799,10 +1824,11 @@ static int migrate_pages_batch(struct list_head *from,
>> dst = list_first_entry(&dst_folios, struct folio, lru);
>> dst2 = list_next_entry(dst, lru);
>> list_for_each_entry_safe(folio, folio2, &unmap_folios, lru) {
>> - int page_was_mapped = 0;
>> + int page_was_mapped = 0, page_was_mlocked = 0;
>> struct anon_vma *anon_vma = NULL;
>>
>> - __migrate_folio_extract(dst, &page_was_mapped, &anon_vma);
>> + __migrate_folio_extract(dst, &page_was_mapped,
>> + &page_was_mlocked, &anon_vma);
>> migrate_folio_undo_src(folio, page_was_mapped, anon_vma,
>> true, ret_folios);
>> list_del(&dst->lru);
>> --
>> 2.39.3
--
Best Regards,
Huang, Ying
* Re: [PATCH] mm: migrate: record the mlocked page status to remove unnecessary lru drain
2023-10-19 6:09 ` Huang, Ying
@ 2023-10-19 7:25 ` Baolin Wang
2023-10-19 8:22 ` Yin Fengwei
2023-10-19 13:23 ` Zi Yan
1 sibling, 1 reply; 18+ messages in thread
From: Baolin Wang @ 2023-10-19 7:25 UTC (permalink / raw)
To: Huang, Ying, Zi Yan
Cc: akpm, mgorman, hughd, vbabka, linux-mm, linux-kernel, Fengwei Yin
On 10/19/2023 2:09 PM, Huang, Ying wrote:
> Zi Yan <ziy@nvidia.com> writes:
>
>> On 18 Oct 2023, at 9:04, Baolin Wang wrote:
>>
>>> When doing compaction, I found the lru_add_drain() is an obvious hotspot
>>> when migrating pages. The distribution of this hotspot is as follows:
>>> - 18.75% compact_zone
>>> - 17.39% migrate_pages
>>> - 13.79% migrate_pages_batch
>>> - 11.66% migrate_folio_move
>>> - 7.02% lru_add_drain
>>> + 7.02% lru_add_drain_cpu
>>> + 3.00% move_to_new_folio
>>> 1.23% rmap_walk
>>> + 1.92% migrate_folio_unmap
>>> + 3.20% migrate_pages_sync
>>> + 0.90% isolate_migratepages
>>>
>>> The lru_add_drain() was added by commit c3096e6782b7 ("mm/migrate:
>>> __unmap_and_move() push good newpage to LRU") to drain the newpage to LRU
>>> immediately, to help to build up the correct newpage->mlock_count in
>>> remove_migration_ptes() for mlocked pages. However, if there are no mlocked
>>> pages are migrating, then we can avoid this lru drain operation, especailly
>>> for the heavy concurrent scenarios.
>>
>> lru_add_drain() is also used to drain pages out of folio_batch. Pages in folio_batch
>> have an additional pin to prevent migration. See folio_get(folio); in folio_add_lru().
>
> lru_add_drain() is called after the page reference count checking in
> move_to_new_folio(). So, I don't this is an issue.
Agree. The purpose of adding lru_add_drain() is to address the
'mlock_count' issue for mlocked pages. Please see commit c3096e6782b7
and related comments. Moreover I haven't seen an increase in the number
of page migration failures due to page reference count checking after
this patch.
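As background for why the drain matters at all (paraphrasing the struct folio
layout, slightly simplified): for an unevictable folio the mlock_count overlays
part of the lru field,

	union {
		struct list_head lru;
		struct {
			void *__filler;
			unsigned int mlock_count;
		};
	};

so it can only be maintained once the folio really sits on the unevictable LRU,
which is exactly what draining the per-CPU batch before remove_migration_ptes()
guarantees for the new page.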
>>> So we can record the source pages' mlocked status in migrate_folio_unmap(),
>>> and only drain the lru list when the mlocked status is set in migrate_folio_move().
>>> In addition, the page was already isolated from lru when migrating, so we
>>> check the mlocked status is stable by folio_test_mlocked() in migrate_folio_unmap().
>>>
>>> After this patch, I can see the hotpot of the lru_add_drain() is gone:
>>> - 9.41% migrate_pages_batch
>>> - 6.15% migrate_folio_move
>>> - 3.64% move_to_new_folio
>>> + 1.80% migrate_folio_extra
>>> + 1.70% buffer_migrate_folio
>>> + 1.41% rmap_walk
>>> + 0.62% folio_add_lru
>>> + 3.07% migrate_folio_unmap
>>>
>>> Meanwhile, the compaction latency shows some improvements when running
>>> thpscale:
>>> base patched
>>> Amean fault-both-1 1131.22 ( 0.00%) 1112.55 * 1.65%*
>>> Amean fault-both-3 2489.75 ( 0.00%) 2324.15 * 6.65%*
>>> Amean fault-both-5 3257.37 ( 0.00%) 3183.18 * 2.28%*
>>> Amean fault-both-7 4257.99 ( 0.00%) 4079.04 * 4.20%*
>>> Amean fault-both-12 6614.02 ( 0.00%) 6075.60 * 8.14%*
>>> Amean fault-both-18 10607.78 ( 0.00%) 8978.86 * 15.36%*
>>> Amean fault-both-24 14911.65 ( 0.00%) 11619.55 * 22.08%*
>>> Amean fault-both-30 14954.67 ( 0.00%) 14925.66 * 0.19%*
>>> Amean fault-both-32 16654.87 ( 0.00%) 15580.31 * 6.45%*
>>>
>>> Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com>
>>> ---
>>> mm/migrate.c | 50 ++++++++++++++++++++++++++++++++++++++------------
>>> 1 file changed, 38 insertions(+), 12 deletions(-)
>>>
>>> diff --git a/mm/migrate.c b/mm/migrate.c
>>> index 4caf405b6504..32c96f89710f 100644
>>> --- a/mm/migrate.c
>>> +++ b/mm/migrate.c
>>> @@ -1027,22 +1027,32 @@ union migration_ptr {
>>> struct anon_vma *anon_vma;
>>> struct address_space *mapping;
>>> };
>>> +
>>> +enum {
>>> + PAGE_WAS_MAPPED = 1 << 0,
>>> + PAGE_WAS_MLOCKED = 1 << 1,
>>> +};
>>> +
>>> static void __migrate_folio_record(struct folio *dst,
>>> - unsigned long page_was_mapped,
>>> + unsigned long page_flags,
>>> struct anon_vma *anon_vma)
>>> {
>>> union migration_ptr ptr = { .anon_vma = anon_vma };
>>> dst->mapping = ptr.mapping;
>>> - dst->private = (void *)page_was_mapped;
>>> + dst->private = (void *)page_flags;
>>> }
>>>
>>> static void __migrate_folio_extract(struct folio *dst,
>>> int *page_was_mappedp,
>>> + int *page_was_mlocked,
>>> struct anon_vma **anon_vmap)
>>> {
>>> union migration_ptr ptr = { .mapping = dst->mapping };
>>> + unsigned long page_flags = (unsigned long)dst->private;
>>> +
>>> *anon_vmap = ptr.anon_vma;
>>> - *page_was_mappedp = (unsigned long)dst->private;
>>> + *page_was_mappedp = page_flags & PAGE_WAS_MAPPED ? 1 : 0;
>>> + *page_was_mlocked = page_flags & PAGE_WAS_MLOCKED ? 1 : 0;
>>> dst->mapping = NULL;
>>> dst->private = NULL;
>>> }
>>> @@ -1103,7 +1113,7 @@ static int migrate_folio_unmap(new_folio_t get_new_folio,
>>> {
>>> struct folio *dst;
>>> int rc = -EAGAIN;
>>> - int page_was_mapped = 0;
>>> + int page_was_mapped = 0, page_was_mlocked = 0;
>>> struct anon_vma *anon_vma = NULL;
>>> bool is_lru = !__folio_test_movable(src);
>>> bool locked = false;
>>> @@ -1157,6 +1167,7 @@ static int migrate_folio_unmap(new_folio_t get_new_folio,
>>> folio_lock(src);
>>> }
>>> locked = true;
>>> + page_was_mlocked = folio_test_mlocked(src);
>>>
>>> if (folio_test_writeback(src)) {
>>> /*
>>> @@ -1206,7 +1217,7 @@ static int migrate_folio_unmap(new_folio_t get_new_folio,
>>> dst_locked = true;
>>>
>>> if (unlikely(!is_lru)) {
>>> - __migrate_folio_record(dst, page_was_mapped, anon_vma);
>>> + __migrate_folio_record(dst, 0, anon_vma);
>>> return MIGRATEPAGE_UNMAP;
>>> }
>>>
>>> @@ -1236,7 +1247,13 @@ static int migrate_folio_unmap(new_folio_t get_new_folio,
>>> }
>>>
>>> if (!folio_mapped(src)) {
>>> - __migrate_folio_record(dst, page_was_mapped, anon_vma);
>>> + unsigned int page_flags = 0;
>>> +
>>> + if (page_was_mapped)
>>> + page_flags |= PAGE_WAS_MAPPED;
>>> + if (page_was_mlocked)
>>> + page_flags |= PAGE_WAS_MLOCKED;
>>> + __migrate_folio_record(dst, page_flags, anon_vma);
>>> return MIGRATEPAGE_UNMAP;
>>> }
>>>
>>> @@ -1261,12 +1278,13 @@ static int migrate_folio_move(free_folio_t put_new_folio, unsigned long private,
>>> struct list_head *ret)
>>> {
>>> int rc;
>>> - int page_was_mapped = 0;
>>> + int page_was_mapped = 0, page_was_mlocked = 0;
>>> struct anon_vma *anon_vma = NULL;
>>> bool is_lru = !__folio_test_movable(src);
>>> struct list_head *prev;
>>>
>>> - __migrate_folio_extract(dst, &page_was_mapped, &anon_vma);
>>> + __migrate_folio_extract(dst, &page_was_mapped,
>>> + &page_was_mlocked, &anon_vma);
>>
>> It is better to read out the flag, then check page_was_mapped and page_was_mlocked
>> to avoid future __migrate_folio_extract() interface churns.
>
> IHMO, in contrast, it's better to use separate flags in
> __migrate_folio_record() too to avoid to pack flags in each call site.
Either way is okay for me, and avoiding packing the flags at each call site
seems more reasonable to me.
>
>>> prev = dst->lru.prev;
>>> list_del(&dst->lru);
>>>
>>> @@ -1287,7 +1305,7 @@ static int migrate_folio_move(free_folio_t put_new_folio, unsigned long private,
>>> * isolated from the unevictable LRU: but this case is the easiest.
>>> */
>>> folio_add_lru(dst);
>>> - if (page_was_mapped)
>>> + if (page_was_mlocked)
>>> lru_add_drain();
>>
>> Like I said at the top, this would be if (page_was_mapped || page_was_mlocked).
I don't think so. Like I said above, we can drain the lru list only if the page
was mlocked.
>>> if (page_was_mapped)
>>> @@ -1321,8 +1339,15 @@ static int migrate_folio_move(free_folio_t put_new_folio, unsigned long private,
>>> * right list unless we want to retry.
>>> */
>>> if (rc == -EAGAIN) {
>>> + unsigned int page_flags = 0;
>>> +
>>> + if (page_was_mapped)
>>> + page_flags |= PAGE_WAS_MAPPED;
>>> + if (page_was_mlocked)
>>> + page_flags |= PAGE_WAS_MLOCKED;
>>> +
>>> list_add(&dst->lru, prev);
>>> - __migrate_folio_record(dst, page_was_mapped, anon_vma);
>>> + __migrate_folio_record(dst, page_flags, anon_vma);
>>> return rc;
>>> }
>>>
>>> @@ -1799,10 +1824,11 @@ static int migrate_pages_batch(struct list_head *from,
>>> dst = list_first_entry(&dst_folios, struct folio, lru);
>>> dst2 = list_next_entry(dst, lru);
>>> list_for_each_entry_safe(folio, folio2, &unmap_folios, lru) {
>>> - int page_was_mapped = 0;
>>> + int page_was_mapped = 0, page_was_mlocked = 0;
>>> struct anon_vma *anon_vma = NULL;
>>>
>>> - __migrate_folio_extract(dst, &page_was_mapped, &anon_vma);
>>> + __migrate_folio_extract(dst, &page_was_mapped,
>>> + &page_was_mlocked, &anon_vma);
>>> migrate_folio_undo_src(folio, page_was_mapped, anon_vma,
>>> true, ret_folios);
>>> list_del(&dst->lru);
>>> --
>>> 2.39.3
>
> --
> Best Regards,
> Huang, Ying
* Re: [PATCH] mm: migrate: record the mlocked page status to remove unnecessary lru drain
2023-10-19 7:25 ` Baolin Wang
@ 2023-10-19 8:22 ` Yin Fengwei
2023-10-19 8:51 ` Baolin Wang
0 siblings, 1 reply; 18+ messages in thread
From: Yin Fengwei @ 2023-10-19 8:22 UTC (permalink / raw)
To: Baolin Wang, Huang, Ying, Zi Yan
Cc: akpm, mgorman, hughd, vbabka, linux-mm, linux-kernel
Hi Baolin,
On 10/19/23 15:25, Baolin Wang wrote:
>
>
> On 10/19/2023 2:09 PM, Huang, Ying wrote:
>> Zi Yan <ziy@nvidia.com> writes:
>>
>>> On 18 Oct 2023, at 9:04, Baolin Wang wrote:
>>>
>>>> When doing compaction, I found the lru_add_drain() is an obvious hotspot
>>>> when migrating pages. The distribution of this hotspot is as follows:
>>>> - 18.75% compact_zone
>>>> - 17.39% migrate_pages
>>>> - 13.79% migrate_pages_batch
>>>> - 11.66% migrate_folio_move
>>>> - 7.02% lru_add_drain
>>>> + 7.02% lru_add_drain_cpu
>>>> + 3.00% move_to_new_folio
>>>> 1.23% rmap_walk
>>>> + 1.92% migrate_folio_unmap
>>>> + 3.20% migrate_pages_sync
>>>> + 0.90% isolate_migratepages
>>>>
>>>> The lru_add_drain() was added by commit c3096e6782b7 ("mm/migrate:
>>>> __unmap_and_move() push good newpage to LRU") to drain the newpage to LRU
>>>> immediately, to help to build up the correct newpage->mlock_count in
>>>> remove_migration_ptes() for mlocked pages. However, if there are no mlocked
>>>> pages are migrating, then we can avoid this lru drain operation, especailly
>>>> for the heavy concurrent scenarios.
>>>
>>> lru_add_drain() is also used to drain pages out of folio_batch. Pages in folio_batch
>>> have an additional pin to prevent migration. See folio_get(folio); in folio_add_lru().
>>
>> lru_add_drain() is called after the page reference count checking in
>> move_to_new_folio(). So, I don't this is an issue.
>
> Agree. The purpose of adding lru_add_drain() is to address the 'mlock_count' issue for mlocked pages. Please see commit c3096e6782b7 and related comments. Moreover I haven't seen an increase in the number of page migration failures due to page reference count checking after this patch.
I agree with you. My understanding is also that the lru_add_drain() is only needed
for mlocked folios to correct the mlock_count. I'd like to hear confirmation from Hugh.

But I have a question: why do we need to use page_was_mlocked instead of checking
folio_test_mlocked(src)? Does page migration clear the mlock flag? Thanks.
Regards
Yin, Fengwei
>
>>>> So we can record the source pages' mlocked status in migrate_folio_unmap(),
>>>> and only drain the lru list when the mlocked status is set in migrate_folio_move().
>>>> In addition, the page was already isolated from lru when migrating, so we
>>>> check the mlocked status is stable by folio_test_mlocked() in migrate_folio_unmap().
>>>>
>>>> After this patch, I can see the hotpot of the lru_add_drain() is gone:
>>>> - 9.41% migrate_pages_batch
>>>> - 6.15% migrate_folio_move
>>>> - 3.64% move_to_new_folio
>>>> + 1.80% migrate_folio_extra
>>>> + 1.70% buffer_migrate_folio
>>>> + 1.41% rmap_walk
>>>> + 0.62% folio_add_lru
>>>> + 3.07% migrate_folio_unmap
>>>>
>>>> Meanwhile, the compaction latency shows some improvements when running
>>>> thpscale:
>>>> base patched
>>>> Amean fault-both-1 1131.22 ( 0.00%) 1112.55 * 1.65%*
>>>> Amean fault-both-3 2489.75 ( 0.00%) 2324.15 * 6.65%*
>>>> Amean fault-both-5 3257.37 ( 0.00%) 3183.18 * 2.28%*
>>>> Amean fault-both-7 4257.99 ( 0.00%) 4079.04 * 4.20%*
>>>> Amean fault-both-12 6614.02 ( 0.00%) 6075.60 * 8.14%*
>>>> Amean fault-both-18 10607.78 ( 0.00%) 8978.86 * 15.36%*
>>>> Amean fault-both-24 14911.65 ( 0.00%) 11619.55 * 22.08%*
>>>> Amean fault-both-30 14954.67 ( 0.00%) 14925.66 * 0.19%*
>>>> Amean fault-both-32 16654.87 ( 0.00%) 15580.31 * 6.45%*
>>>>
>>>> Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com>
>>>> ---
>>>> mm/migrate.c | 50 ++++++++++++++++++++++++++++++++++++++------------
>>>> 1 file changed, 38 insertions(+), 12 deletions(-)
>>>>
>>>> diff --git a/mm/migrate.c b/mm/migrate.c
>>>> index 4caf405b6504..32c96f89710f 100644
>>>> --- a/mm/migrate.c
>>>> +++ b/mm/migrate.c
>>>> @@ -1027,22 +1027,32 @@ union migration_ptr {
>>>> struct anon_vma *anon_vma;
>>>> struct address_space *mapping;
>>>> };
>>>> +
>>>> +enum {
>>>> + PAGE_WAS_MAPPED = 1 << 0,
>>>> + PAGE_WAS_MLOCKED = 1 << 1,
>>>> +};
>>>> +
>>>> static void __migrate_folio_record(struct folio *dst,
>>>> - unsigned long page_was_mapped,
>>>> + unsigned long page_flags,
>>>> struct anon_vma *anon_vma)
>>>> {
>>>> union migration_ptr ptr = { .anon_vma = anon_vma };
>>>> dst->mapping = ptr.mapping;
>>>> - dst->private = (void *)page_was_mapped;
>>>> + dst->private = (void *)page_flags;
>>>> }
>>>>
>>>> static void __migrate_folio_extract(struct folio *dst,
>>>> int *page_was_mappedp,
>>>> + int *page_was_mlocked,
>>>> struct anon_vma **anon_vmap)
>>>> {
>>>> union migration_ptr ptr = { .mapping = dst->mapping };
>>>> + unsigned long page_flags = (unsigned long)dst->private;
>>>> +
>>>> *anon_vmap = ptr.anon_vma;
>>>> - *page_was_mappedp = (unsigned long)dst->private;
>>>> + *page_was_mappedp = page_flags & PAGE_WAS_MAPPED ? 1 : 0;
>>>> + *page_was_mlocked = page_flags & PAGE_WAS_MLOCKED ? 1 : 0;
>>>> dst->mapping = NULL;
>>>> dst->private = NULL;
>>>> }
>>>> @@ -1103,7 +1113,7 @@ static int migrate_folio_unmap(new_folio_t get_new_folio,
>>>> {
>>>> struct folio *dst;
>>>> int rc = -EAGAIN;
>>>> - int page_was_mapped = 0;
>>>> + int page_was_mapped = 0, page_was_mlocked = 0;
>>>> struct anon_vma *anon_vma = NULL;
>>>> bool is_lru = !__folio_test_movable(src);
>>>> bool locked = false;
>>>> @@ -1157,6 +1167,7 @@ static int migrate_folio_unmap(new_folio_t get_new_folio,
>>>> folio_lock(src);
>>>> }
>>>> locked = true;
>>>> + page_was_mlocked = folio_test_mlocked(src);
>>>>
>>>> if (folio_test_writeback(src)) {
>>>> /*
>>>> @@ -1206,7 +1217,7 @@ static int migrate_folio_unmap(new_folio_t get_new_folio,
>>>> dst_locked = true;
>>>>
>>>> if (unlikely(!is_lru)) {
>>>> - __migrate_folio_record(dst, page_was_mapped, anon_vma);
>>>> + __migrate_folio_record(dst, 0, anon_vma);
>>>> return MIGRATEPAGE_UNMAP;
>>>> }
>>>>
>>>> @@ -1236,7 +1247,13 @@ static int migrate_folio_unmap(new_folio_t get_new_folio,
>>>> }
>>>>
>>>> if (!folio_mapped(src)) {
>>>> - __migrate_folio_record(dst, page_was_mapped, anon_vma);
>>>> + unsigned int page_flags = 0;
>>>> +
>>>> + if (page_was_mapped)
>>>> + page_flags |= PAGE_WAS_MAPPED;
>>>> + if (page_was_mlocked)
>>>> + page_flags |= PAGE_WAS_MLOCKED;
>>>> + __migrate_folio_record(dst, page_flags, anon_vma);
>>>> return MIGRATEPAGE_UNMAP;
>>>> }
>>>>
>>>> @@ -1261,12 +1278,13 @@ static int migrate_folio_move(free_folio_t put_new_folio, unsigned long private,
>>>> struct list_head *ret)
>>>> {
>>>> int rc;
>>>> - int page_was_mapped = 0;
>>>> + int page_was_mapped = 0, page_was_mlocked = 0;
>>>> struct anon_vma *anon_vma = NULL;
>>>> bool is_lru = !__folio_test_movable(src);
>>>> struct list_head *prev;
>>>>
>>>> - __migrate_folio_extract(dst, &page_was_mapped, &anon_vma);
>>>> + __migrate_folio_extract(dst, &page_was_mapped,
>>>> + &page_was_mlocked, &anon_vma);
>>>
>>> It is better to read out the flag, then check page_was_mapped and page_was_mlocked
>>> to avoid future __migrate_folio_extract() interface churns.
>>
>> IHMO, in contrast, it's better to use separate flags in
>> __migrate_folio_record() too to avoid to pack flags in each call site.
>
> Either way is okay for me. And avoiding to pack flags in each call site seems more reasonable to me.
>
>>
>>>> prev = dst->lru.prev;
>>>> list_del(&dst->lru);
>>>>
>>>> @@ -1287,7 +1305,7 @@ static int migrate_folio_move(free_folio_t put_new_folio, unsigned long private,
>>>> * isolated from the unevictable LRU: but this case is the easiest.
>>>> */
>>>> folio_add_lru(dst);
>>>> - if (page_was_mapped)
>>>> + if (page_was_mlocked)
>>>> lru_add_drain();
>>>
>>> Like I said at the top, this would be if (page_was_mapped || page_was_mlocked).
>
> I don't think so. Like I said above, we can drain lru list only if page was mlocked.
>
>>>> if (page_was_mapped)
>>>> @@ -1321,8 +1339,15 @@ static int migrate_folio_move(free_folio_t put_new_folio, unsigned long private,
>>>> * right list unless we want to retry.
>>>> */
>>>> if (rc == -EAGAIN) {
>>>> + unsigned int page_flags = 0;
>>>> +
>>>> + if (page_was_mapped)
>>>> + page_flags |= PAGE_WAS_MAPPED;
>>>> + if (page_was_mlocked)
>>>> + page_flags |= PAGE_WAS_MLOCKED;
>>>> +
>>>> list_add(&dst->lru, prev);
>>>> - __migrate_folio_record(dst, page_was_mapped, anon_vma);
>>>> + __migrate_folio_record(dst, page_flags, anon_vma);
>>>> return rc;
>>>> }
>>>>
>>>> @@ -1799,10 +1824,11 @@ static int migrate_pages_batch(struct list_head *from,
>>>> dst = list_first_entry(&dst_folios, struct folio, lru);
>>>> dst2 = list_next_entry(dst, lru);
>>>> list_for_each_entry_safe(folio, folio2, &unmap_folios, lru) {
>>>> - int page_was_mapped = 0;
>>>> + int page_was_mapped = 0, page_was_mlocked = 0;
>>>> struct anon_vma *anon_vma = NULL;
>>>>
>>>> - __migrate_folio_extract(dst, &page_was_mapped, &anon_vma);
>>>> + __migrate_folio_extract(dst, &page_was_mapped,
>>>> + &page_was_mlocked, &anon_vma);
>>>> migrate_folio_undo_src(folio, page_was_mapped, anon_vma,
>>>> true, ret_folios);
>>>> list_del(&dst->lru);
>>>> --
>>>> 2.39.3
>>
>> --
>> Best Regards,
>> Huang, Ying
* Re: [PATCH] mm: migrate: record the mlocked page status to remove unnecessary lru drain
2023-10-19 8:22 ` Yin Fengwei
@ 2023-10-19 8:51 ` Baolin Wang
2023-10-19 12:07 ` Yin, Fengwei
0 siblings, 1 reply; 18+ messages in thread
From: Baolin Wang @ 2023-10-19 8:51 UTC (permalink / raw)
To: Yin Fengwei, Huang, Ying, Zi Yan
Cc: akpm, mgorman, hughd, vbabka, linux-mm, linux-kernel
On 10/19/2023 4:22 PM, Yin Fengwei wrote:
> Hi Baolin,
>
> On 10/19/23 15:25, Baolin Wang wrote:
>>
>>
>> On 10/19/2023 2:09 PM, Huang, Ying wrote:
>>> Zi Yan <ziy@nvidia.com> writes:
>>>
>>>> On 18 Oct 2023, at 9:04, Baolin Wang wrote:
>>>>
>>>>> When doing compaction, I found the lru_add_drain() is an obvious hotspot
>>>>> when migrating pages. The distribution of this hotspot is as follows:
>>>>> - 18.75% compact_zone
>>>>> - 17.39% migrate_pages
>>>>> - 13.79% migrate_pages_batch
>>>>> - 11.66% migrate_folio_move
>>>>> - 7.02% lru_add_drain
>>>>> + 7.02% lru_add_drain_cpu
>>>>> + 3.00% move_to_new_folio
>>>>> 1.23% rmap_walk
>>>>> + 1.92% migrate_folio_unmap
>>>>> + 3.20% migrate_pages_sync
>>>>> + 0.90% isolate_migratepages
>>>>>
>>>>> The lru_add_drain() was added by commit c3096e6782b7 ("mm/migrate:
>>>>> __unmap_and_move() push good newpage to LRU") to drain the newpage to LRU
>>>>> immediately, to help to build up the correct newpage->mlock_count in
>>>>> remove_migration_ptes() for mlocked pages. However, if there are no mlocked
>>>>> pages are migrating, then we can avoid this lru drain operation, especailly
>>>>> for the heavy concurrent scenarios.
>>>>
>>>> lru_add_drain() is also used to drain pages out of folio_batch. Pages in folio_batch
>>>> have an additional pin to prevent migration. See folio_get(folio); in folio_add_lru().
>>>
>>> lru_add_drain() is called after the page reference count checking in
>>> move_to_new_folio(). So, I don't this is an issue.
>>
>> Agree. The purpose of adding lru_add_drain() is to address the 'mlock_count' issue for mlocked pages. Please see commit c3096e6782b7 and related comments. Moreover I haven't seen an increase in the number of page migration failures due to page reference count checking after this patch.
>
> I agree with your. My understanding also is that the lru_add_drain() is only needed
> for mlocked folio to correct mlock_count. Like to hear the confirmation from Huge.
>
>
> But I have question: why do we need use page_was_mlocked instead of check
> folio_test_mlocked(src)? Does page migration clear the mlock flag? Thanks.
Yes, please see the call trace: try_to_migrate_one() --->
page_remove_rmap() ---> munlock_vma_folio().
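That is also why the patch samples the status under the folio lock before the
unmap step can clear it; roughly (simplified sketch of the flow in
migrate_folio_unmap()):

	folio_lock(src);
	page_was_mlocked = folio_test_mlocked(src);
	...
	try_to_migrate(src, 0);		/* can reach munlock_vma_folio() via
					 * page_remove_rmap() and clear the
					 * mlocked flag on src */

By the time migrate_folio_move() runs, only the recorded flag still tells us
whether the source folio was mlocked.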
* Re: [PATCH] mm: migrate: record the mlocked page status to remove unnecessary lru drain
2023-10-19 8:51 ` Baolin Wang
@ 2023-10-19 12:07 ` Yin, Fengwei
2023-10-20 2:09 ` Baolin Wang
0 siblings, 1 reply; 18+ messages in thread
From: Yin, Fengwei @ 2023-10-19 12:07 UTC (permalink / raw)
To: Baolin Wang, Huang, Ying, Zi Yan
Cc: akpm, mgorman, hughd, vbabka, linux-mm, linux-kernel
On 10/19/2023 4:51 PM, Baolin Wang wrote:
>
>
> On 10/19/2023 4:22 PM, Yin Fengwei wrote:
>> Hi Baolin,
>>
>> On 10/19/23 15:25, Baolin Wang wrote:
>>>
>>>
>>> On 10/19/2023 2:09 PM, Huang, Ying wrote:
>>>> Zi Yan <ziy@nvidia.com> writes:
>>>>
>>>>> On 18 Oct 2023, at 9:04, Baolin Wang wrote:
>>>>>
>>>>>> When doing compaction, I found the lru_add_drain() is an obvious hotspot
>>>>>> when migrating pages. The distribution of this hotspot is as follows:
>>>>>> - 18.75% compact_zone
>>>>>> - 17.39% migrate_pages
>>>>>> - 13.79% migrate_pages_batch
>>>>>> - 11.66% migrate_folio_move
>>>>>> - 7.02% lru_add_drain
>>>>>> + 7.02% lru_add_drain_cpu
>>>>>> + 3.00% move_to_new_folio
>>>>>> 1.23% rmap_walk
>>>>>> + 1.92% migrate_folio_unmap
>>>>>> + 3.20% migrate_pages_sync
>>>>>> + 0.90% isolate_migratepages
>>>>>>
>>>>>> The lru_add_drain() was added by commit c3096e6782b7 ("mm/migrate:
>>>>>> __unmap_and_move() push good newpage to LRU") to drain the newpage to LRU
>>>>>> immediately, to help to build up the correct newpage->mlock_count in
>>>>>> remove_migration_ptes() for mlocked pages. However, if there are no mlocked
>>>>>> pages are migrating, then we can avoid this lru drain operation, especailly
>>>>>> for the heavy concurrent scenarios.
>>>>>
>>>>> lru_add_drain() is also used to drain pages out of folio_batch. Pages in folio_batch
>>>>> have an additional pin to prevent migration. See folio_get(folio); in folio_add_lru().
>>>>
>>>> lru_add_drain() is called after the page reference count checking in
>>>> move_to_new_folio(). So, I don't this is an issue.
>>>
>>> Agree. The purpose of adding lru_add_drain() is to address the 'mlock_count' issue for mlocked pages. Please see commit c3096e6782b7 and related comments. Moreover I haven't seen an increase in the number of page migration failures due to page reference count checking after this patch.
>>
>> I agree with your. My understanding also is that the lru_add_drain() is only needed
>> for mlocked folio to correct mlock_count. Like to hear the confirmation from Huge.
>>
>>
>> But I have question: why do we need use page_was_mlocked instead of check
>> folio_test_mlocked(src)? Does page migration clear the mlock flag? Thanks.
>
> Yes, please see the call trace: try_to_migrate_one() ---> page_remove_rmap() ---> munlock_vma_folio().
Yes. This will clear the mlock bit.

What about setting the dst folio mlocked if the source is mlocked, before
try_to_migrate_one()? And then checking whether the dst folio is mlocked
afterwards? We would also need to clear the mlocked status if migration
fails. I suppose the change is minor. Just a thought. Thanks.
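Roughly, the idea would be something like this hypothetical sketch (helper
names assumed, not tested):

	if (folio_test_mlocked(src))
		folio_set_mlocked(dst);		/* before try_to_migrate() clears src */
	...
	/* later, in the move step */
	folio_add_lru(dst);
	if (folio_test_mlocked(dst))
		lru_add_drain();
	...
	if (rc != MIGRATEPAGE_SUCCESS)
		folio_clear_mlocked(dst);	/* undo if migration fails */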
Regards
Yin, Fengwei
* Re: [PATCH] mm: migrate: record the mlocked page status to remove unnecessary lru drain
2023-10-19 6:09 ` Huang, Ying
2023-10-19 7:25 ` Baolin Wang
@ 2023-10-19 13:23 ` Zi Yan
1 sibling, 0 replies; 18+ messages in thread
From: Zi Yan @ 2023-10-19 13:23 UTC (permalink / raw)
To: "Huang, Ying"
Cc: Baolin Wang, akpm, mgorman, hughd, vbabka, linux-mm,
linux-kernel, Fengwei Yin
On 19 Oct 2023, at 2:09, Huang, Ying wrote:
> Zi Yan <ziy@nvidia.com> writes:
>
>> On 18 Oct 2023, at 9:04, Baolin Wang wrote:
>>
>>> When doing compaction, I found the lru_add_drain() is an obvious hotspot
>>> when migrating pages. The distribution of this hotspot is as follows:
>>> - 18.75% compact_zone
>>> - 17.39% migrate_pages
>>> - 13.79% migrate_pages_batch
>>> - 11.66% migrate_folio_move
>>> - 7.02% lru_add_drain
>>> + 7.02% lru_add_drain_cpu
>>> + 3.00% move_to_new_folio
>>> 1.23% rmap_walk
>>> + 1.92% migrate_folio_unmap
>>> + 3.20% migrate_pages_sync
>>> + 0.90% isolate_migratepages
>>>
>>> The lru_add_drain() was added by commit c3096e6782b7 ("mm/migrate:
>>> __unmap_and_move() push good newpage to LRU") to drain the newpage to LRU
>>> immediately, to help to build up the correct newpage->mlock_count in
>>> remove_migration_ptes() for mlocked pages. However, if there are no mlocked
>>> pages are migrating, then we can avoid this lru drain operation, especailly
>>> for the heavy concurrent scenarios.
>>
>> lru_add_drain() is also used to drain pages out of folio_batch. Pages in folio_batch
>> have an additional pin to prevent migration. See folio_get(folio); in folio_add_lru().
>
> lru_add_drain() is called after the page reference count checking in
> move_to_new_folio(). So, I don't this is an issue.
You are right. I missed that. Thanks for pointing this out.
>
>>>
>>> So we can record the source pages' mlocked status in migrate_folio_unmap(),
>>> and only drain the lru list when the mlocked status is set in migrate_folio_move().
>>> In addition, the page was already isolated from lru when migrating, so we
>>> check the mlocked status is stable by folio_test_mlocked() in migrate_folio_unmap().
>>>
>>> After this patch, I can see the hotpot of the lru_add_drain() is gone:
>>> - 9.41% migrate_pages_batch
>>> - 6.15% migrate_folio_move
>>> - 3.64% move_to_new_folio
>>> + 1.80% migrate_folio_extra
>>> + 1.70% buffer_migrate_folio
>>> + 1.41% rmap_walk
>>> + 0.62% folio_add_lru
>>> + 3.07% migrate_folio_unmap
>>>
>>> Meanwhile, the compaction latency shows some improvements when running
>>> thpscale:
>>> base patched
>>> Amean fault-both-1 1131.22 ( 0.00%) 1112.55 * 1.65%*
>>> Amean fault-both-3 2489.75 ( 0.00%) 2324.15 * 6.65%*
>>> Amean fault-both-5 3257.37 ( 0.00%) 3183.18 * 2.28%*
>>> Amean fault-both-7 4257.99 ( 0.00%) 4079.04 * 4.20%*
>>> Amean fault-both-12 6614.02 ( 0.00%) 6075.60 * 8.14%*
>>> Amean fault-both-18 10607.78 ( 0.00%) 8978.86 * 15.36%*
>>> Amean fault-both-24 14911.65 ( 0.00%) 11619.55 * 22.08%*
>>> Amean fault-both-30 14954.67 ( 0.00%) 14925.66 * 0.19%*
>>> Amean fault-both-32 16654.87 ( 0.00%) 15580.31 * 6.45%*
>>>
>>> Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com>
>>> ---
>>> mm/migrate.c | 50 ++++++++++++++++++++++++++++++++++++++------------
>>> 1 file changed, 38 insertions(+), 12 deletions(-)
>>>
>>> diff --git a/mm/migrate.c b/mm/migrate.c
>>> index 4caf405b6504..32c96f89710f 100644
>>> --- a/mm/migrate.c
>>> +++ b/mm/migrate.c
>>> @@ -1027,22 +1027,32 @@ union migration_ptr {
>>> struct anon_vma *anon_vma;
>>> struct address_space *mapping;
>>> };
>>> +
>>> +enum {
>>> + PAGE_WAS_MAPPED = 1 << 0,
>>> + PAGE_WAS_MLOCKED = 1 << 1,
>>> +};
>>> +
>>> static void __migrate_folio_record(struct folio *dst,
>>> - unsigned long page_was_mapped,
>>> + unsigned long page_flags,
>>> struct anon_vma *anon_vma)
>>> {
>>> union migration_ptr ptr = { .anon_vma = anon_vma };
>>> dst->mapping = ptr.mapping;
>>> - dst->private = (void *)page_was_mapped;
>>> + dst->private = (void *)page_flags;
>>> }
>>>
>>> static void __migrate_folio_extract(struct folio *dst,
>>> int *page_was_mappedp,
>>> + int *page_was_mlocked,
>>> struct anon_vma **anon_vmap)
>>> {
>>> union migration_ptr ptr = { .mapping = dst->mapping };
>>> + unsigned long page_flags = (unsigned long)dst->private;
>>> +
>>> *anon_vmap = ptr.anon_vma;
>>> - *page_was_mappedp = (unsigned long)dst->private;
>>> + *page_was_mappedp = page_flags & PAGE_WAS_MAPPED ? 1 : 0;
>>> + *page_was_mlocked = page_flags & PAGE_WAS_MLOCKED ? 1 : 0;
>>> dst->mapping = NULL;
>>> dst->private = NULL;
>>> }
>>> @@ -1103,7 +1113,7 @@ static int migrate_folio_unmap(new_folio_t get_new_folio,
>>> {
>>> struct folio *dst;
>>> int rc = -EAGAIN;
>>> - int page_was_mapped = 0;
>>> + int page_was_mapped = 0, page_was_mlocked = 0;
>>> struct anon_vma *anon_vma = NULL;
>>> bool is_lru = !__folio_test_movable(src);
>>> bool locked = false;
>>> @@ -1157,6 +1167,7 @@ static int migrate_folio_unmap(new_folio_t get_new_folio,
>>> folio_lock(src);
>>> }
>>> locked = true;
>>> + page_was_mlocked = folio_test_mlocked(src);
>>>
>>> if (folio_test_writeback(src)) {
>>> /*
>>> @@ -1206,7 +1217,7 @@ static int migrate_folio_unmap(new_folio_t get_new_folio,
>>> dst_locked = true;
>>>
>>> if (unlikely(!is_lru)) {
>>> - __migrate_folio_record(dst, page_was_mapped, anon_vma);
>>> + __migrate_folio_record(dst, 0, anon_vma);
>>> return MIGRATEPAGE_UNMAP;
>>> }
>>>
>>> @@ -1236,7 +1247,13 @@ static int migrate_folio_unmap(new_folio_t get_new_folio,
>>> }
>>>
>>> if (!folio_mapped(src)) {
>>> - __migrate_folio_record(dst, page_was_mapped, anon_vma);
>>> + unsigned int page_flags = 0;
>>> +
>>> + if (page_was_mapped)
>>> + page_flags |= PAGE_WAS_MAPPED;
>>> + if (page_was_mlocked)
>>> + page_flags |= PAGE_WAS_MLOCKED;
>>> + __migrate_folio_record(dst, page_flags, anon_vma);
>>> return MIGRATEPAGE_UNMAP;
>>> }
>>>
>>> @@ -1261,12 +1278,13 @@ static int migrate_folio_move(free_folio_t put_new_folio, unsigned long private,
>>> struct list_head *ret)
>>> {
>>> int rc;
>>> - int page_was_mapped = 0;
>>> + int page_was_mapped = 0, page_was_mlocked = 0;
>>> struct anon_vma *anon_vma = NULL;
>>> bool is_lru = !__folio_test_movable(src);
>>> struct list_head *prev;
>>>
>>> - __migrate_folio_extract(dst, &page_was_mapped, &anon_vma);
>>> + __migrate_folio_extract(dst, &page_was_mapped,
>>> + &page_was_mlocked, &anon_vma);
>>
>> It is better to read out the flag, then check page_was_mapped and page_was_mlocked
>> to avoid future __migrate_folio_extract() interface churns.
>
> IHMO, in contrast, it's better to use separate flags in
> __migrate_folio_record() too to avoid to pack flags in each call site.
I am OK with it as long as the parameters of these two are symmetric.
>
>>> prev = dst->lru.prev;
>>> list_del(&dst->lru);
>>>
>>> @@ -1287,7 +1305,7 @@ static int migrate_folio_move(free_folio_t put_new_folio, unsigned long private,
>>> * isolated from the unevictable LRU: but this case is the easiest.
>>> */
>>> folio_add_lru(dst);
>>> - if (page_was_mapped)
>>> + if (page_was_mlocked)
>>> lru_add_drain();
>>
>> Like I said at the top, this would be if (page_was_mapped || page_was_mlocked).
>>
>>>
>>> if (page_was_mapped)
>>> @@ -1321,8 +1339,15 @@ static int migrate_folio_move(free_folio_t put_new_folio, unsigned long private,
>>> * right list unless we want to retry.
>>> */
>>> if (rc == -EAGAIN) {
>>> + unsigned int page_flags = 0;
>>> +
>>> + if (page_was_mapped)
>>> + page_flags |= PAGE_WAS_MAPPED;
>>> + if (page_was_mlocked)
>>> + page_flags |= PAGE_WAS_MLOCKED;
>>> +
>>> list_add(&dst->lru, prev);
>>> - __migrate_folio_record(dst, page_was_mapped, anon_vma);
>>> + __migrate_folio_record(dst, page_flags, anon_vma);
>>> return rc;
>>> }
>>>
>>> @@ -1799,10 +1824,11 @@ static int migrate_pages_batch(struct list_head *from,
>>> dst = list_first_entry(&dst_folios, struct folio, lru);
>>> dst2 = list_next_entry(dst, lru);
>>> list_for_each_entry_safe(folio, folio2, &unmap_folios, lru) {
>>> - int page_was_mapped = 0;
>>> + int page_was_mapped = 0, page_was_mlocked = 0;
>>> struct anon_vma *anon_vma = NULL;
>>>
>>> - __migrate_folio_extract(dst, &page_was_mapped, &anon_vma);
>>> + __migrate_folio_extract(dst, &page_was_mapped,
>>> + &page_was_mlocked, &anon_vma);
>>> migrate_folio_undo_src(folio, page_was_mapped, anon_vma,
>>> true, ret_folios);
>>> list_del(&dst->lru);
>>> --
>>> 2.39.3
>
> --
> Best Regards,
> Huang, Ying
--
Best Regards,
Yan, Zi
* Re: [PATCH] mm: migrate: record the mlocked page status to remove unnecessary lru drain
2023-10-19 12:07 ` Yin, Fengwei
@ 2023-10-20 2:09 ` Baolin Wang
2023-10-20 2:30 ` Yin, Fengwei
0 siblings, 1 reply; 18+ messages in thread
From: Baolin Wang @ 2023-10-20 2:09 UTC (permalink / raw)
To: Yin, Fengwei, Huang, Ying, Zi Yan
Cc: akpm, mgorman, hughd, vbabka, linux-mm, linux-kernel
On 10/19/2023 8:07 PM, Yin, Fengwei wrote:
>
>
> On 10/19/2023 4:51 PM, Baolin Wang wrote:
>>
>>
>> On 10/19/2023 4:22 PM, Yin Fengwei wrote:
>>> Hi Baolin,
>>>
>>> On 10/19/23 15:25, Baolin Wang wrote:
>>>>
>>>>
>>>> On 10/19/2023 2:09 PM, Huang, Ying wrote:
>>>>> Zi Yan <ziy@nvidia.com> writes:
>>>>>
>>>>>> On 18 Oct 2023, at 9:04, Baolin Wang wrote:
>>>>>>
>>>>>>> When doing compaction, I found the lru_add_drain() is an obvious hotspot
>>>>>>> when migrating pages. The distribution of this hotspot is as follows:
>>>>>>> - 18.75% compact_zone
>>>>>>> - 17.39% migrate_pages
>>>>>>> - 13.79% migrate_pages_batch
>>>>>>> - 11.66% migrate_folio_move
>>>>>>> - 7.02% lru_add_drain
>>>>>>> + 7.02% lru_add_drain_cpu
>>>>>>> + 3.00% move_to_new_folio
>>>>>>> 1.23% rmap_walk
>>>>>>> + 1.92% migrate_folio_unmap
>>>>>>> + 3.20% migrate_pages_sync
>>>>>>> + 0.90% isolate_migratepages
>>>>>>>
>>>>>>> The lru_add_drain() was added by commit c3096e6782b7 ("mm/migrate:
>>>>>>> __unmap_and_move() push good newpage to LRU") to drain the newpage to LRU
>>>>>>> immediately, to help to build up the correct newpage->mlock_count in
>>>>>>> remove_migration_ptes() for mlocked pages. However, if there are no mlocked
>>>>>>> pages are migrating, then we can avoid this lru drain operation, especailly
>>>>>>> for the heavy concurrent scenarios.
>>>>>>
>>>>>> lru_add_drain() is also used to drain pages out of folio_batch. Pages in folio_batch
>>>>>> have an additional pin to prevent migration. See folio_get(folio); in folio_add_lru().
>>>>>
>>>>> lru_add_drain() is called after the page reference count checking in
>>>>> move_to_new_folio(). So, I don't this is an issue.
>>>>
>>>> Agree. The purpose of adding lru_add_drain() is to address the 'mlock_count' issue for mlocked pages. Please see commit c3096e6782b7 and related comments. Moreover I haven't seen an increase in the number of page migration failures due to page reference count checking after this patch.
>>>
>>> I agree with your. My understanding also is that the lru_add_drain() is only needed
>>> for mlocked folio to correct mlock_count. Like to hear the confirmation from Huge.
>>>
>>>
>>> But I have question: why do we need use page_was_mlocked instead of check
>>> folio_test_mlocked(src)? Does page migration clear the mlock flag? Thanks.
>>
>> Yes, please see the call trace: try_to_migrate_one() ---> page_remove_rmap() ---> munlock_vma_folio().
>
> Yes. This will clear mlock bit.
>
> What about set dst folio mlocked if source is before try_to_migrate_one()? And
> then check whether dst folio is mlocked after? And need clear mlocked if migration
> fails. I suppose the change is minor. Just a thought. Thanks.
IMO, this will break the mlock-related statistics in mlock_folio() when
remove_migration_pte() rebuilds the mlock status and mlock count.
Another concern is that, during page migration, a concurrent munlock()
can be called to clear the VM_LOCKED flags on the VMAs, in which case
remove_migration_pte() should not rebuild the mlock status and mlock
count. But the dst folio's mlocked status would still remain, which is
wrong.
So your suggested approach does not seem easy, and I think my patch is
simpler, re-using the existing __migrate_folio_record() and
__migrate_folio_extract() :)
* Re: [PATCH] mm: migrate: record the mlocked page status to remove unnecessary lru drain
2023-10-20 2:09 ` Baolin Wang
@ 2023-10-20 2:30 ` Yin, Fengwei
2023-10-20 2:45 ` Baolin Wang
0 siblings, 1 reply; 18+ messages in thread
From: Yin, Fengwei @ 2023-10-20 2:30 UTC (permalink / raw)
To: Baolin Wang, Huang, Ying, Zi Yan
Cc: akpm, mgorman, hughd, vbabka, linux-mm, linux-kernel
On 10/20/2023 10:09 AM, Baolin Wang wrote:
>
>
> On 10/19/2023 8:07 PM, Yin, Fengwei wrote:
>>
>>
>> On 10/19/2023 4:51 PM, Baolin Wang wrote:
>>>
>>>
>>> On 10/19/2023 4:22 PM, Yin Fengwei wrote:
>>>> Hi Baolin,
>>>>
>>>> On 10/19/23 15:25, Baolin Wang wrote:
>>>>>
>>>>>
>>>>> On 10/19/2023 2:09 PM, Huang, Ying wrote:
>>>>>> Zi Yan <ziy@nvidia.com> writes:
>>>>>>
>>>>>>> On 18 Oct 2023, at 9:04, Baolin Wang wrote:
>>>>>>>
>>>>>>>> When doing compaction, I found the lru_add_drain() is an obvious hotspot
>>>>>>>> when migrating pages. The distribution of this hotspot is as follows:
>>>>>>>> - 18.75% compact_zone
>>>>>>>> - 17.39% migrate_pages
>>>>>>>> - 13.79% migrate_pages_batch
>>>>>>>> - 11.66% migrate_folio_move
>>>>>>>> - 7.02% lru_add_drain
>>>>>>>> + 7.02% lru_add_drain_cpu
>>>>>>>> + 3.00% move_to_new_folio
>>>>>>>> 1.23% rmap_walk
>>>>>>>> + 1.92% migrate_folio_unmap
>>>>>>>> + 3.20% migrate_pages_sync
>>>>>>>> + 0.90% isolate_migratepages
>>>>>>>>
>>>>>>>> The lru_add_drain() was added by commit c3096e6782b7 ("mm/migrate:
>>>>>>>> __unmap_and_move() push good newpage to LRU") to drain the newpage to LRU
>>>>>>>> immediately, to help to build up the correct newpage->mlock_count in
>>>>>>>> remove_migration_ptes() for mlocked pages. However, if there are no mlocked
>>>>>>>> pages are migrating, then we can avoid this lru drain operation, especailly
>>>>>>>> for the heavy concurrent scenarios.
>>>>>>>
>>>>>>> lru_add_drain() is also used to drain pages out of folio_batch. Pages in folio_batch
>>>>>>> have an additional pin to prevent migration. See folio_get(folio); in folio_add_lru().
>>>>>>
>>>>>> lru_add_drain() is called after the page reference count checking in
>>>>>> move_to_new_folio(). So, I don't this is an issue.
>>>>>
>>>>> Agree. The purpose of adding lru_add_drain() is to address the 'mlock_count' issue for mlocked pages. Please see commit c3096e6782b7 and related comments. Moreover I haven't seen an increase in the number of page migration failures due to page reference count checking after this patch.
>>>>
>>>> I agree with your. My understanding also is that the lru_add_drain() is only needed
>>>> for mlocked folio to correct mlock_count. Like to hear the confirmation from Huge.
>>>>
>>>>
>>>> But I have question: why do we need use page_was_mlocked instead of check
>>>> folio_test_mlocked(src)? Does page migration clear the mlock flag? Thanks.
>>>
>>> Yes, please see the call trace: try_to_migrate_one() ---> page_remove_rmap() ---> munlock_vma_folio().
>>
>> Yes. This will clear mlock bit.
>>
>> What about set dst folio mlocked if source is before try_to_migrate_one()? And
>> then check whether dst folio is mlocked after? And need clear mlocked if migration
>> fails. I suppose the change is minor. Just a thought. Thanks.
>
> IMO, this will break the mlock related statistics in mlock_folio() when the remove_migration_pte() rebuilds the mlock status and mlock count.
>
> Another concern I can see is that, during the page migration, a concurrent munlock() can be called to clean the VM_LOCKED flags for the VMAs, so the remove_migration_pte() should not rebuild the mlock status and mlock count. But the dst folio's mlcoked status is still remained, which is wrong.
>
> So your suggested apporach seems not easy, and I think my patch is simple with re-using existing __migrate_folio_record() and __migrate_folio_extract() :)
Can these concerns be addressed by clearing the dst folio's mlocked status
after lru_add_drain() but before remove_migration_pte()?
Regards
Yin, Fengwei
* Re: [PATCH] mm: migrate: record the mlocked page status to remove unnecessary lru drain
2023-10-20 2:30 ` Yin, Fengwei
@ 2023-10-20 2:45 ` Baolin Wang
2023-10-20 2:47 ` Yin, Fengwei
2023-10-20 2:54 ` Yin, Fengwei
0 siblings, 2 replies; 18+ messages in thread
From: Baolin Wang @ 2023-10-20 2:45 UTC (permalink / raw)
To: Yin, Fengwei, Huang, Ying, Zi Yan
Cc: akpm, mgorman, hughd, vbabka, linux-mm, linux-kernel
On 10/20/2023 10:30 AM, Yin, Fengwei wrote:
>
>
> On 10/20/2023 10:09 AM, Baolin Wang wrote:
>>
>>
>> On 10/19/2023 8:07 PM, Yin, Fengwei wrote:
>>>
>>>
>>> On 10/19/2023 4:51 PM, Baolin Wang wrote:
>>>>
>>>>
>>>> On 10/19/2023 4:22 PM, Yin Fengwei wrote:
>>>>> Hi Baolin,
>>>>>
>>>>> On 10/19/23 15:25, Baolin Wang wrote:
>>>>>>
>>>>>>
>>>>>> On 10/19/2023 2:09 PM, Huang, Ying wrote:
>>>>>>> Zi Yan <ziy@nvidia.com> writes:
>>>>>>>
>>>>>>>> On 18 Oct 2023, at 9:04, Baolin Wang wrote:
>>>>>>>>
>>>>>>>>> When doing compaction, I found the lru_add_drain() is an obvious hotspot
>>>>>>>>> when migrating pages. The distribution of this hotspot is as follows:
>>>>>>>>> - 18.75% compact_zone
>>>>>>>>> - 17.39% migrate_pages
>>>>>>>>> - 13.79% migrate_pages_batch
>>>>>>>>> - 11.66% migrate_folio_move
>>>>>>>>> - 7.02% lru_add_drain
>>>>>>>>> + 7.02% lru_add_drain_cpu
>>>>>>>>> + 3.00% move_to_new_folio
>>>>>>>>> 1.23% rmap_walk
>>>>>>>>> + 1.92% migrate_folio_unmap
>>>>>>>>> + 3.20% migrate_pages_sync
>>>>>>>>> + 0.90% isolate_migratepages
>>>>>>>>>
>>>>>>>>> The lru_add_drain() was added by commit c3096e6782b7 ("mm/migrate:
>>>>>>>>> __unmap_and_move() push good newpage to LRU") to drain the newpage to LRU
>>>>>>>>> immediately, to help to build up the correct newpage->mlock_count in
>>>>>>>>> remove_migration_ptes() for mlocked pages. However, if there are no mlocked
>>>>>>>>> pages are migrating, then we can avoid this lru drain operation, especailly
>>>>>>>>> for the heavy concurrent scenarios.
>>>>>>>>
>>>>>>>> lru_add_drain() is also used to drain pages out of folio_batch. Pages in folio_batch
>>>>>>>> have an additional pin to prevent migration. See folio_get(folio); in folio_add_lru().
>>>>>>>
>>>>>>> lru_add_drain() is called after the page reference count checking in
>>>>>>> move_to_new_folio(). So, I don't this is an issue.
>>>>>>
>>>>>> Agree. The purpose of adding lru_add_drain() is to address the 'mlock_count' issue for mlocked pages. Please see commit c3096e6782b7 and related comments. Moreover I haven't seen an increase in the number of page migration failures due to page reference count checking after this patch.
>>>>>
>>>>> I agree with your. My understanding also is that the lru_add_drain() is only needed
>>>>> for mlocked folio to correct mlock_count. Like to hear the confirmation from Huge.
>>>>>
>>>>>
>>>>> But I have question: why do we need use page_was_mlocked instead of check
>>>>> folio_test_mlocked(src)? Does page migration clear the mlock flag? Thanks.
>>>>
>>>> Yes, please see the call trace: try_to_migrate_one() ---> page_remove_rmap() ---> munlock_vma_folio().
>>>
>>> Yes. This will clear mlock bit.
>>>
>>> What about set dst folio mlocked if source is before try_to_migrate_one()? And
>>> then check whether dst folio is mlocked after? And need clear mlocked if migration
>>> fails. I suppose the change is minor. Just a thought. Thanks.
>>
>> IMO, this will break the mlock related statistics in mlock_folio() when the remove_migration_pte() rebuilds the mlock status and mlock count.
>>
>> Another concern I can see is that, during the page migration, a concurrent munlock() can be called to clean the VM_LOCKED flags for the VMAs, so the remove_migration_pte() should not rebuild the mlock status and mlock count. But the dst folio's mlcoked status is still remained, which is wrong.
>>
>> So your suggested apporach seems not easy, and I think my patch is simple with re-using existing __migrate_folio_record() and __migrate_folio_extract() :)
>
> Can these concerns be addressed by clear dst mlocked after lru_add_drain() but before
> remove_migration_pte()?
IMHO, that seems too hacky to me. I still prefer to rely on the
migration process of the mlocked pages.
* Re: [PATCH] mm: migrate: record the mlocked page status to remove unnecessary lru drain
2023-10-20 2:45 ` Baolin Wang
@ 2023-10-20 2:47 ` Yin, Fengwei
2023-10-20 2:54 ` Yin, Fengwei
1 sibling, 0 replies; 18+ messages in thread
From: Yin, Fengwei @ 2023-10-20 2:47 UTC (permalink / raw)
To: Baolin Wang, Huang, Ying, Zi Yan
Cc: akpm, mgorman, hughd, vbabka, linux-mm, linux-kernel
On 10/20/2023 10:45 AM, Baolin Wang wrote:
>
>
> On 10/20/2023 10:30 AM, Yin, Fengwei wrote:
>>
>>
>> On 10/20/2023 10:09 AM, Baolin Wang wrote:
>>>
>>>
>>> On 10/19/2023 8:07 PM, Yin, Fengwei wrote:
>>>>
>>>>
>>>> On 10/19/2023 4:51 PM, Baolin Wang wrote:
>>>>>
>>>>>
>>>>> On 10/19/2023 4:22 PM, Yin Fengwei wrote:
>>>>>> Hi Baolin,
>>>>>>
>>>>>> On 10/19/23 15:25, Baolin Wang wrote:
>>>>>>>
>>>>>>>
>>>>>>> On 10/19/2023 2:09 PM, Huang, Ying wrote:
>>>>>>>> Zi Yan <ziy@nvidia.com> writes:
>>>>>>>>
>>>>>>>>> On 18 Oct 2023, at 9:04, Baolin Wang wrote:
>>>>>>>>>
>>>>>>>>>> When doing compaction, I found the lru_add_drain() is an obvious hotspot
>>>>>>>>>> when migrating pages. The distribution of this hotspot is as follows:
>>>>>>>>>> - 18.75% compact_zone
>>>>>>>>>> - 17.39% migrate_pages
>>>>>>>>>> - 13.79% migrate_pages_batch
>>>>>>>>>> - 11.66% migrate_folio_move
>>>>>>>>>> - 7.02% lru_add_drain
>>>>>>>>>> + 7.02% lru_add_drain_cpu
>>>>>>>>>> + 3.00% move_to_new_folio
>>>>>>>>>> 1.23% rmap_walk
>>>>>>>>>> + 1.92% migrate_folio_unmap
>>>>>>>>>> + 3.20% migrate_pages_sync
>>>>>>>>>> + 0.90% isolate_migratepages
>>>>>>>>>>
>>>>>>>>>> The lru_add_drain() was added by commit c3096e6782b7 ("mm/migrate:
>>>>>>>>>> __unmap_and_move() push good newpage to LRU") to drain the newpage to LRU
>>>>>>>>>> immediately, to help to build up the correct newpage->mlock_count in
>>>>>>>>>> remove_migration_ptes() for mlocked pages. However, if there are no mlocked
>>>>>>>>>> pages are migrating, then we can avoid this lru drain operation, especailly
>>>>>>>>>> for the heavy concurrent scenarios.
>>>>>>>>>
>>>>>>>>> lru_add_drain() is also used to drain pages out of folio_batch. Pages in folio_batch
>>>>>>>>> have an additional pin to prevent migration. See folio_get(folio); in folio_add_lru().
>>>>>>>>
>>>>>>>> lru_add_drain() is called after the page reference count checking in
>>>>>>>> move_to_new_folio(). So, I don't this is an issue.
>>>>>>>
>>>>>>> Agree. The purpose of adding lru_add_drain() is to address the 'mlock_count' issue for mlocked pages. Please see commit c3096e6782b7 and related comments. Moreover I haven't seen an increase in the number of page migration failures due to page reference count checking after this patch.
>>>>>>
>>>>>> I agree with your. My understanding also is that the lru_add_drain() is only needed
>>>>>> for mlocked folio to correct mlock_count. Like to hear the confirmation from Huge.
>>>>>>
>>>>>>
>>>>>> But I have question: why do we need use page_was_mlocked instead of check
>>>>>> folio_test_mlocked(src)? Does page migration clear the mlock flag? Thanks.
>>>>>
>>>>> Yes, please see the call trace: try_to_migrate_one() ---> page_remove_rmap() ---> munlock_vma_folio().
>>>>
>>>> Yes. This will clear mlock bit.
>>>>
>>>> What about set dst folio mlocked if source is before try_to_migrate_one()? And
>>>> then check whether dst folio is mlocked after? And need clear mlocked if migration
>>>> fails. I suppose the change is minor. Just a thought. Thanks.
>>>
>>> IMO, this will break the mlock related statistics in mlock_folio() when the remove_migration_pte() rebuilds the mlock status and mlock count.
>>>
>>> Another concern I can see is that, during the page migration, a concurrent munlock() can be called to clean the VM_LOCKED flags for the VMAs, so the remove_migration_pte() should not rebuild the mlock status and mlock count. But the dst folio's mlcoked status is still remained, which is wrong.
>>>
>>> So your suggested apporach seems not easy, and I think my patch is simple with re-using existing __migrate_folio_record() and __migrate_folio_extract() :)
>>
>> Can these concerns be addressed by clear dst mlocked after lru_add_drain() but before
>> remove_migration_pte()?
>
> IMHO, that seems too hacky to me. I still prefer to rely on the migration process of the mlcock pages.
Fair enough. Thanks.
Regards
Yin, Fengwei
* Re: [PATCH] mm: migrate: record the mlocked page status to remove unnecessary lru drain
2023-10-20 2:45 ` Baolin Wang
2023-10-20 2:47 ` Yin, Fengwei
@ 2023-10-20 2:54 ` Yin, Fengwei
2023-10-20 3:27 ` Baolin Wang
1 sibling, 1 reply; 18+ messages in thread
From: Yin, Fengwei @ 2023-10-20 2:54 UTC (permalink / raw)
To: Baolin Wang, Huang, Ying, Zi Yan, Yosry Ahmed
Cc: akpm, mgorman, hughd, vbabka, linux-mm, linux-kernel
On 10/20/2023 10:45 AM, Baolin Wang wrote:
>
>
> On 10/20/2023 10:30 AM, Yin, Fengwei wrote:
>>
>>
>> On 10/20/2023 10:09 AM, Baolin Wang wrote:
>>>
>>>
>>> On 10/19/2023 8:07 PM, Yin, Fengwei wrote:
>>>>
>>>>
>>>> On 10/19/2023 4:51 PM, Baolin Wang wrote:
>>>>>
>>>>>
>>>>> On 10/19/2023 4:22 PM, Yin Fengwei wrote:
>>>>>> Hi Baolin,
>>>>>>
>>>>>> On 10/19/23 15:25, Baolin Wang wrote:
>>>>>>>
>>>>>>>
>>>>>>> On 10/19/2023 2:09 PM, Huang, Ying wrote:
>>>>>>>> Zi Yan <ziy@nvidia.com> writes:
>>>>>>>>
>>>>>>>>> On 18 Oct 2023, at 9:04, Baolin Wang wrote:
>>>>>>>>>
>>>>>>>>>> When doing compaction, I found the lru_add_drain() is an obvious hotspot
>>>>>>>>>> when migrating pages. The distribution of this hotspot is as follows:
>>>>>>>>>> - 18.75% compact_zone
>>>>>>>>>> - 17.39% migrate_pages
>>>>>>>>>> - 13.79% migrate_pages_batch
>>>>>>>>>> - 11.66% migrate_folio_move
>>>>>>>>>> - 7.02% lru_add_drain
>>>>>>>>>> + 7.02% lru_add_drain_cpu
>>>>>>>>>> + 3.00% move_to_new_folio
>>>>>>>>>> 1.23% rmap_walk
>>>>>>>>>> + 1.92% migrate_folio_unmap
>>>>>>>>>> + 3.20% migrate_pages_sync
>>>>>>>>>> + 0.90% isolate_migratepages
>>>>>>>>>>
>>>>>>>>>> The lru_add_drain() was added by commit c3096e6782b7 ("mm/migrate:
>>>>>>>>>> __unmap_and_move() push good newpage to LRU") to drain the newpage to LRU
>>>>>>>>>> immediately, to help to build up the correct newpage->mlock_count in
>>>>>>>>>> remove_migration_ptes() for mlocked pages. However, if there are no mlocked
>>>>>>>>>> pages are migrating, then we can avoid this lru drain operation, especailly
>>>>>>>>>> for the heavy concurrent scenarios.
>>>>>>>>>
>>>>>>>>> lru_add_drain() is also used to drain pages out of folio_batch. Pages in folio_batch
>>>>>>>>> have an additional pin to prevent migration. See folio_get(folio); in folio_add_lru().
>>>>>>>>
>>>>>>>> lru_add_drain() is called after the page reference count checking in
>>>>>>>> move_to_new_folio(). So, I don't this is an issue.
>>>>>>>
>>>>>>> Agree. The purpose of adding lru_add_drain() is to address the 'mlock_count' issue for mlocked pages. Please see commit c3096e6782b7 and related comments. Moreover I haven't seen an increase in the number of page migration failures due to page reference count checking after this patch.
>>>>>>
>>>>>> I agree with your. My understanding also is that the lru_add_drain() is only needed
>>>>>> for mlocked folio to correct mlock_count. Like to hear the confirmation from Huge.
>>>>>>
>>>>>>
>>>>>> But I have question: why do we need use page_was_mlocked instead of check
>>>>>> folio_test_mlocked(src)? Does page migration clear the mlock flag? Thanks.
>>>>>
>>>>> Yes, please see the call trace: try_to_migrate_one() ---> page_remove_rmap() ---> munlock_vma_folio().
>>>>
>>>> Yes. This will clear mlock bit.
>>>>
>>>> What about set dst folio mlocked if source is before try_to_migrate_one()? And
>>>> then check whether dst folio is mlocked after? And need clear mlocked if migration
>>>> fails. I suppose the change is minor. Just a thought. Thanks.
>>>
>>> IMO, this will break the mlock related statistics in mlock_folio() when the remove_migration_pte() rebuilds the mlock status and mlock count.
>>>
>>> Another concern I can see is that, during the page migration, a concurrent munlock() can be called to clean the VM_LOCKED flags for the VMAs, so the remove_migration_pte() should not rebuild the mlock status and mlock count. But the dst folio's mlcoked status is still remained, which is wrong.
>>>
>>> So your suggested apporach seems not easy, and I think my patch is simple with re-using existing __migrate_folio_record() and __migrate_folio_extract() :)
>>
>> Can these concerns be addressed by clear dst mlocked after lru_add_drain() but before
>> remove_migration_pte()?
>
> IMHO, that seems too hacky to me. I still prefer to rely on the migration process of the mlcock pages.
BTW, Yosry tried to address the overlap of the lru and mlock_count fields:
https://lore.kernel.org/lkml/20230618065719.1363271-1-yosryahmed@google.com/
But lore doesn't group all the patches under that cover letter.
Regards
Yin, Fengwei
* Re: [PATCH] mm: migrate: record the mlocked page status to remove unnecessary lru drain
2023-10-20 2:54 ` Yin, Fengwei
@ 2023-10-20 3:27 ` Baolin Wang
2023-10-20 3:45 ` Yosry Ahmed
0 siblings, 1 reply; 18+ messages in thread
From: Baolin Wang @ 2023-10-20 3:27 UTC (permalink / raw)
To: Yin, Fengwei, Huang, Ying, Zi Yan, Yosry Ahmed
Cc: akpm, mgorman, hughd, vbabka, linux-mm, linux-kernel
On 10/20/2023 10:54 AM, Yin, Fengwei wrote:
>
>
> On 10/20/2023 10:45 AM, Baolin Wang wrote:
>>
>>
>> On 10/20/2023 10:30 AM, Yin, Fengwei wrote:
>>>
>>>
>>> On 10/20/2023 10:09 AM, Baolin Wang wrote:
>>>>
>>>>
>>>> On 10/19/2023 8:07 PM, Yin, Fengwei wrote:
>>>>>
>>>>>
>>>>> On 10/19/2023 4:51 PM, Baolin Wang wrote:
>>>>>>
>>>>>>
>>>>>> On 10/19/2023 4:22 PM, Yin Fengwei wrote:
>>>>>>> Hi Baolin,
>>>>>>>
>>>>>>> On 10/19/23 15:25, Baolin Wang wrote:
>>>>>>>>
>>>>>>>>
>>>>>>>> On 10/19/2023 2:09 PM, Huang, Ying wrote:
>>>>>>>>> Zi Yan <ziy@nvidia.com> writes:
>>>>>>>>>
>>>>>>>>>> On 18 Oct 2023, at 9:04, Baolin Wang wrote:
>>>>>>>>>>
>>>>>>>>>>> When doing compaction, I found the lru_add_drain() is an obvious hotspot
>>>>>>>>>>> when migrating pages. The distribution of this hotspot is as follows:
>>>>>>>>>>> - 18.75% compact_zone
>>>>>>>>>>> - 17.39% migrate_pages
>>>>>>>>>>> - 13.79% migrate_pages_batch
>>>>>>>>>>> - 11.66% migrate_folio_move
>>>>>>>>>>> - 7.02% lru_add_drain
>>>>>>>>>>> + 7.02% lru_add_drain_cpu
>>>>>>>>>>> + 3.00% move_to_new_folio
>>>>>>>>>>> 1.23% rmap_walk
>>>>>>>>>>> + 1.92% migrate_folio_unmap
>>>>>>>>>>> + 3.20% migrate_pages_sync
>>>>>>>>>>> + 0.90% isolate_migratepages
>>>>>>>>>>>
>>>>>>>>>>> The lru_add_drain() was added by commit c3096e6782b7 ("mm/migrate:
>>>>>>>>>>> __unmap_and_move() push good newpage to LRU") to drain the newpage to LRU
>>>>>>>>>>> immediately, to help to build up the correct newpage->mlock_count in
>>>>>>>>>>> remove_migration_ptes() for mlocked pages. However, if there are no mlocked
>>>>>>>>>>> pages are migrating, then we can avoid this lru drain operation, especailly
>>>>>>>>>>> for the heavy concurrent scenarios.
>>>>>>>>>>
>>>>>>>>>> lru_add_drain() is also used to drain pages out of folio_batch. Pages in folio_batch
>>>>>>>>>> have an additional pin to prevent migration. See folio_get(folio); in folio_add_lru().
>>>>>>>>>
>>>>>>>>> lru_add_drain() is called after the page reference count checking in
>>>>>>>>> move_to_new_folio(). So, I don't this is an issue.
>>>>>>>>
>>>>>>>> Agree. The purpose of adding lru_add_drain() is to address the 'mlock_count' issue for mlocked pages. Please see commit c3096e6782b7 and related comments. Moreover I haven't seen an increase in the number of page migration failures due to page reference count checking after this patch.
>>>>>>>
>>>>>>> I agree with your. My understanding also is that the lru_add_drain() is only needed
>>>>>>> for mlocked folio to correct mlock_count. Like to hear the confirmation from Huge.
>>>>>>>
>>>>>>>
>>>>>>> But I have question: why do we need use page_was_mlocked instead of check
>>>>>>> folio_test_mlocked(src)? Does page migration clear the mlock flag? Thanks.
>>>>>>
>>>>>> Yes, please see the call trace: try_to_migrate_one() ---> page_remove_rmap() ---> munlock_vma_folio().
>>>>>
>>>>> Yes. This will clear mlock bit.
>>>>>
>>>>> What about set dst folio mlocked if source is before try_to_migrate_one()? And
>>>>> then check whether dst folio is mlocked after? And need clear mlocked if migration
>>>>> fails. I suppose the change is minor. Just a thought. Thanks.
>>>>
>>>> IMO, this will break the mlock related statistics in mlock_folio() when the remove_migration_pte() rebuilds the mlock status and mlock count.
>>>>
>>>> Another concern I can see is that, during the page migration, a concurrent munlock() can be called to clean the VM_LOCKED flags for the VMAs, so the remove_migration_pte() should not rebuild the mlock status and mlock count. But the dst folio's mlcoked status is still remained, which is wrong.
>>>>
>>>> So your suggested apporach seems not easy, and I think my patch is simple with re-using existing __migrate_folio_record() and __migrate_folio_extract() :)
>>>
>>> Can these concerns be addressed by clear dst mlocked after lru_add_drain() but before
>>> remove_migration_pte()?
>>
>> IMHO, that seems too hacky to me. I still prefer to rely on the migration process of the mlcock pages.
>
> BTW, Yosry tried to address the overlap of field lru and mlock_count:
> https://lore.kernel.org/lkml/20230618065719.1363271-1-yosryahmed@google.com/
> But the lore doesn't group all the patches.
Thanks for the information. I'd like to review and test it to see if this
work can continue.
* Re: [PATCH] mm: migrate: record the mlocked page status to remove unnecessary lru drain
2023-10-20 3:27 ` Baolin Wang
@ 2023-10-20 3:45 ` Yosry Ahmed
2023-10-20 3:52 ` Yin, Fengwei
0 siblings, 1 reply; 18+ messages in thread
From: Yosry Ahmed @ 2023-10-20 3:45 UTC (permalink / raw)
To: Baolin Wang
Cc: Yin, Fengwei, Huang, Ying, Zi Yan, akpm, mgorman, hughd, vbabka,
linux-mm, linux-kernel
> >>
> >> IMHO, that seems too hacky to me. I still prefer to rely on the migration process of the mlcock pages.
> >
> > BTW, Yosry tried to address the overlap of field lru and mlock_count:
> > https://lore.kernel.org/lkml/20230618065719.1363271-1-yosryahmed@google.com/
> > But the lore doesn't group all the patches.
>
> Thanks for the information. I'd like to review and test if this work can
> continue.
The motivation for this work was reviving the unevictable LRU for the
memcg recharging RFC series [1]. However, that series was heavily
criticized. I was not intending to follow up on it.
If reworking the mlock_count is beneficial for other reasons, I am
happy to respin it if the work needed to make it mergeable is minimal.
Otherwise, I don't think I have the time to revisit (but feel free to
pick up the patches if you'd like).
[1] https://lore.kernel.org/lkml/20230720070825.992023-1-yosryahmed@google.com/
* Re: [PATCH] mm: migrate: record the mlocked page status to remove unnecessary lru drain
2023-10-20 3:45 ` Yosry Ahmed
@ 2023-10-20 3:52 ` Yin, Fengwei
2023-10-20 4:02 ` Yosry Ahmed
0 siblings, 1 reply; 18+ messages in thread
From: Yin, Fengwei @ 2023-10-20 3:52 UTC (permalink / raw)
To: Yosry Ahmed, Baolin Wang
Cc: Huang, Ying, Zi Yan, akpm, mgorman, hughd, vbabka, linux-mm,
linux-kernel
On 10/20/2023 11:45 AM, Yosry Ahmed wrote:
>>>>
>>>> IMHO, that seems too hacky to me. I still prefer to rely on the migration process of the mlcock pages.
>>>
>>> BTW, Yosry tried to address the overlap of field lru and mlock_count:
>>> https://lore.kernel.org/lkml/20230618065719.1363271-1-yosryahmed@google.com/
>>> But the lore doesn't group all the patches.
>>
>> Thanks for the information. I'd like to review and test if this work can
>> continue.
>
> The motivation for this work was reviving the unevictable LRU for the
> memcg recharging RFC series [1]. However, that series was heavily
> criticized. I was not intending on following up on it.
>
> If reworking the mlock_count is beneficial for other reasons, I am
> happy to respin it if the work needed to make it mergeable is minimal.
> Otherwise, I don't think I have the time to revisit (but feel free to
> pick up the patches if you'd like).
>
> [1]https://lore.kernel.org/lkml/20230720070825.992023-1-yosryahmed@google.com/
I believe reworking the mlock_count is the focus here. If there is no
overlap between lru and mlock_count, the whole lru_add_drain() logic here
can be removed.
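(For context, a simplified sketch of the lru/mlock_count overlap under
discussion; the layout below only approximates the union in mainline's
struct page / struct folio, so treat the names as illustrative rather than
an exact copy of include/linux/mm_types.h.)

/*
 * Illustrative only: mlock_count shares storage with the lru list
 * linkage, so it can only be maintained once the folio actually sits on
 * the (unevictable) LRU and not while it is still parked in a per-CPU
 * folio batch -- hence the lru_add_drain() before remove_migration_ptes()
 * for mlocked pages.
 */
struct list_head {
	struct list_head *next, *prev;
};

struct folio_sketch {
	unsigned long flags;
	union {
		struct list_head lru;		/* while on an LRU list */
		struct {
			void *__filler;		/* overlays lru.next */
			unsigned int mlock_count; /* overlays part of lru.prev */
		};
	};
};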
And I noticed that the link:
https://lore.kernel.org/lkml/20230618065719.1363271-1-yosryahmed@google.com/
only has the cover letter; the patches aren't grouped under it.
Regards
Yin, Fengwei
* Re: [PATCH] mm: migrate: record the mlocked page status to remove unnecessary lru drain
2023-10-20 3:52 ` Yin, Fengwei
@ 2023-10-20 4:02 ` Yosry Ahmed
2023-10-20 4:04 ` Yin, Fengwei
0 siblings, 1 reply; 18+ messages in thread
From: Yosry Ahmed @ 2023-10-20 4:02 UTC (permalink / raw)
To: Yin, Fengwei
Cc: Baolin Wang, Huang, Ying, Zi Yan, akpm, mgorman, hughd, vbabka,
linux-mm, linux-kernel
On Thu, Oct 19, 2023 at 8:52 PM Yin, Fengwei <fengwei.yin@intel.com> wrote:
>
>
>
> On 10/20/2023 11:45 AM, Yosry Ahmed wrote:
> >>>>
> >>>> IMHO, that seems too hacky to me. I still prefer to rely on the migration process of the mlcock pages.
> >>>
> >>> BTW, Yosry tried to address the overlap of field lru and mlock_count:
> >>> https://lore.kernel.org/lkml/20230618065719.1363271-1-yosryahmed@google.com/
> >>> But the lore doesn't group all the patches.
> >>
> >> Thanks for the information. I'd like to review and test if this work can
> >> continue.
> >
> > The motivation for this work was reviving the unevictable LRU for the
> > memcg recharging RFC series [1]. However, that series was heavily
> > criticized. I was not intending on following up on it.
> >
> > If reworking the mlock_count is beneficial for other reasons, I am
> > happy to respin it if the work needed to make it mergeable is minimal.
> > Otherwise, I don't think I have the time to revisit (but feel free to
> > pick up the patches if you'd like).
> >
> > [1]https://lore.kernel.org/lkml/20230720070825.992023-1-yosryahmed@google.com/
>
> I believe reworking the mlock_count is focus here. If there is no overlap
> between lru and mlock_count, the whole logic of lru_add_drain() can be
> removed here.
All patches except patch 4 are for reworking the mlock_count. Once the
mlock count is reworked, reviving the unevictable LRU is actually very
simple and removes more code than it adds (see patch 4 below).
>
> And I noticed the link:
> https://lore.kernel.org/lkml/20230618065719.1363271-1-yosryahmed@google.com/
> only has cover letter and the patches didn't grouped.
That's weird, here are the patches (in order):
https://lore.kernel.org/lkml/20230618065744.1363948-1-yosryahmed@google.com/
https://lore.kernel.org/lkml/20230618065756.1364399-1-yosryahmed@google.com/
https://lore.kernel.org/lkml/20230618065809.1364900-1-yosryahmed@google.com/
https://lore.kernel.org/lkml/20230618065816.1365301-1-yosryahmed@google.com/
https://lore.kernel.org/lkml/20230618065824.1365750-1-yosryahmed@google.com/
>
>
> Regards
> Yin, Fengwei
>
* Re: [PATCH] mm: migrate: record the mlocked page status to remove unnecessary lru drain
2023-10-20 4:02 ` Yosry Ahmed
@ 2023-10-20 4:04 ` Yin, Fengwei
0 siblings, 0 replies; 18+ messages in thread
From: Yin, Fengwei @ 2023-10-20 4:04 UTC (permalink / raw)
To: Yosry Ahmed
Cc: Baolin Wang, Huang, Ying, Zi Yan, akpm, mgorman, hughd, vbabka,
linux-mm, linux-kernel
On 10/20/2023 12:02 PM, Yosry Ahmed wrote:
> On Thu, Oct 19, 2023 at 8:52 PM Yin, Fengwei <fengwei.yin@intel.com> wrote:
>>
>>
>>
>> On 10/20/2023 11:45 AM, Yosry Ahmed wrote:
>>>>>>
>>>>>> IMHO, that seems too hacky to me. I still prefer to rely on the migration process of the mlcock pages.
>>>>>
>>>>> BTW, Yosry tried to address the overlap of field lru and mlock_count:
>>>>> https://lore.kernel.org/lkml/20230618065719.1363271-1-yosryahmed@google.com/
>>>>> But the lore doesn't group all the patches.
>>>>
>>>> Thanks for the information. I'd like to review and test if this work can
>>>> continue.
>>>
>>> The motivation for this work was reviving the unevictable LRU for the
>>> memcg recharging RFC series [1]. However, that series was heavily
>>> criticized. I was not intending on following up on it.
>>>
>>> If reworking the mlock_count is beneficial for other reasons, I am
>>> happy to respin it if the work needed to make it mergeable is minimal.
>>> Otherwise, I don't think I have the time to revisit (but feel free to
>>> pick up the patches if you'd like).
>>>
>>> [1]https://lore.kernel.org/lkml/20230720070825.992023-1-yosryahmed@google.com/
>>
>> I believe reworking the mlock_count is focus here. If there is no overlap
>> between lru and mlock_count, the whole logic of lru_add_drain() can be
>> removed here.
>
> All patches except patch 4 are for reworking the mlock_count. Once the
> mlock count is reworked, reviving the unevictable LRU is actually very
> simple and removes more code than it adds (see patch 4 below).
>
>>
>> And I noticed the link:
>> https://lore.kernel.org/lkml/20230618065719.1363271-1-yosryahmed@google.com/
>> only has cover letter and the patches didn't grouped.
>
> That's weird, here are the patches (in order):
> https://lore.kernel.org/lkml/20230618065744.1363948-1-yosryahmed@google.com/
> https://lore.kernel.org/lkml/20230618065756.1364399-1-yosryahmed@google.com/
> https://lore.kernel.org/lkml/20230618065809.1364900-1-yosryahmed@google.com/
> https://lore.kernel.org/lkml/20230618065816.1365301-1-yosryahmed@google.com/
> https://lore.kernel.org/lkml/20230618065824.1365750-1-yosryahmed@google.com/
Thanks a lot.
Regards
Yin, Fengwei
>
>>
>>
>> Regards
>> Yin, Fengwei
>>
Thread overview: 18+ messages
2023-10-18 13:04 [PATCH] mm: migrate: record the mlocked page status to remove unnecessary lru drain Baolin Wang
2023-10-18 14:00 ` Zi Yan
2023-10-19 6:09 ` Huang, Ying
2023-10-19 7:25 ` Baolin Wang
2023-10-19 8:22 ` Yin Fengwei
2023-10-19 8:51 ` Baolin Wang
2023-10-19 12:07 ` Yin, Fengwei
2023-10-20 2:09 ` Baolin Wang
2023-10-20 2:30 ` Yin, Fengwei
2023-10-20 2:45 ` Baolin Wang
2023-10-20 2:47 ` Yin, Fengwei
2023-10-20 2:54 ` Yin, Fengwei
2023-10-20 3:27 ` Baolin Wang
2023-10-20 3:45 ` Yosry Ahmed
2023-10-20 3:52 ` Yin, Fengwei
2023-10-20 4:02 ` Yosry Ahmed
2023-10-20 4:04 ` Yin, Fengwei
2023-10-19 13:23 ` Zi Yan