linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Mina Almasry <almasrymina@google.com>
To: Byungchul Park <byungchul@sk.com>
Cc: willy@infradead.org, netdev@vger.kernel.org,
	linux-kernel@vger.kernel.org,  linux-mm@kvack.org,
	kernel_team@skhynix.com, kuba@kernel.org,
	 ilias.apalodimas@linaro.org, harry.yoo@oracle.com,
	hawk@kernel.org,  akpm@linux-foundation.org, ast@kernel.org,
	daniel@iogearbox.net,  davem@davemloft.net,
	john.fastabend@gmail.com, andrew+netdev@lunn.ch,
	 edumazet@google.com, pabeni@redhat.com, vishal.moola@gmail.com
Subject: Re: [RFC 19/19] mm, netmem: remove the page pool members in struct page
Date: Fri, 9 May 2025 10:32:08 -0700	[thread overview]
Message-ID: <CAHS8izMoS4wwmc363TFJU_XCtOX9vOv5ZQwD_k2oHx40D8hAPA@mail.gmail.com> (raw)
In-Reply-To: <20250509115126.63190-20-byungchul@sk.com>

On Fri, May 9, 2025 at 4:51 AM Byungchul Park <byungchul@sk.com> wrote:
>
> Now that all the users of the page pool members in struct page have been
> gone, the members can be removed from struct page.  However, the space
> in struct page needs to be kept using a place holder with the same size,
> until struct netmem_desc has its own instance, not overlayed onto struct
> page, to avoid conficting with other members within struct page.
>
> Remove the page pool members in struct page and replace with a place
> holder.  The place holder should be removed once struct netmem_desc has
> its own instance.
>
> Signed-off-by: Byungchul Park <byungchul@sk.com>
> ---
>  include/linux/mm_types.h  | 13 ++-----------
>  include/net/netmem.h      | 35 +----------------------------------
>  include/net/netmem_type.h | 22 ++++++++++++++++++++++
>  3 files changed, 25 insertions(+), 45 deletions(-)
>  create mode 100644 include/net/netmem_type.h
>
> diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
> index e76bade9ebb12..69904a0855358 100644
> --- a/include/linux/mm_types.h
> +++ b/include/linux/mm_types.h
> @@ -20,6 +20,7 @@
>  #include <linux/seqlock.h>
>  #include <linux/percpu_counter.h>
>  #include <linux/types.h>
> +#include <net/netmem_type.h> /* for page pool */
>
>  #include <asm/mmu.h>
>
> @@ -118,17 +119,7 @@ struct page {
>                          */
>                         unsigned long private;
>                 };
> -               struct {        /* page_pool used by netstack */
> -                       /**
> -                        * @pp_magic: magic value to avoid recycling non
> -                        * page_pool allocated pages.
> -                        */
> -                       unsigned long pp_magic;
> -                       struct page_pool *pp;
> -                       unsigned long _pp_mapping_pad;
> -                       unsigned long dma_addr;
> -                       atomic_long_t pp_ref_count;
> -               };
> +               struct __netmem_desc place_holder_1; /* for page pool */
>                 struct {        /* Tail pages of compound page */
>                         unsigned long compound_head;    /* Bit zero is set */
>                 };
> diff --git a/include/net/netmem.h b/include/net/netmem.h
> index 00064e766b889..c414de6c6ab0d 100644
> --- a/include/net/netmem.h
> +++ b/include/net/netmem.h
> @@ -10,6 +10,7 @@
>
>  #include <linux/mm.h>
>  #include <net/net_debug.h>
> +#include <net/netmem_type.h>
>
>  /* net_iov */
>
> @@ -20,15 +21,6 @@ DECLARE_STATIC_KEY_FALSE(page_pool_mem_providers);
>   */
>  #define NET_IOV 0x01UL
>
> -struct netmem_desc {
> -       unsigned long __unused_padding;
> -       unsigned long pp_magic;
> -       struct page_pool *pp;
> -       struct net_iov_area *owner;
> -       unsigned long dma_addr;
> -       atomic_long_t pp_ref_count;
> -};
> -
>  struct net_iov_area {
>         /* Array of net_iovs for this area. */
>         struct netmem_desc *niovs;
> @@ -38,31 +30,6 @@ struct net_iov_area {
>         unsigned long base_virtual;
>  };
>
> -/* These fields in struct page are used by the page_pool and net stack:
> - *
> - *        struct {
> - *                unsigned long pp_magic;
> - *                struct page_pool *pp;
> - *                unsigned long _pp_mapping_pad;
> - *                unsigned long dma_addr;
> - *                atomic_long_t pp_ref_count;
> - *        };
> - *
> - * We mirror the page_pool fields here so the page_pool can access these fields
> - * without worrying whether the underlying fields belong to a page or net_iov.
> - *
> - * The non-net stack fields of struct page are private to the mm stack and must
> - * never be mirrored to net_iov.
> - */
> -#define NET_IOV_ASSERT_OFFSET(pg, iov)             \
> -       static_assert(offsetof(struct page, pg) == \
> -                     offsetof(struct netmem_desc, iov))
> -NET_IOV_ASSERT_OFFSET(pp_magic, pp_magic);
> -NET_IOV_ASSERT_OFFSET(pp, pp);
> -NET_IOV_ASSERT_OFFSET(dma_addr, dma_addr);
> -NET_IOV_ASSERT_OFFSET(pp_ref_count, pp_ref_count);
> -#undef NET_IOV_ASSERT_OFFSET
> -
>  static inline struct net_iov_area *net_iov_owner(const struct netmem_desc *niov)
>  {
>         return niov->owner;
> diff --git a/include/net/netmem_type.h b/include/net/netmem_type.h
> new file mode 100644
> index 0000000000000..6a3ac8e908515
> --- /dev/null
> +++ b/include/net/netmem_type.h
> @@ -0,0 +1,22 @@
> +/* SPDX-License-Identifier: GPL-2.0
> + *
> + *     Author: Byungchul Park <max.byungchul.park@gmail.com>
> + */
> +
> +#ifndef _NET_NETMEM_TYPE_H
> +#define _NET_NETMEM_TYPE_H
> +
> +#include <linux/stddef.h>
> +
> +struct netmem_desc {
> +       unsigned long __unused_padding;
> +       struct_group_tagged(__netmem_desc, actual_data,
> +               unsigned long pp_magic;
> +               struct page_pool *pp;
> +               struct net_iov_area *owner;
> +               unsigned long dma_addr;
> +               atomic_long_t pp_ref_count;
> +       );
> +};
> +
> +#endif /* _NET_NETMEM_TYPE_H */
> --
> 2.17.1
>

Currently the only restriction on net_iov is that some of its fields
need to be cache aligned with some of the fields of struct page, but
there is no restriction on new fields added to net_iov. We already
have fields in net_iov that have nothing to do with struct page and
shouldn't be part of struct page. Like net_iov_area *owner. I don't
think net_iov_area should be part of struct page and I don't think we
should add restrictions to net_iov.

What I would suggest here is, roughly:

1. Add a new struct:

               struct netmem_desc {
                       unsigned long pp_magic;
                       struct page_pool *pp;
                       unsigned long _pp_mapping_pad;
                       unsigned long dma_addr;
                       atomic_long_t pp_ref_count;
               };

2. Then update struct page to include this entry instead of the definitions:

struct page {
...
               struct netmem_desc place_holder_1; /* for page pool */
...
}

3. And update struct net_iov to also include netmem_desc:

struct net_iov {
    struct netmem_desc desc;
    struct net_iov_area *owner;
    /* More net_iov specific fields in the future */
};

And drop patch 1 which does a rename.

Essentially netmem_desc can be an encapsulation of the shared fields
between struct page and struct net_iov.

-- 
Thanks,
Mina


  reply	other threads:[~2025-05-09 17:32 UTC|newest]

Thread overview: 53+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-05-09 11:51 [RFC 00/19] Split netmem from " Byungchul Park
2025-05-09 11:51 ` [RFC 01/19] netmem: rename struct net_iov to struct netmem_desc Byungchul Park
2025-05-12 13:11   ` Pavel Begunkov
2025-05-12 13:29     ` Byungchul Park
2025-05-12 19:14       ` Mina Almasry
2025-05-13  2:00         ` Byungchul Park
2025-05-13 12:58           ` Pavel Begunkov
2025-05-13 12:49       ` Pavel Begunkov
2025-05-14  0:07         ` Byungchul Park
2025-05-09 11:51 ` [RFC 02/19] netmem: introduce netmem alloc/put API to wrap page alloc/put API Byungchul Park
2025-05-09 13:39   ` Mina Almasry
2025-05-09 14:08     ` Mina Almasry
2025-05-12 12:30       ` Byungchul Park
2025-05-09 11:51 ` [RFC 03/19] page_pool: use netmem alloc/put API in __page_pool_alloc_page_order() Byungchul Park
2025-05-09 11:51 ` [RFC 04/19] page_pool: rename __page_pool_alloc_page_order() to __page_pool_alloc_large_netmem() Byungchul Park
2025-05-09 11:51 ` [RFC 05/19] page_pool: use netmem alloc/put API in __page_pool_alloc_pages_slow() Byungchul Park
2025-05-09 11:51 ` [RFC 06/19] page_pool: rename page_pool_return_page() to page_pool_return_netmem() Byungchul Park
2025-05-09 11:51 ` [RFC 07/19] page_pool: use netmem alloc/put API in page_pool_return_netmem() Byungchul Park
2025-05-09 11:51 ` [RFC 08/19] page_pool: rename __page_pool_release_page_dma() to __page_pool_release_netmem_dma() Byungchul Park
2025-05-09 11:51 ` [RFC 09/19] page_pool: rename __page_pool_put_page() to __page_pool_put_netmem() Byungchul Park
2025-05-09 11:51 ` [RFC 10/19] page_pool: rename __page_pool_alloc_pages_slow() to __page_pool_alloc_netmems_slow() Byungchul Park
2025-05-09 11:51 ` [RFC 11/19] mlx4: use netmem descriptor and API for page pool Byungchul Park
2025-05-09 11:51 ` [RFC 12/19] netmem: introduce page_pool_recycle_direct_netmem() Byungchul Park
2025-05-09 11:51 ` [RFC 13/19] page_pool: expand scope of is_pp_{netmem,page}() to global Byungchul Park
2025-05-12 12:46   ` Toke Høiland-Jørgensen
2025-05-12 12:55     ` Byungchul Park
2025-05-14  3:00     ` Byungchul Park
2025-05-14 11:17       ` Toke Høiland-Jørgensen
2025-05-09 11:51 ` [RFC 14/19] mm: page_alloc: do not directly access page->pp_magic but use is_pp_page() Byungchul Park
2025-05-09 11:51 ` [RFC 15/19] mlx5: use netmem descriptor and API for page pool Byungchul Park
2025-05-09 11:51 ` [RFC 16/19] netmem: use _Generic to cover const casting for page_to_netmem() Byungchul Park
2025-05-09 11:51 ` [RFC 17/19] netmem: remove __netmem_get_pp() Byungchul Park
2025-05-09 13:47   ` Mina Almasry
2025-05-09 11:51 ` [RFC 18/19] page_pool: make page_pool_get_dma_addr() just wrap page_pool_get_dma_addr_netmem() Byungchul Park
2025-05-09 13:49   ` Mina Almasry
2025-05-10  7:28   ` Ilias Apalodimas
2025-05-09 11:51 ` [RFC 19/19] mm, netmem: remove the page pool members in struct page Byungchul Park
2025-05-09 17:32   ` Mina Almasry [this message]
2025-05-09 18:11     ` Matthew Wilcox
2025-05-09 19:04       ` Mina Almasry
2025-05-09 19:48         ` Matthew Wilcox
2025-05-12 19:10           ` Mina Almasry
2025-05-09 18:02   ` Matthew Wilcox
2025-05-12 12:51     ` Byungchul Park
2025-05-12 14:42       ` Matthew Wilcox
2025-05-13  1:42         ` Byungchul Park
2025-05-13  3:19           ` Matthew Wilcox
2025-05-13 10:24             ` Byungchul Park
2025-05-10  7:26   ` Ilias Apalodimas
2025-05-12 12:58     ` Byungchul Park
2025-05-09 14:09 ` [RFC 00/19] Split netmem from " Mina Almasry
2025-05-12 12:36   ` Byungchul Park
2025-05-12 12:59     ` Pavel Begunkov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=CAHS8izMoS4wwmc363TFJU_XCtOX9vOv5ZQwD_k2oHx40D8hAPA@mail.gmail.com \
    --to=almasrymina@google.com \
    --cc=akpm@linux-foundation.org \
    --cc=andrew+netdev@lunn.ch \
    --cc=ast@kernel.org \
    --cc=byungchul@sk.com \
    --cc=daniel@iogearbox.net \
    --cc=davem@davemloft.net \
    --cc=edumazet@google.com \
    --cc=harry.yoo@oracle.com \
    --cc=hawk@kernel.org \
    --cc=ilias.apalodimas@linaro.org \
    --cc=john.fastabend@gmail.com \
    --cc=kernel_team@skhynix.com \
    --cc=kuba@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    --cc=vishal.moola@gmail.com \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox