From: "Thomas Hellström" <thomas.hellstrom@linux.intel.com>
To: dri-devel@lists.freedesktop.org
Cc: "Thomas Hellström" <thomas.hellstrom@linux.intel.com>,
"Andrew Morton" <akpm@linux-foundation.org>,
"Matthew Wilcox (Oracle)" <willy@infradead.org>,
"Miaohe Lin" <linmiaohe@huawei.com>,
"David Hildenbrand" <david@redhat.com>,
"Johannes Weiner" <hannes@cmpxchg.org>,
"Peter Xu" <peterx@redhat.com>, NeilBrown <neilb@suse.de>,
"Daniel Vetter" <daniel.vetter@ffwll.ch>,
"Christian Koenig" <christian.koenig@amd.com>,
"Dave Airlie" <airlied@redhat.com>,
"Dave Hansen" <dave.hansen@intel.com>,
"Matthew Auld" <matthew.auld@intel.com>,
linux-graphics-maintainer@vmware.com, linux-mm@kvack.org,
intel-gfx@lists.freedesktop.org
Subject: [RFC PATCH 07/16] drm/ttm: Reduce the number of used allocation orders for TTM pages
Date: Wed, 15 Feb 2023 17:13:56 +0100 [thread overview]
Message-ID: <20230215161405.187368-8-thomas.hellstrom@linux.intel.com> (raw)
In-Reply-To: <20230215161405.187368-1-thomas.hellstrom@linux.intel.com>
When swapping out, we will split multi-order pages both in order to
move them to the swap-cache and to be able to return memory to the
swap cache as soon as possible on a page-by-page basis.
By reducing the maximum page order to the system PMD size, we can be nicer
to the system and avoid splitting gigantic pages. On top of this, we also
include the 64K page size among the page sizes tried, since that appears to
be a common size for GPU applications.
This will also be a benefit if we later become able to swap out PMD-size
folios without splitting them.
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
drivers/gpu/drm/ttm/ttm_pool.c | 58 ++++++++++++++++++++++++++--------
1 file changed, 45 insertions(+), 13 deletions(-)
diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
index 1cc7591a9542..8787fb6a218b 100644
--- a/drivers/gpu/drm/ttm/ttm_pool.c
+++ b/drivers/gpu/drm/ttm/ttm_pool.c
@@ -31,6 +31,8 @@
* cause they are rather slow compared to alloc_pages+map.
*/
+#define pr_fmt(fmt) "[TTM POOL] " fmt
+
#include <linux/module.h>
#include <linux/dma-mapping.h>
#include <linux/debugfs.h>
@@ -47,6 +49,18 @@
#include "ttm_module.h"
+#define TTM_MAX_ORDER (PMD_SHIFT - PAGE_SHIFT) /* start from the PMD-size order */
+#define TTM_64K_ORDER (16 - PAGE_SHIFT) /* order of a 64K (1 << 16 byte) allocation */
+#if (TTM_MAX_ORDER < TTM_64K_ORDER) /* never let the maximum drop below the 64K order */
+#undef TTM_MAX_ORDER
+#define TTM_MAX_ORDER TTM_64K_ORDER
+#endif
+#if ((MAX_ORDER - 1) < TTM_MAX_ORDER) /* cap the maximum at MAX_ORDER - 1 */
+#undef TTM_MAX_ORDER
+#define TTM_MAX_ORDER (MAX_ORDER - 1)
+#endif
+#define TTM_DIM_ORDER (TTM_MAX_ORDER + 1) /* array dimension covering orders 0..TTM_MAX_ORDER */
+
/**
* struct ttm_pool_dma - Helper object for coherent DMA mappings
*
@@ -65,16 +79,18 @@ module_param(page_pool_size, ulong, 0644);
static atomic_long_t allocated_pages;
-static struct ttm_pool_type global_write_combined[MAX_ORDER];
-static struct ttm_pool_type global_uncached[MAX_ORDER];
+static struct ttm_pool_type global_write_combined[TTM_DIM_ORDER];
+static struct ttm_pool_type global_uncached[TTM_DIM_ORDER];
-static struct ttm_pool_type global_dma32_write_combined[MAX_ORDER];
-static struct ttm_pool_type global_dma32_uncached[MAX_ORDER];
+static struct ttm_pool_type global_dma32_write_combined[TTM_DIM_ORDER];
+static struct ttm_pool_type global_dma32_uncached[TTM_DIM_ORDER];
static spinlock_t shrinker_lock;
static struct list_head shrinker_list;
static struct shrinker mm_shrinker;
+static unsigned int ttm_pool_orders[] = {TTM_MAX_ORDER, 0, 0}; /* orders tried, highest first; entry 1 may be set to TTM_64K_ORDER in ttm_pool_mgr_init(); the last entry stays 0 and terminates the walk in ttm_pool_select_order() */
+
/* Allocate pages of size 1 << order with the given gfp_flags */
static struct page *ttm_pool_alloc_page(struct ttm_pool *pool, gfp_t gfp_flags,
unsigned int order)
@@ -400,6 +416,17 @@ static void __ttm_pool_free(struct ttm_pool *pool, struct ttm_tt *tt,
}
}
+static unsigned int ttm_pool_select_order(unsigned int order, pgoff_t num_pages)
+{ /* Return the first entry of ttm_pool_orders[] that is <= min(order, __fls(num_pages)). */
+ unsigned int *cur_order = ttm_pool_orders; /* walk starts at the first table entry (TTM_MAX_ORDER) */
+
+ order = min_t(unsigned int, __fls(num_pages), order); /* clamp the requested order to __fls(num_pages); NOTE(review): callers presumably pass num_pages != 0 - confirm */
+ while (order < *cur_order) /* skip entries larger than the clamped order; order is unsigned, so a 0 entry always ends the loop */
+ ++cur_order;
+
+ return *cur_order;
+}
+
/**
* ttm_pool_alloc - Fill a ttm_tt object
*
@@ -439,9 +466,8 @@ int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
else
gfp_flags |= GFP_HIGHUSER;
- for (order = min_t(unsigned int, MAX_ORDER - 1, __fls(num_pages));
- num_pages;
- order = min_t(unsigned int, order, __fls(num_pages))) {
+ order = ttm_pool_select_order(ttm_pool_orders[0], num_pages);
+ for (; num_pages; order = ttm_pool_select_order(order, num_pages)) {
struct ttm_pool_type *pt;
page_caching = tt->caching;
@@ -558,7 +584,7 @@ void ttm_pool_init(struct ttm_pool *pool, struct device *dev,
if (use_dma_alloc) {
for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
- for (j = 0; j < MAX_ORDER; ++j)
+ for (j = 0; j < TTM_DIM_ORDER; ++j)
ttm_pool_type_init(&pool->caching[i].orders[j],
pool, i, j);
}
@@ -578,7 +604,7 @@ void ttm_pool_fini(struct ttm_pool *pool)
if (pool->use_dma_alloc) {
for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
- for (j = 0; j < MAX_ORDER; ++j)
+ for (j = 0; j < TTM_DIM_ORDER; ++j)
ttm_pool_type_fini(&pool->caching[i].orders[j]);
}
@@ -632,7 +658,7 @@ static void ttm_pool_debugfs_header(struct seq_file *m)
unsigned int i;
seq_puts(m, "\t ");
- for (i = 0; i < MAX_ORDER; ++i)
+ for (i = 0; i < TTM_DIM_ORDER; ++i)
seq_printf(m, " ---%2u---", i);
seq_puts(m, "\n");
}
@@ -643,7 +669,7 @@ static void ttm_pool_debugfs_orders(struct ttm_pool_type *pt,
{
unsigned int i;
- for (i = 0; i < MAX_ORDER; ++i)
+ for (i = 0; i < TTM_DIM_ORDER; ++i)
seq_printf(m, " %8u", ttm_pool_type_count(&pt[i]));
seq_puts(m, "\n");
}
@@ -749,10 +775,16 @@ int ttm_pool_mgr_init(unsigned long num_pages)
if (!page_pool_size)
page_pool_size = num_pages;
+ if (TTM_64K_ORDER < TTM_MAX_ORDER)
+ ttm_pool_orders[1] = TTM_64K_ORDER;
+
+ pr_debug("Used orders are %u %u %u\n", ttm_pool_orders[0],
+ ttm_pool_orders[1], ttm_pool_orders[2]);
+
spin_lock_init(&shrinker_lock);
INIT_LIST_HEAD(&shrinker_list);
- for (i = 0; i < MAX_ORDER; ++i) {
+ for (i = 0; i < TTM_DIM_ORDER; ++i) {
ttm_pool_type_init(&global_write_combined[i], NULL,
ttm_write_combined, i);
ttm_pool_type_init(&global_uncached[i], NULL, ttm_uncached, i);
@@ -785,7 +817,7 @@ void ttm_pool_mgr_fini(void)
{
unsigned int i;
- for (i = 0; i < MAX_ORDER; ++i) {
+ for (i = 0; i < TTM_DIM_ORDER; ++i) {
ttm_pool_type_fini(&global_write_combined[i]);
ttm_pool_type_fini(&global_uncached[i]);
--
2.34.1
next prev parent reply other threads:[~2023-02-15 16:15 UTC|newest]
Thread overview: 32+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-02-15 16:13 [RFC PATCH 00/16] Add a TTM shrinker Thomas Hellström
2023-02-15 16:13 ` [RFC PATCH 01/16] drm/ttm: Fix a NULL pointer dereference Thomas Hellström
2023-02-15 17:25 ` Christian König
2023-02-15 16:13 ` [RFC PATCH 02/16] drm/ttm/pool: Fix ttm_pool_alloc error path Thomas Hellström
2023-02-15 17:31 ` Christian König
2023-02-15 18:02 ` Thomas Hellström
2023-02-15 18:26 ` Christian König
2023-02-15 18:51 ` Thomas Hellström
2023-02-15 16:13 ` [RFC PATCH 03/16] drm/ttm: Use the BIT macro for the TTM_TT_FLAGs Thomas Hellström
2023-02-15 17:33 ` Christian König
2023-02-15 16:13 ` [RFC PATCH 04/16] drm/ttm, drm/vmwgfx: Update the TTM swapout interface Thomas Hellström
2023-02-15 17:39 ` Christian König
2023-02-15 18:19 ` Thomas Hellström
2023-02-15 18:32 ` Christian König
2023-02-15 16:13 ` [RFC PATCH 05/16] drm/ttm: Unexport ttm_global_swapout() Thomas Hellström
2023-02-15 16:13 ` [RFC PATCH 06/16] drm/ttm: Don't use watermark accounting on shrinkable pools Thomas Hellström
2023-02-15 16:13 ` Thomas Hellström [this message]
2023-02-15 17:42 ` [RFC PATCH 07/16] drm/ttm: Reduce the number of used allocation orders for TTM pages Christian König
2023-02-15 18:12 ` Thomas Hellström
2023-02-15 18:30 ` Christian König
2023-02-15 19:00 ` Thomas Hellström
2023-02-16 7:11 ` Christian König
2023-02-16 7:24 ` Thomas Hellström
2023-02-15 16:13 ` [RFC PATCH 08/16] drm/ttm: Add a shrinker and shrinker accounting Thomas Hellström
2023-02-15 16:13 ` [RFC PATCH 09/16] drm/ttm: Introduce shrink throttling Thomas Hellström
2023-02-15 16:13 ` [RFC PATCH 10/16] drm/ttm: Remove pinned bos from shrinkable accounting Thomas Hellström
2023-02-15 16:14 ` [RFC PATCH 11/16] drm/ttm: Add a simple api to set / clear purgeable ttm_tt content Thomas Hellström
2023-02-15 16:14 ` [RFC PATCH 12/16] mm: Add interfaces to back up and recover folio contents using swap Thomas Hellström
2023-02-15 16:14 ` [RFC PATCH 13/16] drm/ttm: Make the call to ttm_tt_populate() interruptible when faulting Thomas Hellström
2023-02-15 16:14 ` [RFC PATCH 14/16] drm/ttm: Provide helpers for shrinking Thomas Hellström
2023-02-15 16:14 ` [RFC PATCH 15/16] drm/ttm: Use fault-injection to test error paths Thomas Hellström
2023-02-15 16:14 ` [RFC PATCH 16/16] drm/i915, drm/ttm: Use the TTM shrinker rather than the external shmem pool Thomas Hellström
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230215161405.187368-8-thomas.hellstrom@linux.intel.com \
--to=thomas.hellstrom@linux.intel.com \
--cc=airlied@redhat.com \
--cc=akpm@linux-foundation.org \
--cc=christian.koenig@amd.com \
--cc=daniel.vetter@ffwll.ch \
--cc=dave.hansen@intel.com \
--cc=david@redhat.com \
--cc=dri-devel@lists.freedesktop.org \
--cc=hannes@cmpxchg.org \
--cc=intel-gfx@lists.freedesktop.org \
--cc=linmiaohe@huawei.com \
--cc=linux-graphics-maintainer@vmware.com \
--cc=linux-mm@kvack.org \
--cc=matthew.auld@intel.com \
--cc=neilb@suse.de \
--cc=peterx@redhat.com \
--cc=willy@infradead.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox