From: Johannes Weiner <hannes@cmpxchg.org>
To: Kanchana P Sridhar <kanchana.p.sridhar@intel.com>
Cc: linux-kernel@vger.kernel.org, linux-mm@kvack.org,
yosryahmed@google.com, nphamcs@gmail.com,
chengming.zhou@linux.dev, usamaarif642@gmail.com,
ryan.roberts@arm.com, ying.huang@intel.com, 21cnbao@gmail.com,
akpm@linux-foundation.org, linux-crypto@vger.kernel.org,
herbert@gondor.apana.org.au, davem@davemloft.net,
clabbe@baylibre.com, ardb@kernel.org, ebiggers@google.com,
surenb@google.com, kristen.c.accardi@intel.com,
zanussi@kernel.org, wajdi.k.feghali@intel.com,
vinodh.gopal@intel.com
Subject: Re: [PATCH v3 13/13] mm: zswap: Compress batching with Intel IAA in zswap_store() of large folios.
Date: Thu, 7 Nov 2024 13:53:40 -0500
Message-ID: <20241107185340.GG1172372@cmpxchg.org>
In-Reply-To: <20241106192105.6731-14-kanchana.p.sridhar@intel.com>
On Wed, Nov 06, 2024 at 11:21:05AM -0800, Kanchana P Sridhar wrote:
> +static void zswap_zpool_store_sub_batch(
> +        struct zswap_store_pipeline_state *zst)
There is a zswap_store_sub_batch() below, which does something
completely different. Naming is hard, but please invest a bit more
time into this to make this readable.
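Just to illustrate (these names are made up, pick whatever fits), something
like the following would make the split obvious at the call site:

        /* Copy the compressed buffers of one sub-batch into the zpool. */
        static void zswap_batch_store_in_zpool(struct zswap_store_pipeline_state *zst);

        /* Run the full store pipeline (compress + store) for one sub-batch. */
        static void zswap_batch_store(struct zswap_store_pipeline_state *zst);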
> +{
> +        u8 i;
> +
> +        for (i = 0; i < zst->nr_comp_pages; ++i) {
> +                struct zswap_store_sub_batch_page *sbp = &zst->sub_batch[i];
> +                struct zpool *zpool;
> +                unsigned long handle;
> +                char *buf;
> +                gfp_t gfp;
> +                int err;
> +
> +                /* Skip pages that had compress errors. */
> +                if (sbp->error)
> +                        continue;
> +
> +                zpool = zst->pool->zpool;
> +                gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
> +                if (zpool_malloc_support_movable(zpool))
> +                        gfp |= __GFP_HIGHMEM | __GFP_MOVABLE;
> +                err = zpool_malloc(zpool, zst->comp_dlens[i], gfp, &handle);
> +
> +                if (err) {
> +                        if (err == -ENOSPC)
> +                                zswap_reject_compress_poor++;
> +                        else
> +                                zswap_reject_alloc_fail++;
> +
> +                        /*
> +                         * An error should be propagated to other pages of the
> +                         * same folio in the sub-batch, and zpool resources for
> +                         * those pages (in sub-batch order prior to this zpool
> +                         * error) should be de-allocated.
> +                         */
> +                        zswap_store_propagate_errors(zst, sbp->batch_idx);
> +                        continue;
> +                }
> +
> +                buf = zpool_map_handle(zpool, handle, ZPOOL_MM_WO);
> +                memcpy(buf, zst->comp_dsts[i], zst->comp_dlens[i]);
> +                zpool_unmap_handle(zpool, handle);
> +
> +                sbp->entry->handle = handle;
> +                sbp->entry->length = zst->comp_dlens[i];
> +        }
> +}
> +
> +/*
> + * Returns true if the entry was successfully
> + * stored in the xarray, and false otherwise.
> + */
> +static bool zswap_store_entry(swp_entry_t page_swpentry,
> +                              struct zswap_entry *entry)
> +{
> +        struct zswap_entry *old = xa_store(swap_zswap_tree(page_swpentry),
> +                                           swp_offset(page_swpentry),
> +                                           entry, GFP_KERNEL);
> +        if (xa_is_err(old)) {
> +                int err = xa_err(old);
> +
> +                WARN_ONCE(err != -ENOMEM, "unexpected xarray error: %d\n", err);
> +                zswap_reject_alloc_fail++;
> +                return false;
> +        }
> +
> +        /*
> +         * We may have had an existing entry that became stale when
> +         * the folio was redirtied and now the new version is being
> +         * swapped out. Get rid of the old.
> +         */
> +        if (old)
> +                zswap_entry_free(old);
> +
> +        return true;
> +}
> +
> +static void zswap_batch_compress_post_proc(
> +        struct zswap_store_pipeline_state *zst)
> +{
> +        int nr_objcg_pages = 0, nr_pages = 0;
> +        struct obj_cgroup *objcg = NULL;
> +        size_t compressed_bytes = 0;
> +        u8 i;
> +
> +        zswap_zpool_store_sub_batch(zst);
> +
> +        for (i = 0; i < zst->nr_comp_pages; ++i) {
> +                struct zswap_store_sub_batch_page *sbp = &zst->sub_batch[i];
> +
> +                if (sbp->error)
> +                        continue;
> +
> +                if (!zswap_store_entry(sbp->swpentry, sbp->entry)) {
> +                        zswap_store_propagate_errors(zst, sbp->batch_idx);
> +                        continue;
> +                }
> +
> +                /*
> +                 * The entry is successfully compressed and stored in the tree,
> +                 * there is no further possibility of failure. Grab refs to the
> +                 * pool and objcg. These refs will be dropped by
> +                 * zswap_entry_free() when the entry is removed from the tree.
> +                 */
> +                zswap_pool_get(zst->pool);
> +                if (sbp->objcg)
> +                        obj_cgroup_get(sbp->objcg);
> +
> +                /*
> +                 * We finish initializing the entry while it's already in xarray.
> +                 * This is safe because:
> +                 *
> +                 * 1. Concurrent stores and invalidations are excluded by folio
> +                 *    lock.
> +                 *
> +                 * 2. Writeback is excluded by the entry not being on the LRU yet.
> +                 *    The publishing order matters to prevent writeback from seeing
> +                 *    an incoherent entry.
> +                 */
> +                sbp->entry->pool = zst->pool;
> +                sbp->entry->swpentry = sbp->swpentry;
> +                sbp->entry->objcg = sbp->objcg;
> +                sbp->entry->referenced = true;
> +                if (sbp->entry->length) {
> +                        INIT_LIST_HEAD(&sbp->entry->lru);
> +                        zswap_lru_add(&zswap_list_lru, sbp->entry);
> +                }
> +
> +                if (!objcg && sbp->objcg) {
> +                        objcg = sbp->objcg;
> +                } else if (objcg && sbp->objcg && (objcg != sbp->objcg)) {
> +                        obj_cgroup_charge_zswap(objcg, compressed_bytes);
> +                        count_objcg_events(objcg, ZSWPOUT, nr_objcg_pages);
> +                        compressed_bytes = 0;
> +                        nr_objcg_pages = 0;
> +                        objcg = sbp->objcg;
> +                }
> +
> +                if (sbp->objcg) {
> +                        compressed_bytes += sbp->entry->length;
> +                        ++nr_objcg_pages;
> +                }
> +
> +                ++nr_pages;
> +        } /* for sub-batch pages. */
> +
> +        if (objcg) {
> +                obj_cgroup_charge_zswap(objcg, compressed_bytes);
> +                count_objcg_events(objcg, ZSWPOUT, nr_objcg_pages);
> +        }
> +
> +        atomic_long_add(nr_pages, &zswap_stored_pages);
> +        count_vm_events(ZSWPOUT, nr_pages);
> +}
> +
> +static void zswap_store_sub_batch(struct zswap_store_pipeline_state *zst)
> +{
> +        u8 i;
> +
> +        for (i = 0; i < zst->nr_comp_pages; ++i) {
> +                zst->comp_dsts[i] = zst->acomp_ctx->buffers[i];
> +                zst->comp_dlens[i] = PAGE_SIZE;
> +        } /* for sub-batch pages. */
> +
> +        /*
> +         * Batch compress sub-batch "N". If IAA is the compressor, the
> +         * hardware will compress multiple pages in parallel.
> +         */
> +        zswap_compress_batch(zst);
> +
> +        zswap_batch_compress_post_proc(zst);
The control flow here is a mess. Keep loops over the same batch at the
same function level. IOW, pull the nr_comp_pages loop out of
zswap_batch_compress_post_proc() and call the function from the loop.
Also give it a more descriptive name. If that's hard to do, then
you're probably doing too many different things in it. Create
functions for a specific purpose, don't carve up sequences at
arbitrary points.
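Roughly what I mean, as an unverified sketch (zswap_store_one_page() is a
name I just made up for the per-page part):

        for (i = 0; i < zst->nr_comp_pages; ++i) {
                struct zswap_store_sub_batch_page *sbp = &zst->sub_batch[i];

                if (sbp->error)
                        continue;

                /* zpool write, tree insert, lru add, stats for ONE page */
                if (!zswap_store_one_page(zst, sbp))
                        zswap_store_propagate_errors(zst, sbp->batch_idx);
        }

Then the function has one clear job and the batch iteration is visible in
one place.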
My impression after trying to read this is that the existing
zswap_store() sequence could be a subset of the batched store, where
you can reuse most code to get the pool, charge the cgroup, allocate
entries, store entries, bump the stats etc. for both cases. Alas, your
naming choices make it a bit difficult to be sure.
Please explore this direction. Don't worry about the CONFIG symbol for
now, we can still look at this later.
Right now, it's basically

        if (special case)
                lots of duplicative code in slightly different order

        regular store sequence

and that isn't going to be maintainable.
Look for a high-level sequence that makes sense for both cases. E.g.:
        if (!zswap_enabled)
                goto check_old;

        get objcg
        check limits
        allocate memcg list lru

        for each batch {
                for each entry {
                        allocate entry
                        acquire objcg ref
                        acquire pool ref
                }

                compress

                for each entry {
                        store in tree
                        add to lru
                        bump stats and counters
                }
        }

        put objcg

        return true;

check_old:
        ...
and then set up the two loops such that they also make sense when the
folio is just a single page. In that case the batch is simply one page,
each loop body runs once, and you end up with the current zswap_store()
sequence.
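Spelled out as rough C (every helper name below is hypothetical, error
unwinding elided, just to show the shape):

        for (start = 0; start < nr_pages; start += batch_size) {
                for (i = start; i < start + batch_size; i++) {
                        entry[i] = zswap_entry_cache_alloc(GFP_KERNEL, nid);
                        zswap_pool_get(pool);
                        if (objcg)
                                obj_cgroup_get(objcg);
                }

                /* batch_size == 1 for compressors without batching */
                zswap_compress_batch(...);

                for (i = start; i < start + batch_size; i++) {
                        zswap_store_entry(page_swpentry(i), entry[i]);
                        if (entry[i]->length)
                                zswap_lru_add(&zswap_list_lru, entry[i]);
                        count_vm_event(ZSWPOUT);
                }
        }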