linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Vlastimil Babka <vbabka@suse.cz>
To: Oscar Salvador <osalvador@suse.de>,
	Andrew Morton <akpm@linux-foundation.org>
Cc: David Hildenbrand <david@redhat.com>,
	linux-mm@kvack.org, linux-kernel@vger.kernel.org,
	Hyeonggon Yoo <42.hyeyoo@gmail.com>,
	mkoutny@suse.com, Dan Williams <dan.j.williams@intel.com>,
	Jonathan Cameron <Jonathan.Cameron@huawei.com>
Subject: Re: [PATCH 1/2] mm,memory_hotplug: Implement numa node notifier
Date: Wed, 2 Apr 2025 18:03:04 +0200	[thread overview]
Message-ID: <4e122668-6f6a-4874-85df-e6869b9ccb24@suse.cz> (raw)
In-Reply-To: <20250401092716.537512-2-osalvador@suse.de>

On 4/1/25 11:27, Oscar Salvador wrote:
> There are at least four consumers of hotplug_memory_notifier that are really
> only interested in whether any numa node changed its state, e.g. going
> from being memory aware to becoming memoryless.
> 
> Implement a specific notifier for numa nodes when their state gets changed,
> and have those consumers that only care about numa node state changes use it.
> 
> Signed-off-by: Oscar Salvador <osalvador@suse.de>

<snip>

> -static void node_states_set_node(int node, struct memory_notify *arg)
> +static void node_states_set_node(int node, struct node_notify *arg)
>  {
>  	if (arg->status_change_nid_normal >= 0)
>  		node_set_state(node, N_NORMAL_MEMORY);
> @@ -1177,7 +1177,9 @@ int online_pages(unsigned long pfn, unsigned long nr_pages,
>  	int need_zonelists_rebuild = 0;
>  	const int nid = zone_to_nid(zone);
>  	int ret;
> -	struct memory_notify arg;
> +	struct memory_notify mem_arg;
> +	struct node_notify node_arg;
> +	bool cancel_mem_notifier_on_err = false, cancel_node_notifier_on_err = false;
>  
>  	/*
>  	 * {on,off}lining is constrained to full memory sections (or more
> @@ -1194,11 +1196,23 @@ int online_pages(unsigned long pfn, unsigned long nr_pages,
>  	/* associate pfn range with the zone */
>  	move_pfn_range_to_zone(zone, pfn, nr_pages, NULL, MIGRATE_ISOLATE);
>  
> -	arg.start_pfn = pfn;
> -	arg.nr_pages = nr_pages;
> -	node_states_check_changes_online(nr_pages, zone, &arg);
> +	mem_arg.start_pfn = pfn;
> +	mem_arg.nr_pages = nr_pages;
> +	node_states_check_changes_online(nr_pages, zone, &node_arg);
>  
> -	ret = memory_notify(MEM_GOING_ONLINE, &arg);
> +	if (node_arg.status_change_nid >= 0) {

Hmm, don't we need to add "|| node_arg.status_change_nid_normal >= 0"?
Otherwise we would fail to notify the addition of normal memory to a node
that already has !normal memory?

> +		/* Node is becoming memory aware. Notify consumers */
> +		cancel_node_notifier_on_err = true;
> +		ret = node_notify(NODE_BECOMING_MEM_AWARE, &node_arg);
> +		ret = notifier_to_errno(ret);
> +		if (ret)
> +			goto failed_addition;
> +	}
> +
> +	cancel_mem_notifier_on_err = true;
> +	mem_arg.status_change_nid = node_arg.status_change_nid;
> +	mem_arg.status_change_nid_normal = node_arg.status_change_nid_normal;
> +	ret = memory_notify(MEM_GOING_ONLINE, &mem_arg);
>  	ret = notifier_to_errno(ret);
>  	if (ret)
>  		goto failed_addition;
> @@ -1224,7 +1238,7 @@ int online_pages(unsigned long pfn, unsigned long nr_pages,
>  	online_pages_range(pfn, nr_pages);
>  	adjust_present_page_count(pfn_to_page(pfn), group, nr_pages);
>  
> -	node_states_set_node(nid, &arg);
> +	node_states_set_node(nid, &node_arg);
>  	if (need_zonelists_rebuild)
>  		build_all_zonelists(NULL);
>  
> @@ -1245,16 +1259,26 @@ int online_pages(unsigned long pfn, unsigned long nr_pages,
>  	kswapd_run(nid);
>  	kcompactd_run(nid);
>  
> +	if (node_arg.status_change_nid >= 0)
> +		/*
> +		 * Node went from memoryless to having memory. Notify interested
> +		 * consumers
> +		 */
> +		node_notify(NODE_BECAME_MEM_AWARE, &node_arg);
> +
>  	writeback_set_ratelimit();
>  
> -	memory_notify(MEM_ONLINE, &arg);
> +	memory_notify(MEM_ONLINE, &mem_arg);
>  	return 0;
>  
>  failed_addition:
>  	pr_debug("online_pages [mem %#010llx-%#010llx] failed\n",
>  		 (unsigned long long) pfn << PAGE_SHIFT,
>  		 (((unsigned long long) pfn + nr_pages) << PAGE_SHIFT) - 1);
> -	memory_notify(MEM_CANCEL_ONLINE, &arg);
> +	if (cancel_node_notifier_on_err)
> +		node_notify(NODE_CANCEL_MEM_AWARE, &node_arg);
> +	if (cancel_mem_notifier_on_err)
> +		memory_notify(MEM_CANCEL_ONLINE, &mem_arg);

Switch the order of those just for symmetry? :)

>  	remove_pfn_range_from_zone(zone, pfn, nr_pages);
>  	return ret;
>  }
> @@ -1898,7 +1922,7 @@ early_param("movable_node", cmdline_parse_movable_node);
>  
>  /* check which state of node_states will be changed when offline memory */
>  static void node_states_check_changes_offline(unsigned long nr_pages,
> -		struct zone *zone, struct memory_notify *arg)
> +		struct zone *zone, struct node_notify *arg)
>  {
>  	struct pglist_data *pgdat = zone->zone_pgdat;
>  	unsigned long present_pages = 0;
> @@ -1935,7 +1959,7 @@ static void node_states_check_changes_offline(unsigned long nr_pages,
>  		arg->status_change_nid = zone_to_nid(zone);
>  }
>  
> -static void node_states_clear_node(int node, struct memory_notify *arg)
> +static void node_states_clear_node(int node, struct node_notify *arg)
>  {
>  	if (arg->status_change_nid_normal >= 0)
>  		node_clear_state(node, N_NORMAL_MEMORY);
> @@ -1963,7 +1987,9 @@ int offline_pages(unsigned long start_pfn, unsigned long nr_pages,
>  	unsigned long pfn, managed_pages, system_ram_pages = 0;
>  	const int node = zone_to_nid(zone);
>  	unsigned long flags;
> -	struct memory_notify arg;
> +	struct memory_notify mem_arg;
> +	struct node_notify node_arg;
> +	bool cancel_mem_notifier_on_err = false, cancel_node_notifier_on_err = false;
>  	char *reason;
>  	int ret;
>  
> @@ -2022,11 +2048,22 @@ int offline_pages(unsigned long start_pfn, unsigned long nr_pages,
>  		goto failed_removal_pcplists_disabled;
>  	}
>  
> -	arg.start_pfn = start_pfn;
> -	arg.nr_pages = nr_pages;
> -	node_states_check_changes_offline(nr_pages, zone, &arg);
> +	mem_arg.start_pfn = start_pfn;
> +	mem_arg.nr_pages = nr_pages;
> +	node_states_check_changes_offline(nr_pages, zone, &node_arg);
> +
> +	if (node_arg.status_change_nid >= 0) {

Ditto.

> +		cancel_node_notifier_on_err = true;
> +		ret = node_notify(NODE_BECOMING_MEMORYLESS, &node_arg);
> +		ret = notifier_to_errno(ret);
> +		if (ret)
> +			goto failed_removal_isolated;
> +	}
>  
> -	ret = memory_notify(MEM_GOING_OFFLINE, &arg);
> +	cancel_mem_notifier_on_err = true;
> +	mem_arg.status_change_nid = node_arg.status_change_nid;
> +	mem_arg.status_change_nid_normal = node_arg.status_change_nid_normal;
> +	ret = memory_notify(MEM_GOING_OFFLINE, &mem_arg);
>  	ret = notifier_to_errno(ret);
>  	if (ret) {
>  		reason = "notifier failure";
> @@ -2106,27 +2143,32 @@ int offline_pages(unsigned long start_pfn, unsigned long nr_pages,
>  	 * Make sure to mark the node as memory-less before rebuilding the zone
>  	 * list. Otherwise this node would still appear in the fallback lists.
>  	 */
> -	node_states_clear_node(node, &arg);
> +	node_states_clear_node(node, &node_arg);
>  	if (!populated_zone(zone)) {
>  		zone_pcp_reset(zone);
>  		build_all_zonelists(NULL);
>  	}
>  
> -	if (arg.status_change_nid >= 0) {
> +	if (node_arg.status_change_nid >= 0) {
>  		kcompactd_stop(node);
>  		kswapd_stop(node);
> +		/* Node went memoryless. Notify interested consumers */
> +		node_notify(NODE_BECAME_MEMORYLESS, &node_arg);
>  	}
>  
>  	writeback_set_ratelimit();
>  
> -	memory_notify(MEM_OFFLINE, &arg);
> +	memory_notify(MEM_OFFLINE, &mem_arg);
>  	remove_pfn_range_from_zone(zone, start_pfn, nr_pages);
>  	return 0;
>  
>  failed_removal_isolated:
>  	/* pushback to free area */
>  	undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE);
> -	memory_notify(MEM_CANCEL_OFFLINE, &arg);
> +	if (cancel_node_notifier_on_err)
> +		node_notify(NODE_CANCEL_MEMORYLESS, &node_arg);
> +	if (cancel_mem_notifier_on_err)
> +		memory_notify(MEM_CANCEL_OFFLINE, &mem_arg);

Ditto.

>  failed_removal_pcplists_disabled:
>  	lru_cache_enable();
>  	zone_pcp_enable(zone);
> diff --git a/mm/slub.c b/mm/slub.c
> index 184fd2b14758..74350f6c8ddd 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -5928,10 +5928,10 @@ static int slab_mem_going_offline_callback(void *arg)
>  
>  static void slab_mem_offline_callback(void *arg)
>  {
> -	struct memory_notify *marg = arg;
> +	struct node_notify *narg = arg;
>  	int offline_node;
>  
> -	offline_node = marg->status_change_nid_normal;
> +	offline_node = narg->status_change_nid_normal;
>  
>  	/*
>  	 * If the node still has available memory. we need kmem_cache_node
> @@ -5954,8 +5954,8 @@ static int slab_mem_going_online_callback(void *arg)
>  {
>  	struct kmem_cache_node *n;
>  	struct kmem_cache *s;
> -	struct memory_notify *marg = arg;
> -	int nid = marg->status_change_nid_normal;
> +	struct node_notify *narg = arg;
> +	int nid = narg->status_change_nid_normal;
>  	int ret = 0;
>  
>  	/*
> @@ -6007,18 +6007,18 @@ static int slab_memory_callback(struct notifier_block *self,
>  	int ret = 0;
>  
>  	switch (action) {
> -	case MEM_GOING_ONLINE:
> +	case NODE_BECOMING_MEM_AWARE:
>  		ret = slab_mem_going_online_callback(arg);
>  		break;
> -	case MEM_GOING_OFFLINE:
> +	case NODE_BECOMING_MEMORYLESS:
>  		ret = slab_mem_going_offline_callback(arg);
>  		break;
> -	case MEM_OFFLINE:
> -	case MEM_CANCEL_ONLINE:
> +	case NODE_BECAME_MEMORYLESS:
> +	case NODE_CANCEL_MEM_AWARE:
>  		slab_mem_offline_callback(arg);
>  		break;
> -	case MEM_ONLINE:
> -	case MEM_CANCEL_OFFLINE:
> +	case NODE_BECAME_MEM_AWARE:
> +	case NODE_CANCEL_MEMORYLESS:
>  		break;
>  	}
>  	if (ret)
> @@ -6094,7 +6094,7 @@ void __init kmem_cache_init(void)
>  			sizeof(struct kmem_cache_node),
>  			SLAB_HWCACHE_ALIGN | SLAB_NO_OBJ_EXT, 0, 0);
>  
> -	hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI);
> +	hotplug_node_notifier(slab_memory_callback, SLAB_CALLBACK_PRI);
>  
>  	/* Able to allocate the per node structures */
>  	slab_state = PARTIAL;



  parent reply	other threads:[~2025-04-02 16:03 UTC|newest]

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-04-01  9:27 [PATCH 0/2] " Oscar Salvador
2025-04-01  9:27 ` [PATCH 1/2] mm,memory_hotplug: " Oscar Salvador
2025-04-01 14:19   ` Harry Yoo
2025-04-02 16:03   ` Vlastimil Babka [this message]
2025-04-02 16:57     ` Oscar Salvador
2025-04-03 12:44   ` Jonathan Cameron
2025-04-04 10:09   ` David Hildenbrand
2025-04-04 12:56     ` Oscar Salvador
2025-04-04 13:14       ` David Hildenbrand
2025-04-01  9:27 ` [PATCH 2/2] mm,memory_hotplug: Replace status_change_nid parameter in memory_notify Oscar Salvador
2025-04-02  2:53   ` Harry Yoo
2025-04-02 16:09   ` Vlastimil Babka
2025-04-02 16:06 ` [PATCH 0/2] Implement numa node notifier Vlastimil Babka
2025-04-02 17:03   ` Oscar Salvador
2025-04-03 13:02     ` David Hildenbrand
2025-04-03 13:08       ` David Hildenbrand
2025-04-03 13:57         ` Harry Yoo
2025-04-04  8:47         ` Vlastimil Babka
2025-04-03 22:06       ` Harry Yoo
2025-04-04  8:50         ` Vlastimil Babka
2025-04-04 10:02           ` Harry Yoo
2025-04-03 12:29 ` Jonathan Cameron

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4e122668-6f6a-4874-85df-e6869b9ccb24@suse.cz \
    --to=vbabka@suse.cz \
    --cc=42.hyeyoo@gmail.com \
    --cc=Jonathan.Cameron@huawei.com \
    --cc=akpm@linux-foundation.org \
    --cc=dan.j.williams@intel.com \
    --cc=david@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mkoutny@suse.com \
    --cc=osalvador@suse.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox