Re: [PATCH v16 1/2] ACPI:RAS2: Add driver for the ACPI RAS2 feature table

linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed

From: Borislav Petkov <bp@alien8.de>
To: shiju.jose@huawei.com
Cc: rafael@kernel.org, akpm@linux-foundation.org, rppt@kernel.org,
	dferguson@amperecomputing.com, linux-edac@vger.kernel.org,
	linux-acpi@vger.kernel.org, linux-mm@kvack.org,
	linux-doc@vger.kernel.org, tony.luck@intel.com, lenb@kernel.org,
	leo.duran@amd.com, Yazen.Ghannam@amd.com, mchehab@kernel.org,
	jonathan.cameron@huawei.com, linuxarm@huawei.com,
	rientjes@google.com, jiaqiyan@google.com, Jon.Grimm@amd.com,
	dave.hansen@linux.intel.com, naoya.horiguchi@nec.com,
	james.morse@arm.com, jthoughton@google.com,
	somasundaram.a@hpe.com, erdemaktas@google.com, pgonda@google.com,
	duenwen@google.com, gthelen@google.com,
	wschwartz@amperecomputing.com, wbs@os.amperecomputing.com,
	nifan.cxl@gmail.com, tanxiaofei@huawei.com,
	prime.zeng@hisilicon.com, roberto.sassu@huawei.com,
	kangkang.shen@futurewei.com, wanghuiqiang@huawei.com
Subject: Re: [PATCH v16 1/2] ACPI:RAS2: Add driver for the ACPI RAS2 feature table
Date: Mon, 26 Jan 2026 18:15:52 +0100	[thread overview]
Message-ID: <20260126171552.GJaXehSJp33nFnpvVd@fat_crate.local> (raw)
In-Reply-To: <20260123175512.2066-2-shiju.jose@huawei.com>

On Fri, Jan 23, 2026 at 05:55:07PM +0000, shiju.jose@huawei.com wrote:
> +static int parse_ras2_table(struct acpi_table_ras2 *ras2_tab)
> +{
> +	struct acpi_ras2_pcc_desc *pcc_desc_list;
> +	struct ras2_mem_ctx **pctx_list;
> +	struct ras2_mem_ctx *ras2_ctx;
> +	u16 i;
> +
> +	if (ras2_tab->header.length < sizeof(*ras2_tab)) {
> +		pr_warn(FW_WARN "ACPI RAS2 table present but broken (too short, size=%u)\n",
> +			ras2_tab->header.length);
> +		return -EINVAL;
> +	}
> +
> +	if (!ras2_tab->num_pcc_descs || ras2_tab->num_pcc_descs > RAS2_MAX_NUM_PCC_DESCS) {
> +		pr_warn(FW_WARN "No/Invalid number of PCC descs(%d) in ACPI RAS2 table\n",
> +			ras2_tab->num_pcc_descs);
> +		return -EINVAL;
> +	}
> +
> +	pctx_list = kcalloc(ras2_tab->num_pcc_descs, sizeof(*pctx_list), GFP_KERNEL);
> +	if (!pctx_list)
> +		return -ENOMEM;
> +
> +	pcc_desc_list = (struct acpi_ras2_pcc_desc *)(ras2_tab + 1);
> +	for (i = 0; i < ras2_tab->num_pcc_descs; i++, pcc_desc_list++) {
> +		if (pcc_desc_list->feature_type != RAS2_FEAT_TYPE_MEMORY)
> +			continue;
> +
> +		ras2_ctx = add_aux_device(RAS2_MEM_DEV_ID_NAME, pcc_desc_list->channel_id,
> +					  pcc_desc_list->instance);
> +		if (IS_ERR(ras2_ctx)) {
> +			pr_warn("Failed to add RAS2 auxiliary device rc=%ld\n", PTR_ERR(ras2_ctx));
> +			for (; i > 0; i--) {
> +				if (pctx_list[i - 1])
> +					auxiliary_device_uninit(&pctx_list[i - 1]->adev);

This is wrong - there should be a function called remove_aux_device() which
unwinds everything add_aux_device() does for all those devices.

In addition, I did a bunch of cleanups on top, see below. I can't test them, so
please have a look and run them on your hardware and, if all is good, merge them
with your patch.

Thx.

---

diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
index 7f846c22fc30..0010b38e8f81 100644
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -299,9 +299,10 @@ config ACPI_RAS2
 	depends on MAILBOX
 	depends on PCC
 	help
-	  This driver adds support for RAS2 feature table provides interfaces
-	  for platform RAS features, e.g., for HW-based memory scrubbing.
-	  Say 'y/n' to enable/disable ACPI RAS2 support.
+	  Add support for the RAS2 feature table and provide interfaces for
+	  platform RAS features, such as hardware-based memory scrubbing.
+ 
+	  If unsure, select N.
 
 config ACPI_PROCESSOR
 	tristate "Processor"
diff --git a/drivers/acpi/ras2.c b/drivers/acpi/ras2.c
index a9a9c480ee29..6eed1ada18e1 100644
--- a/drivers/acpi/ras2.c
+++ b/drivers/acpi/ras2.c
@@ -6,10 +6,11 @@
  *
  * Support for RAS2 table - ACPI 6.5 Specification, section 5.2.21, which
  * provides interfaces for platform RAS features, e.g., for HW-based memory
- * scrubbing, and logical to PA translation service. RAS2 uses PCC channel
- * subspace for communicating with the ACPI compliant HW platform.
+ * scrubbing, and logical to physical address translation service. RAS2 uses
+ * PCC channel subspace for communicating with the ACPI compliant HW platform.
  */
 
+#undef pr_fmt
 #define pr_fmt(fmt) "ACPI RAS2: " fmt
 
 #include <linux/delay.h>
@@ -26,13 +27,13 @@
  * @comm_addr:		Pointer to RAS2 PCC shared memory region
  * @pcc_lock:		PCC lock to provide mutually exclusive access
  *			to PCC channel subspace
- * @deadline_us:	Poll PCC status register timeout in micro secs
- *			for PCC command complete
+ * @deadline_us:	Poll PCC status register timeout in microsecs
+ *			for PCC command completion
  * @pcc_mpar:		Maximum Periodic Access Rate (MPAR) for PCC channel
- * @pcc_mrtt:		Minimum Request Turnaround Time (MRTT) in micro secs
+ * @pcc_mrtt:		Minimum Request Turnaround Time (MRTT) in microsecs
  *			OS must wait after completion of a PCC command before
- *			issue next command
- * @last_cmd_cmpl_time:	completion time of last PCC command
+ *			issuing next command
+ * @last_cmd_cmpl_time:	Completion time of last PCC command
  * @last_mpar_reset:	Time of last MPAR count reset
  * @mpar_count:		MPAR count
  * @pcc_id:		Identifier of the RAS2 platform communication channel
@@ -56,63 +57,67 @@ struct ras2_sspcc {
 };
 
 /*
- * Arbitrary retries for PCC commands because the remote processor
- * could be much slower to reply. Keeping it high enough to cover
- * emulators where the processors run painfully slow.
+ * Arbitrary retries for PCC commands because the remote processor could be
+ * much slower to reply. Keep it high enough to cover emulators where the
+ * processors run painfully slow.
  */
 #define PCC_NUM_RETRIES 600ULL
+#define PCC_MIN_POLL_USECS 3
 
 #define RAS2_MAX_NUM_PCC_DESCS 100
 #define RAS2_FEAT_TYPE_MEMORY 0x00
 
-static int decode_cap_error(u32 cap_status)
-{
-	switch (cap_status) {
-	case ACPI_RAS2_NOT_VALID:
-	case ACPI_RAS2_NOT_SUPPORTED:
-		return -EPERM;
-	case ACPI_RAS2_BUSY:
-		return -EBUSY;
-	case ACPI_RAS2_FAILED:
-	case ACPI_RAS2_ABORTED:
-	case ACPI_RAS2_INVALID_DATA:
-		return -EINVAL;
-	default:
-		return 0;
-	}
-}
-
 static int check_pcc_chan(struct ras2_sspcc *sspcc)
 {
 	struct acpi_ras2_shmem __iomem *gen_comm_base = sspcc->comm_addr;
+	u32 cap_status;
 	u16 status;
 	int rc;
 
 	/*
-	 * As per ACPI spec, the PCC space will be initialized by
+	 * As per ACPI spec, the PCC space will be initialized by the
 	 * platform and should have set the command completion bit when
 	 * PCC can be used by OSPM.
 	 *
-	 * Poll PCC status register every 3us for maximum of 600ULL * PCC
-	 * channel latency until PCC command complete bit is set.
+	 * Poll PCC status register every PCC_MIN_POLL_USECS for maximum of
+	 * PCC_NUM_RETRIES * PCC channel latency until PCC command complete
+	 * bit is set.
 	 */
 	rc = readw_relaxed_poll_timeout(&gen_comm_base->status, status,
-					status & PCC_STATUS_CMD_COMPLETE, 3, sspcc->deadline_us);
+					status & PCC_STATUS_CMD_COMPLETE,
+					PCC_MIN_POLL_USECS, sspcc->deadline_us);
 	if (rc) {
-		pr_warn("PCC check channel timeout for last command: 0x%x pcc_id=%d rc=%d\n",
-			 sspcc->last_cmd, sspcc->pcc_id, rc);
+		pr_warn("PCC ID: 0x%x: PCC check channel timeout for last command: 0x%x rc=%d\n",
+		        sspcc->pcc_id, sspcc->last_cmd, rc);
 		return rc;
 	}
 
 	if (status & PCC_STATUS_ERROR) {
-		pr_warn("Error in executing last command: 0x%x for pcc_id=%d\n",
-			sspcc->last_cmd, sspcc->pcc_id);
+		pr_warn("PCC ID: 0x%x: Error in executing last command: 0x%x\n",
+			sspcc->pcc_id, sspcc->last_cmd);
+
 		status &= ~PCC_STATUS_ERROR;
 		writew_relaxed(status, &gen_comm_base->status);
 		return -EIO;
 	}
 
-	rc = decode_cap_error(readw_relaxed(&gen_comm_base->set_caps_status));
+	cap_status = readw_relaxed(&gen_comm_base->set_caps_status);
+	switch (cap_status) {
+	case ACPI_RAS2_NOT_VALID:
+	case ACPI_RAS2_NOT_SUPPORTED:
+		rc = -EPERM;
+		break;
+	case ACPI_RAS2_BUSY:
+		rc = -EBUSY;
+		break;
+	case ACPI_RAS2_FAILED:
+	case ACPI_RAS2_ABORTED:
+	case ACPI_RAS2_INVALID_DATA:
+		rc = -EINVAL;
+		break;
+	default:
+		rc = 0;
+	}
 
 	writew_relaxed(0x0, &gen_comm_base->set_caps_status);
 
@@ -128,15 +133,18 @@ static int check_pcc_chan(struct ras2_sspcc *sspcc)
  */
 int ras2_send_pcc_cmd(struct ras2_mem_ctx *ras2_ctx, u16 cmd)
 {
-	struct ras2_sspcc *sspcc = ras2_ctx->sspcc;
-	struct acpi_ras2_shmem __iomem *gen_comm_base = sspcc->comm_addr;
+	struct acpi_ras2_shmem __iomem *gen_comm_base;
 	struct mbox_chan *pcc_channel;
+	struct ras2_sspcc *sspcc;
 	unsigned int time_delta;
 	int rc;
 
 	if (!ras2_ctx)
 		return -EINVAL;
 
+	sspcc = ras2_ctx->sspcc;
+	gen_comm_base = sspcc->comm_addr;
+
 	rc = check_pcc_chan(sspcc);
 	if (rc < 0)
 		return rc;
@@ -144,9 +152,9 @@ int ras2_send_pcc_cmd(struct ras2_mem_ctx *ras2_ctx, u16 cmd)
 	pcc_channel = sspcc->pcc_chan->mchan;
 
 	/*
-	 * Handle the Minimum Request Turnaround Time (MRTT).
-	 * "The minimum amount of time that OSPM must wait after the completion
-	 * of a command before issuing the next command, in microseconds."
+	 * Handle the Minimum Request Turnaround Time (MRTT): the minimum
+	 * amount of time that OSPM must wait after the completion of
+	 * a command before issuing the next command, in microseconds.
 	 */
 	if (sspcc->pcc_mrtt) {
 		time_delta = ktime_us_delta(ktime_get(), sspcc->last_cmd_cmpl_time);
@@ -155,24 +163,26 @@ int ras2_send_pcc_cmd(struct ras2_mem_ctx *ras2_ctx, u16 cmd)
 	}
 
 	/*
-	 * Handle the non-zero Maximum Periodic Access Rate (MPAR).
-	 * "The maximum number of periodic requests that the subspace channel can
-	 * support, reported in commands per minute. 0 indicates no limitation."
+	 * Handle the non-zero Maximum Periodic Access Rate (MPAR): the
+	 * maximum number of periodic requests that the subspace channel can
+	 * support, reported in commands per minute. 0 indicates no
+	 * limitation.
 	 *
-	 * This parameter should be ideally zero or large enough so that it can
-	 * handle maximum number of requests that all the cores in the system can
-	 * collectively generate. If it is not, follow the spec and just not
-	 * send the request to the platform after hitting the MPAR limit in
-	 * any 60s window.
+	 * This parameter should be ideally zero or large enough so that it
+	 * can handle maximum number of requests that all the cores in the
+	 * system can collectively generate. If it is not, follow the spec and
+	 * just not send the request to the platform after hitting the MPAR
+	 * limit in any 60s window.
 	 */
 	if (sspcc->pcc_mpar) {
 		if (!sspcc->mpar_count) {
 			time_delta = ktime_ms_delta(ktime_get(), sspcc->last_mpar_reset);
 			if (time_delta < 60 * MSEC_PER_SEC) {
 				dev_dbg(ras2_ctx->dev,
-					"PCC command: 0x%x not sent due to MPAR limit", cmd);
+					"PCC command 0x%x not sent due to MPAR limit", cmd);
 				return -EIO;
 			}
+
 			sspcc->last_mpar_reset = ktime_get();
 			sspcc->mpar_count = sspcc->pcc_mpar;
 		}
@@ -187,22 +197,24 @@ int ras2_send_pcc_cmd(struct ras2_mem_ctx *ras2_ctx, u16 cmd)
 
 	/* Ring doorbell */
 	rc = mbox_send_message(pcc_channel, &cmd);
+
 	/*
-	 * mbox_send_message() return non-negative integer for successful submission
-	 * and negative value on failure.
+	 * mbox_send_message() returns a non-negative integer for successful submission
+	 * and a negative value on failure.
 	 */
-	rc = rc < 0 ? rc : 0;
 	if (rc < 0) {
 		dev_warn(ras2_ctx->dev,
 			 "Error sending PCC mbox message command: 0x%x, rc:%d\n", cmd, rc);
 		return rc;
+	} else {
+		rc = 0;
 	}
 
 	sspcc->last_cmd = cmd;
 
 	/*
 	 * If Minimum Request Turnaround Time is non-zero, need to record the
-	 * completion time of both READ and WRITE command for proper handling
+	 * completion time of both READ and WRITE commands for proper handling
 	 * of MRTT, so need to check for pcc_mrtt in addition to PCC_CMD_EXEC_RAS2.
 	 */
 	if (cmd == PCC_CMD_EXEC_RAS2 || sspcc->pcc_mrtt) {
@@ -222,7 +234,7 @@ int ras2_send_pcc_cmd(struct ras2_mem_ctx *ras2_ctx, u16 cmd)
 
 	return rc;
 }
-EXPORT_SYMBOL_GPL(ras2_send_pcc_cmd);
+EXPORT_SYMBOL_FOR_MODULES(ras2_send_pcc_cmd, "acpi_ras2");
 
 static int register_pcc_channel(struct ras2_mem_ctx *ras2_ctx, int pcc_id)
 {
@@ -283,18 +295,20 @@ static struct ras2_mem_ctx *add_aux_device(char *name, int channel, u32 pxm_inst
 {
 	struct ras2_mem_ctx *ras2_ctx;
 	struct ras2_sspcc *sspcc;
+	u32 comp_nid;
 	int id, rc;
 
+	comp_nid = pxm_to_node(pxm_inst);
+	if (comp_nid == NUMA_NO_NODE) {
+		pr_debug("Invalid NUMA node, channel=%d pxm_inst=%d\n", channel, pxm_inst);
+		return ERR_PTR(-EINVAL);
+	}
+
 	ras2_ctx = kzalloc(sizeof(*ras2_ctx), GFP_KERNEL);
 	if (!ras2_ctx)
 		return ERR_PTR(-ENOMEM);
 
-	ras2_ctx->sys_comp_nid = pxm_to_node(pxm_inst);
-	if (ras2_ctx->sys_comp_nid == NUMA_NO_NODE) {
-		pr_debug("Invalid NUMA node, channel=%d pxm_inst=%d\n", channel, pxm_inst);
-		rc = -EINVAL;
-		goto ctx_free;
-	}
+	ras2_ctx->sys_comp_nid = comp_nid;
 
 	rc = register_pcc_channel(ras2_ctx, channel);
 	if (rc < 0) {
@@ -321,7 +335,7 @@ static struct ras2_mem_ctx *add_aux_device(char *name, int channel, u32 pxm_inst
 	rc = auxiliary_device_add(&ras2_ctx->adev);
 	if (rc) {
 		auxiliary_device_uninit(&ras2_ctx->adev);
-		return ERR_PTR(rc);
+		goto ida_free;
 	}
 
 	return ras2_ctx;

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette

next prev parent reply	other threads:[~2026-01-26 17:16 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-01-23 17:55 [PATCH v16 0/2] ACPI: Add support for " shiju.jose
2026-01-23 17:55 ` [PATCH v16 1/2] ACPI:RAS2: Add driver for the " shiju.jose
2026-01-26 17:15   ` Borislav Petkov [this message]
2026-01-28 16:40     ` Shiju Jose
2026-01-23 17:55 ` [PATCH v16 2/2] ras: mem: Add ACPI RAS2 memory driver shiju.jose
2026-02-12 18:09 ` [PATCH v16 0/2] ACPI: Add support for ACPI RAS2 feature table Daniel Ferguson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260126171552.GJaXehSJp33nFnpvVd@fat_crate.local \
    --to=bp@alien8.de \
    --cc=Jon.Grimm@amd.com \
    --cc=Yazen.Ghannam@amd.com \
    --cc=akpm@linux-foundation.org \
    --cc=dave.hansen@linux.intel.com \
    --cc=dferguson@amperecomputing.com \
    --cc=duenwen@google.com \
    --cc=erdemaktas@google.com \
    --cc=gthelen@google.com \
    --cc=james.morse@arm.com \
    --cc=jiaqiyan@google.com \
    --cc=jonathan.cameron@huawei.com \
    --cc=jthoughton@google.com \
    --cc=kangkang.shen@futurewei.com \
    --cc=lenb@kernel.org \
    --cc=leo.duran@amd.com \
    --cc=linux-acpi@vger.kernel.org \
    --cc=linux-doc@vger.kernel.org \
    --cc=linux-edac@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linuxarm@huawei.com \
    --cc=mchehab@kernel.org \
    --cc=naoya.horiguchi@nec.com \
    --cc=nifan.cxl@gmail.com \
    --cc=pgonda@google.com \
    --cc=prime.zeng@hisilicon.com \
    --cc=rafael@kernel.org \
    --cc=rientjes@google.com \
    --cc=roberto.sassu@huawei.com \
    --cc=rppt@kernel.org \
    --cc=shiju.jose@huawei.com \
    --cc=somasundaram.a@hpe.com \
    --cc=tanxiaofei@huawei.com \
    --cc=tony.luck@intel.com \
    --cc=wanghuiqiang@huawei.com \
    --cc=wbs@os.amperecomputing.com \
    --cc=wschwartz@amperecomputing.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox