diff --git a/Documentation/ABI/testing/sysfs-bus-cxl b/Documentation/ABI/testing/sysfs-bus-cxl index 087f762ebfd534..e76c3600607f8c 100644 --- a/Documentation/ABI/testing/sysfs-bus-cxl +++ b/Documentation/ABI/testing/sysfs-bus-cxl @@ -178,6 +178,21 @@ Description: hardware decoder target list. +What: /sys/bus/cxl/devices/portX/decoders_committed +Date: October, 2023 +KernelVersion: v6.7 +Contact: linux-cxl@vger.kernel.org +Description: + (RO) A memory device is considered active when any of its + decoders are in the "committed" state (See CXL 3.0 8.2.4.19.7 + CXL HDM Decoder n Control Register). Hotplug and destructive + operations like "sanitize" are blocked while the device is actively + decoding a Host Physical Address range. Note that this number + may be elevated without any regionX objects active or even + enumerated, as this may be due to decoders established by + platform firmware or a previous kernel (kexec). + + What: /sys/bus/cxl/devices/decoderX.Y Date: June, 2021 KernelVersion: v5.14 @@ -369,6 +384,21 @@ Description: provided it is currently idle / not bound to a driver. +What: /sys/bus/cxl/devices/decoderX.Y/qos_class +Date: May, 2023 +KernelVersion: v6.5 +Contact: linux-cxl@vger.kernel.org +Description: + (RO) For CXL host platforms that support "QoS Telemetry" this + root-decoder-only attribute conveys a platform specific cookie + that identifies a QoS performance class for the CXL Window. + This class-id can be compared against a similar "qos_class" + published for each memory-type that an endpoint supports. While + it is not required that endpoints map their local memory-class + to a matching platform class, mismatches are not recommended and + there are platform specific side-effects that may result. + + What: /sys/bus/cxl/devices/regionZ/uuid Date: May, 2022 KernelVersion: v6.0 diff --git a/MAINTAINERS b/MAINTAINERS index 796182c8e1702f..2e93b42f5bd109 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -294,6 +294,8 @@ F: drivers/pnp/pnpacpi/ F: include/acpi/ F: include/linux/acpi.h F: include/linux/fwnode.h +F: include/linux/fw_table.h +F: lib/fw_table.c F: tools/power/acpi/ ACPI APEI @@ -5244,6 +5246,7 @@ L: linux-cxl@vger.kernel.org S: Maintained F: drivers/cxl/ F: include/uapi/linux/cxl_mem.h +F: tools/testing/cxl/ COMPUTE EXPRESS LINK PMU (CPMU) M: Jonathan Cameron diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index 554e487cbfab24..f819e760ff195a 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -12,6 +12,7 @@ menuconfig ACPI select PNP select NLS select CRC32 + select FIRMWARE_TABLE default y if X86 help Advanced Configuration and Power Interface (ACPI) support for diff --git a/drivers/acpi/tables.c b/drivers/acpi/tables.c index 8ab0a82b4da41d..c1516337f66828 100644 --- a/drivers/acpi/tables.c +++ b/drivers/acpi/tables.c @@ -37,18 +37,6 @@ static struct acpi_table_desc initial_tables[ACPI_MAX_TABLES] __initdata; static int acpi_apic_instance __initdata_or_acpilib; -enum acpi_subtable_type { - ACPI_SUBTABLE_COMMON, - ACPI_SUBTABLE_HMAT, - ACPI_SUBTABLE_PRMT, - ACPI_SUBTABLE_CEDT, -}; - -struct acpi_subtable_entry { - union acpi_subtable_headers *hdr; - enum acpi_subtable_type type; -}; - /* * Disable table checksum verification for the early stage due to the size * limitation of the current x86 early mapping implementation.
@@ -237,167 +225,6 @@ void acpi_table_print_madt_entry(struct acpi_subtable_header *header) } } -static unsigned long __init_or_acpilib -acpi_get_entry_type(struct acpi_subtable_entry *entry) -{ - switch (entry->type) { - case ACPI_SUBTABLE_COMMON: - return entry->hdr->common.type; - case ACPI_SUBTABLE_HMAT: - return entry->hdr->hmat.type; - case ACPI_SUBTABLE_PRMT: - return 0; - case ACPI_SUBTABLE_CEDT: - return entry->hdr->cedt.type; - } - return 0; -} - -static unsigned long __init_or_acpilib -acpi_get_entry_length(struct acpi_subtable_entry *entry) -{ - switch (entry->type) { - case ACPI_SUBTABLE_COMMON: - return entry->hdr->common.length; - case ACPI_SUBTABLE_HMAT: - return entry->hdr->hmat.length; - case ACPI_SUBTABLE_PRMT: - return entry->hdr->prmt.length; - case ACPI_SUBTABLE_CEDT: - return entry->hdr->cedt.length; - } - return 0; -} - -static unsigned long __init_or_acpilib -acpi_get_subtable_header_length(struct acpi_subtable_entry *entry) -{ - switch (entry->type) { - case ACPI_SUBTABLE_COMMON: - return sizeof(entry->hdr->common); - case ACPI_SUBTABLE_HMAT: - return sizeof(entry->hdr->hmat); - case ACPI_SUBTABLE_PRMT: - return sizeof(entry->hdr->prmt); - case ACPI_SUBTABLE_CEDT: - return sizeof(entry->hdr->cedt); - } - return 0; -} - -static enum acpi_subtable_type __init_or_acpilib -acpi_get_subtable_type(char *id) -{ - if (strncmp(id, ACPI_SIG_HMAT, 4) == 0) - return ACPI_SUBTABLE_HMAT; - if (strncmp(id, ACPI_SIG_PRMT, 4) == 0) - return ACPI_SUBTABLE_PRMT; - if (strncmp(id, ACPI_SIG_CEDT, 4) == 0) - return ACPI_SUBTABLE_CEDT; - return ACPI_SUBTABLE_COMMON; -} - -static __init_or_acpilib bool has_handler(struct acpi_subtable_proc *proc) -{ - return proc->handler || proc->handler_arg; -} - -static __init_or_acpilib int call_handler(struct acpi_subtable_proc *proc, - union acpi_subtable_headers *hdr, - unsigned long end) -{ - if (proc->handler) - return proc->handler(hdr, end); - if (proc->handler_arg) - return proc->handler_arg(hdr, proc->arg, end); - return -EINVAL; -} - -/** - * acpi_parse_entries_array - for each proc_num find a suitable subtable - * - * @id: table id (for debugging purposes) - * @table_size: size of the root table - * @table_header: where does the table start? - * @proc: array of acpi_subtable_proc struct containing entry id - * and associated handler with it - * @proc_num: how big proc is? - * @max_entries: how many entries can we process? - * - * For each proc_num find a subtable with proc->id and run proc->handler - * on it. Assumption is that there's only single handler for particular - * entry id. - * - * The table_size is not the size of the complete ACPI table (the length - * field in the header struct), but only the size of the root table; i.e., - * the offset from the very first byte of the complete ACPI table, to the - * first byte of the very first subtable. - * - * On success returns sum of all matching entries for all proc handlers. - * Otherwise, -ENODEV or -EINVAL is returned. - */ -static int __init_or_acpilib acpi_parse_entries_array( - char *id, unsigned long table_size, - struct acpi_table_header *table_header, struct acpi_subtable_proc *proc, - int proc_num, unsigned int max_entries) -{ - struct acpi_subtable_entry entry; - unsigned long table_end, subtable_len, entry_len; - int count = 0; - int errs = 0; - int i; - - table_end = (unsigned long)table_header + table_header->length; - - /* Parse all entries looking for a match. 
*/ - - entry.type = acpi_get_subtable_type(id); - entry.hdr = (union acpi_subtable_headers *) - ((unsigned long)table_header + table_size); - subtable_len = acpi_get_subtable_header_length(&entry); - - while (((unsigned long)entry.hdr) + subtable_len < table_end) { - if (max_entries && count >= max_entries) - break; - - for (i = 0; i < proc_num; i++) { - if (acpi_get_entry_type(&entry) != proc[i].id) - continue; - if (!has_handler(&proc[i]) || - (!errs && - call_handler(&proc[i], entry.hdr, table_end))) { - errs++; - continue; - } - - proc[i].count++; - break; - } - if (i != proc_num) - count++; - - /* - * If entry->length is 0, break from this loop to avoid - * infinite loop. - */ - entry_len = acpi_get_entry_length(&entry); - if (entry_len == 0) { - pr_err("[%4.4s:0x%02x] Invalid zero length\n", id, proc->id); - return -EINVAL; - } - - entry.hdr = (union acpi_subtable_headers *) - ((unsigned long)entry.hdr + entry_len); - } - - if (max_entries && count > max_entries) { - pr_warn("[%4.4s:0x%02x] found the maximum %i entries\n", - id, proc->id, count); - } - - return errs ? -EINVAL : count; -} - int __init_or_acpilib acpi_table_parse_entries_array( char *id, unsigned long table_size, struct acpi_subtable_proc *proc, int proc_num, unsigned int max_entries) diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c index 40d055560e52fd..2034eb4ce83fb7 100644 --- a/drivers/cxl/acpi.c +++ b/drivers/cxl/acpi.c @@ -289,6 +289,9 @@ static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg, } } } + + cxlrd->qos_class = cfmws->qtg_id; + rc = cxl_decoder_add(cxld, target_map); err_xormap: if (rc) diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h index 45e7e044cf4a04..86d7ba23235e3b 100644 --- a/drivers/cxl/core/core.h +++ b/drivers/cxl/core/core.h @@ -73,8 +73,10 @@ struct cxl_rcrb_info; resource_size_t __rcrb_to_component(struct device *dev, struct cxl_rcrb_info *ri, enum cxl_rcrb which); +u16 cxl_rcrb_to_aer(struct device *dev, resource_size_t rcrb); extern struct rw_semaphore cxl_dpa_rwsem; +extern struct rw_semaphore cxl_region_rwsem; int cxl_memdev_init(void); void cxl_memdev_exit(void); diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c index 4449b34a80cc9b..1cc9be85ba4cd1 100644 --- a/drivers/cxl/core/hdm.c +++ b/drivers/cxl/core/hdm.c @@ -81,26 +81,6 @@ static void parse_hdm_decoder_caps(struct cxl_hdm *cxlhdm) cxlhdm->interleave_mask |= GENMASK(14, 12); } -static int map_hdm_decoder_regs(struct cxl_port *port, void __iomem *crb, - struct cxl_component_regs *regs) -{ - struct cxl_register_map map = { - .dev = &port->dev, - .resource = port->component_reg_phys, - .base = crb, - .max_size = CXL_COMPONENT_REG_BLOCK_SIZE, - }; - - cxl_probe_component_regs(&port->dev, crb, &map.component_map); - if (!map.component_map.hdm_decoder.valid) { - dev_dbg(&port->dev, "HDM decoder registers not implemented\n"); - /* unique error code to indicate no HDM decoder capability */ - return -ENODEV; - } - - return cxl_map_component_regs(&map, regs, BIT(CXL_CM_CAP_CAP_ID_HDM)); -} - static bool should_emulate_decoders(struct cxl_endpoint_dvsec_info *info) { struct cxl_hdm *cxlhdm; @@ -153,9 +133,9 @@ static bool should_emulate_decoders(struct cxl_endpoint_dvsec_info *info) struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port, struct cxl_endpoint_dvsec_info *info) { + struct cxl_register_map *reg_map = &port->reg_map; struct device *dev = &port->dev; struct cxl_hdm *cxlhdm; - void __iomem *crb; int rc; cxlhdm = devm_kzalloc(dev, sizeof(*cxlhdm), GFP_KERNEL); @@ -164,19 +144,29 
@@ struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port, cxlhdm->port = port; dev_set_drvdata(dev, cxlhdm); - crb = ioremap(port->component_reg_phys, CXL_COMPONENT_REG_BLOCK_SIZE); - if (!crb && info && info->mem_enabled) { + /* Memory devices can configure device HDM using DVSEC range regs. */ + if (reg_map->resource == CXL_RESOURCE_NONE) { + if (!info || !info->mem_enabled) { + dev_err(dev, "No component registers mapped\n"); + return ERR_PTR(-ENXIO); + } + cxlhdm->decoder_count = info->ranges; return cxlhdm; - } else if (!crb) { - dev_err(dev, "No component registers mapped\n"); - return ERR_PTR(-ENXIO); } - rc = map_hdm_decoder_regs(port, crb, &cxlhdm->regs); - iounmap(crb); - if (rc) + if (!reg_map->component_map.hdm_decoder.valid) { + dev_dbg(&port->dev, "HDM decoder registers not implemented\n"); + /* unique error code to indicate no HDM decoder capability */ + return ERR_PTR(-ENODEV); + } + + rc = cxl_map_component_regs(reg_map, &cxlhdm->regs, + BIT(CXL_CM_CAP_CAP_ID_HDM)); + if (rc) { + dev_err(dev, "Failed to map HDM capability.\n"); return ERR_PTR(rc); + } parse_hdm_decoder_caps(cxlhdm); if (cxlhdm->decoder_count == 0) { @@ -575,17 +565,11 @@ static void cxld_set_type(struct cxl_decoder *cxld, u32 *ctrl) CXL_HDM_DECODER0_CTRL_HOSTONLY); } -static int cxlsd_set_targets(struct cxl_switch_decoder *cxlsd, u64 *tgt) +static void cxlsd_set_targets(struct cxl_switch_decoder *cxlsd, u64 *tgt) { struct cxl_dport **t = &cxlsd->target[0]; int ways = cxlsd->cxld.interleave_ways; - if (dev_WARN_ONCE(&cxlsd->cxld.dev, - ways > 8 || ways > cxlsd->nr_targets, - "ways: %d overflows targets: %d\n", ways, - cxlsd->nr_targets)) - return -ENXIO; - *tgt = FIELD_PREP(GENMASK(7, 0), t[0]->port_id); if (ways > 1) *tgt |= FIELD_PREP(GENMASK(15, 8), t[1]->port_id); @@ -601,8 +585,6 @@ static int cxlsd_set_targets(struct cxl_switch_decoder *cxlsd, u64 *tgt) *tgt |= FIELD_PREP(GENMASK_ULL(55, 48), t[6]->port_id); if (ways > 7) *tgt |= FIELD_PREP(GENMASK_ULL(63, 56), t[7]->port_id); - - return 0; } /* @@ -643,13 +625,33 @@ static int cxl_decoder_commit(struct cxl_decoder *cxld) if (cxld->flags & CXL_DECODER_F_ENABLE) return 0; - if (port->commit_end + 1 != id) { + if (cxl_num_decoders_committed(port) != id) { dev_dbg(&port->dev, "%s: out of order commit, expected decoder%d.%d\n", - dev_name(&cxld->dev), port->id, port->commit_end + 1); + dev_name(&cxld->dev), port->id, + cxl_num_decoders_committed(port)); return -EBUSY; } + /* + * For endpoint decoders hosted on CXL memory devices that + * support the sanitize operation, make sure sanitize is not in-flight. 
+ */ + if (is_endpoint_decoder(&cxld->dev)) { + struct cxl_endpoint_decoder *cxled = + to_cxl_endpoint_decoder(&cxld->dev); + struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); + struct cxl_memdev_state *mds = + to_cxl_memdev_state(cxlmd->cxlds); + + if (mds && mds->security.sanitize_active) { + dev_dbg(&cxlmd->dev, + "attempted to commit %s during sanitize\n", + dev_name(&cxld->dev)); + return -EBUSY; + } + } + down_read(&cxl_dpa_rwsem); /* common decoder settings */ ctrl = readl(hdm + CXL_HDM_DECODER0_CTRL_OFFSET(cxld->id)); @@ -670,13 +672,7 @@ static int cxl_decoder_commit(struct cxl_decoder *cxld) void __iomem *tl_lo = hdm + CXL_HDM_DECODER0_TL_LOW(id); u64 targets; - rc = cxlsd_set_targets(cxlsd, &targets); - if (rc) { - dev_dbg(&port->dev, "%s: target configuration error\n", - dev_name(&cxld->dev)); - goto err; - } - + cxlsd_set_targets(cxlsd, &targets); writel(upper_32_bits(targets), tl_hi); writel(lower_32_bits(targets), tl_lo); } else { @@ -694,7 +690,6 @@ static int cxl_decoder_commit(struct cxl_decoder *cxld) port->commit_end++; rc = cxld_await_commit(hdm, cxld->id); -err: if (rc) { dev_dbg(&port->dev, "%s: error %d committing decoder\n", dev_name(&cxld->dev), rc); @@ -844,7 +839,7 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld, cxld->target_type = CXL_DECODER_HOSTONLYMEM; else cxld->target_type = CXL_DECODER_DEVMEM; - if (cxld->id != port->commit_end + 1) { + if (cxld->id != cxl_num_decoders_committed(port)) { dev_warn(&port->dev, "decoder%d.%d: Committed out of order\n", port->id, cxld->id); diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index 4df4f614f490ef..36270dcfb42ef2 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -1125,20 +1125,7 @@ int cxl_dev_state_identify(struct cxl_memdev_state *mds) } EXPORT_SYMBOL_NS_GPL(cxl_dev_state_identify, CXL); -/** - * cxl_mem_sanitize() - Send a sanitization command to the device. - * @mds: The device data for the operation - * @cmd: The specific sanitization command opcode - * - * Return: 0 if the command was executed successfully, regardless of - * whether or not the actual security operation is done in the background, - * such as for the Sanitize case. - * Error return values can be the result of the mailbox command, -EINVAL - * when security requirements are not met or invalid contexts. - * - * See CXL 3.0 @8.2.9.8.5.1 Sanitize and @8.2.9.8.5.2 Secure Erase. - */ -int cxl_mem_sanitize(struct cxl_memdev_state *mds, u16 cmd) +static int __cxl_mem_sanitize(struct cxl_memdev_state *mds, u16 cmd) { int rc; u32 sec_out = 0; @@ -1183,7 +1170,45 @@ int cxl_mem_sanitize(struct cxl_memdev_state *mds, u16 cmd) return 0; } -EXPORT_SYMBOL_NS_GPL(cxl_mem_sanitize, CXL); + + +/** + * cxl_mem_sanitize() - Send a sanitization command to the device. + * @cxlmd: The device for the operation + * @cmd: The specific sanitization command opcode + * + * Return: 0 if the command was executed successfully, regardless of + * whether or not the actual security operation is done in the background, + * such as for the Sanitize case. + * Error return values can be the result of the mailbox command, -EINVAL + * when security requirements are not met or invalid contexts, or -EBUSY + * if the sanitize operation is already in flight. + * + * See CXL 3.0 @8.2.9.8.5.1 Sanitize and @8.2.9.8.5.2 Secure Erase. 
+ */ +int cxl_mem_sanitize(struct cxl_memdev *cxlmd, u16 cmd) +{ + struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds); + struct cxl_port *endpoint; + int rc; + + /* synchronize with cxl_mem_probe() and decoder write operations */ + device_lock(&cxlmd->dev); + endpoint = cxlmd->endpoint; + down_read(&cxl_region_rwsem); + /* + * Require an endpoint to be safe otherwise the driver can not + * be sure that the device is unmapped. + */ + if (endpoint && cxl_num_decoders_committed(endpoint) == 0) + rc = __cxl_mem_sanitize(mds, cmd); + else + rc = -EBUSY; + up_read(&cxl_region_rwsem); + device_unlock(&cxlmd->dev); + + return rc; +} static int add_dpa_res(struct device *dev, struct resource *parent, struct resource *res, resource_size_t start, @@ -1224,8 +1249,7 @@ int cxl_mem_create_range_info(struct cxl_memdev_state *mds) return 0; } - cxlds->dpa_res = - (struct resource)DEFINE_RES_MEM(0, mds->total_bytes); + cxlds->dpa_res = DEFINE_RES_MEM(0, mds->total_bytes); if (mds->partition_align_bytes == 0) { rc = add_dpa_res(dev, &cxlds->dpa_res, &cxlds->ram_res, 0, @@ -1377,6 +1401,8 @@ struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev) mutex_init(&mds->mbox_mutex); mutex_init(&mds->event.log_lock); mds->cxlds.dev = dev; + mds->cxlds.reg_map.host = dev; + mds->cxlds.reg_map.resource = CXL_RESOURCE_NONE; mds->cxlds.type = CXL_DEVTYPE_CLASSMEM; return mds; diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c index 14b547c07f5476..fc5c2b414793bb 100644 --- a/drivers/cxl/core/memdev.c +++ b/drivers/cxl/core/memdev.c @@ -125,13 +125,16 @@ static ssize_t security_state_show(struct device *dev, struct cxl_memdev *cxlmd = to_cxl_memdev(dev); struct cxl_dev_state *cxlds = cxlmd->cxlds; struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds); - u64 reg = readq(cxlds->regs.mbox + CXLDEV_MBOX_BG_CMD_STATUS_OFFSET); - u32 pct = FIELD_GET(CXLDEV_MBOX_BG_CMD_COMMAND_PCT_MASK, reg); - u16 cmd = FIELD_GET(CXLDEV_MBOX_BG_CMD_COMMAND_OPCODE_MASK, reg); unsigned long state = mds->security.state; + int rc = 0; - if (cmd == CXL_MBOX_OP_SANITIZE && pct != 100) - return sysfs_emit(buf, "sanitize\n"); + /* sync with latest submission state */ + mutex_lock(&mds->mbox_mutex); + if (mds->security.sanitize_active) + rc = sysfs_emit(buf, "sanitize\n"); + mutex_unlock(&mds->mbox_mutex); + if (rc) + return rc; if (!(state & CXL_PMEM_SEC_STATE_USER_PASS_SET)) return sysfs_emit(buf, "disabled\n"); @@ -152,24 +155,17 @@ static ssize_t security_sanitize_store(struct device *dev, const char *buf, size_t len) { struct cxl_memdev *cxlmd = to_cxl_memdev(dev); - struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds); - struct cxl_port *port = cxlmd->endpoint; bool sanitize; ssize_t rc; if (kstrtobool(buf, &sanitize) || !sanitize) return -EINVAL; - if (!port || !is_cxl_endpoint(port)) - return -EINVAL; - - /* ensure no regions are mapped to this memdev */ - if (port->commit_end != -1) - return -EBUSY; - - rc = cxl_mem_sanitize(mds, CXL_MBOX_OP_SANITIZE); + rc = cxl_mem_sanitize(cxlmd, CXL_MBOX_OP_SANITIZE); + if (rc) + return rc; - return rc ? 
rc : len; + return len; } static struct device_attribute dev_attr_security_sanitize = __ATTR(sanitize, 0200, NULL, security_sanitize_store); @@ -179,24 +175,17 @@ static ssize_t security_erase_store(struct device *dev, const char *buf, size_t len) { struct cxl_memdev *cxlmd = to_cxl_memdev(dev); - struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds); - struct cxl_port *port = cxlmd->endpoint; ssize_t rc; bool erase; if (kstrtobool(buf, &erase) || !erase) return -EINVAL; - if (!port || !is_cxl_endpoint(port)) - return -EINVAL; - - /* ensure no regions are mapped to this memdev */ - if (port->commit_end != -1) - return -EBUSY; - - rc = cxl_mem_sanitize(mds, CXL_MBOX_OP_SECURE_ERASE); + rc = cxl_mem_sanitize(cxlmd, CXL_MBOX_OP_SECURE_ERASE); + if (rc) + return rc; - return rc ? rc : len; + return len; } static struct device_attribute dev_attr_security_erase = __ATTR(erase, 0200, NULL, security_erase_store); @@ -242,7 +231,7 @@ int cxl_trigger_poison_list(struct cxl_memdev *cxlmd) if (rc) return rc; - if (port->commit_end == -1) { + if (cxl_num_decoders_committed(port) == 0) { /* No regions mapped to this memdev */ rc = cxl_get_poison_by_memdev(cxlmd); } else { @@ -293,7 +282,7 @@ static struct cxl_region *cxl_dpa_to_region(struct cxl_memdev *cxlmd, u64 dpa) .dpa = dpa, }; port = cxlmd->endpoint; - if (port && is_cxl_endpoint(port) && port->commit_end != -1) + if (port && is_cxl_endpoint(port) && cxl_num_decoders_committed(port)) device_for_each_child(&port->dev, &ctx, __cxl_dpa_to_region); return ctx.cxlr; @@ -556,21 +545,11 @@ void clear_exclusive_cxl_commands(struct cxl_memdev_state *mds, } EXPORT_SYMBOL_NS_GPL(clear_exclusive_cxl_commands, CXL); -static void cxl_memdev_security_shutdown(struct device *dev) -{ - struct cxl_memdev *cxlmd = to_cxl_memdev(dev); - struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds); - - if (mds->security.poll) - cancel_delayed_work_sync(&mds->security.poll_dwork); -} - static void cxl_memdev_shutdown(struct device *dev) { struct cxl_memdev *cxlmd = to_cxl_memdev(dev); down_write(&cxl_memdev_rwsem); - cxl_memdev_security_shutdown(dev); cxlmd->cxlds = NULL; up_write(&cxl_memdev_rwsem); } @@ -580,8 +559,8 @@ static void cxl_memdev_unregister(void *_cxlmd) struct cxl_memdev *cxlmd = _cxlmd; struct device *dev = &cxlmd->dev; - cxl_memdev_shutdown(dev); cdev_device_del(&cxlmd->cdev, dev); + cxl_memdev_shutdown(dev); put_device(dev); } @@ -961,17 +940,16 @@ static const struct fw_upload_ops cxl_memdev_fw_ops = { .cleanup = cxl_fw_cleanup, }; -static void devm_cxl_remove_fw_upload(void *fwl) +static void cxl_remove_fw_upload(void *fwl) { firmware_upload_unregister(fwl); } -int cxl_memdev_setup_fw_upload(struct cxl_memdev_state *mds) +int devm_cxl_setup_fw_upload(struct device *host, struct cxl_memdev_state *mds) { struct cxl_dev_state *cxlds = &mds->cxlds; struct device *dev = &cxlds->cxlmd->dev; struct fw_upload *fwl; - int rc; if (!test_bit(CXL_MEM_COMMAND_ID_GET_FW_INFO, mds->enabled_cmds)) return 0; @@ -979,19 +957,10 @@ int cxl_memdev_setup_fw_upload(struct cxl_memdev_state *mds) fwl = firmware_upload_register(THIS_MODULE, dev, dev_name(dev), &cxl_memdev_fw_ops, mds); if (IS_ERR(fwl)) - return dev_err_probe(dev, PTR_ERR(fwl), - "Failed to register firmware loader\n"); - - rc = devm_add_action_or_reset(cxlds->dev, devm_cxl_remove_fw_upload, - fwl); - if (rc) - dev_err(dev, - "Failed to add firmware loader remove action: %d\n", - rc); - - return rc; + return PTR_ERR(fwl); + return devm_add_action_or_reset(host, cxl_remove_fw_upload, fwl); } 
-EXPORT_SYMBOL_NS_GPL(cxl_memdev_setup_fw_upload, CXL); +EXPORT_SYMBOL_NS_GPL(devm_cxl_setup_fw_upload, CXL); static const struct file_operations cxl_memdev_fops = { .owner = THIS_MODULE, @@ -1002,36 +971,8 @@ static const struct file_operations cxl_memdev_fops = { .llseek = noop_llseek, }; -static void put_sanitize(void *data) -{ - struct cxl_memdev_state *mds = data; - - sysfs_put(mds->security.sanitize_node); -} - -static int cxl_memdev_security_init(struct cxl_memdev *cxlmd) -{ - struct cxl_dev_state *cxlds = cxlmd->cxlds; - struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds); - struct device *dev = &cxlmd->dev; - struct kernfs_node *sec; - - sec = sysfs_get_dirent(dev->kobj.sd, "security"); - if (!sec) { - dev_err(dev, "sysfs_get_dirent 'security' failed\n"); - return -ENODEV; - } - mds->security.sanitize_node = sysfs_get_dirent(sec, "state"); - sysfs_put(sec); - if (!mds->security.sanitize_node) { - dev_err(dev, "sysfs_get_dirent 'state' failed\n"); - return -ENODEV; - } - - return devm_add_action_or_reset(cxlds->dev, put_sanitize, mds); - } - -struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds) +struct cxl_memdev *devm_cxl_add_memdev(struct device *host, + struct cxl_dev_state *cxlds) { struct cxl_memdev *cxlmd; struct device *dev; @@ -1059,11 +1000,7 @@ struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds) if (rc) goto err; - rc = cxl_memdev_security_init(cxlmd); - if (rc) - goto err; - - rc = devm_add_action_or_reset(cxlds->dev, cxl_memdev_unregister, cxlmd); + rc = devm_add_action_or_reset(host, cxl_memdev_unregister, cxlmd); if (rc) return ERR_PTR(rc); return cxlmd; @@ -1079,6 +1016,50 @@ struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds) } EXPORT_SYMBOL_NS_GPL(devm_cxl_add_memdev, CXL); +static void sanitize_teardown_notifier(void *data) +{ + struct cxl_memdev_state *mds = data; + struct kernfs_node *state; + + /* + * Prevent new irq triggered invocations of the workqueue and + * flush inflight invocations. + */ + mutex_lock(&mds->mbox_mutex); + state = mds->security.sanitize_node; + mds->security.sanitize_node = NULL; + mutex_unlock(&mds->mbox_mutex); + + cancel_delayed_work_sync(&mds->security.poll_dwork); + sysfs_put(state); +} + +int devm_cxl_sanitize_setup_notifier(struct device *host, + struct cxl_memdev *cxlmd) +{ + struct cxl_dev_state *cxlds = cxlmd->cxlds; + struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds); + struct kernfs_node *sec; + + if (!test_bit(CXL_SEC_ENABLED_SANITIZE, mds->security.enabled_cmds)) + return 0; + + /* + * Note, the expectation is that @cxlmd would have failed to be + * created if these sysfs_get_dirent calls fail. 
+ */ + sec = sysfs_get_dirent(cxlmd->dev.kobj.sd, "security"); + if (!sec) + return -ENOENT; + mds->security.sanitize_node = sysfs_get_dirent(sec, "state"); + sysfs_put(sec); + if (!mds->security.sanitize_node) + return -ENOENT; + + return devm_add_action_or_reset(host, sanitize_teardown_notifier, mds); +} +EXPORT_SYMBOL_NS_GPL(devm_cxl_sanitize_setup_notifier, CXL); + __init int cxl_memdev_init(void) { dev_t devt; diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index c7a7887ebdcff8..eff20e83d0a64e 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -595,6 +596,16 @@ static int cxl_cdat_read_table(struct device *dev, return 0; } +static unsigned char cdat_checksum(void *buf, size_t size) +{ + unsigned char sum, *data = buf; + size_t i; + + for (sum = 0, i = 0; i < size; i++) + sum += data[i]; + return sum; +} + /** * read_cdat_data - Read the CDAT data on this port * @port: Port to read data from @@ -603,18 +614,30 @@ static int cxl_cdat_read_table(struct device *dev, */ void read_cdat_data(struct cxl_port *port) { - struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport_dev); - struct device *host = cxlmd->dev.parent; + struct device *uport = port->uport_dev; struct device *dev = &port->dev; struct pci_doe_mb *cdat_doe; + struct pci_dev *pdev = NULL; + struct cxl_memdev *cxlmd; size_t cdat_length; void *cdat_table; int rc; - if (!dev_is_pci(host)) + if (is_cxl_memdev(uport)) { + struct device *host; + + cxlmd = to_cxl_memdev(uport); + host = cxlmd->dev.parent; + if (dev_is_pci(host)) + pdev = to_pci_dev(host); + } else if (dev_is_pci(uport)) { + pdev = to_pci_dev(uport); + } + + if (!pdev) return; - cdat_doe = pci_find_doe_mailbox(to_pci_dev(host), - PCI_DVSEC_VENDOR_ID_CXL, + + cdat_doe = pci_find_doe_mailbox(pdev, PCI_DVSEC_VENDOR_ID_CXL, CXL_DOE_PROTOCOL_TABLE_ACCESS); if (!cdat_doe) { dev_dbg(dev, "No CDAT mailbox\n"); @@ -634,44 +657,54 @@ void read_cdat_data(struct cxl_port *port) return; rc = cxl_cdat_read_table(dev, cdat_doe, cdat_table, &cdat_length); - if (rc) { - /* Don't leave table data allocated on error */ - devm_kfree(dev, cdat_table); - dev_err(dev, "CDAT data read error\n"); - return; - } + if (rc) + goto err; + + cdat_table = cdat_table + sizeof(__le32); + if (cdat_checksum(cdat_table, cdat_length)) + goto err; - port->cdat.table = cdat_table + sizeof(__le32); + port->cdat.table = cdat_table; port->cdat.length = cdat_length; + return; + +err: + /* Don't leave table data allocated on error */ + devm_kfree(dev, cdat_table); + dev_err(dev, "Failed to read/validate CDAT.\n"); } EXPORT_SYMBOL_NS_GPL(read_cdat_data, CXL); -void cxl_cor_error_detected(struct pci_dev *pdev) +static void __cxl_handle_cor_ras(struct cxl_dev_state *cxlds, + void __iomem *ras_base) { - struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); void __iomem *addr; u32 status; - if (!cxlds->regs.ras) + if (!ras_base) return; - addr = cxlds->regs.ras + CXL_RAS_CORRECTABLE_STATUS_OFFSET; + addr = ras_base + CXL_RAS_CORRECTABLE_STATUS_OFFSET; status = readl(addr); if (status & CXL_RAS_CORRECTABLE_STATUS_MASK) { writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr); trace_cxl_aer_correctable_error(cxlds->cxlmd, status); } } -EXPORT_SYMBOL_NS_GPL(cxl_cor_error_detected, CXL); + +static void cxl_handle_endpoint_cor_ras(struct cxl_dev_state *cxlds) +{ + return __cxl_handle_cor_ras(cxlds, cxlds->regs.ras); +} /* CXL spec rev3.0 8.2.4.16.1 */ -static void header_log_copy(struct cxl_dev_state *cxlds, u32 *log) 
+static void header_log_copy(void __iomem *ras_base, u32 *log) { void __iomem *addr; u32 *log_addr; int i, log_u32_size = CXL_HEADERLOG_SIZE / sizeof(u32); - addr = cxlds->regs.ras + CXL_RAS_HEADER_LOG_OFFSET; + addr = ras_base + CXL_RAS_HEADER_LOG_OFFSET; log_addr = log; for (i = 0; i < log_u32_size; i++) { @@ -685,17 +718,18 @@ static void header_log_copy(struct cxl_dev_state *cxlds, u32 *log) * Log the state of the RAS status registers and prepare them to log the * next error status. Return 1 if reset needed. */ -static bool cxl_report_and_clear(struct cxl_dev_state *cxlds) +static bool __cxl_handle_ras(struct cxl_dev_state *cxlds, + void __iomem *ras_base) { u32 hl[CXL_HEADERLOG_SIZE_U32]; void __iomem *addr; u32 status; u32 fe; - if (!cxlds->regs.ras) + if (!ras_base) return false; - addr = cxlds->regs.ras + CXL_RAS_UNCORRECTABLE_STATUS_OFFSET; + addr = ras_base + CXL_RAS_UNCORRECTABLE_STATUS_OFFSET; status = readl(addr); if (!(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK)) return false; @@ -703,7 +737,7 @@ static bool cxl_report_and_clear(struct cxl_dev_state *cxlds) /* If multiple errors, log header points to first error from ctrl reg */ if (hweight32(status) > 1) { void __iomem *rcc_addr = - cxlds->regs.ras + CXL_RAS_CAP_CONTROL_OFFSET; + ras_base + CXL_RAS_CAP_CONTROL_OFFSET; fe = BIT(FIELD_GET(CXL_RAS_CAP_CONTROL_FE_MASK, readl(rcc_addr))); @@ -711,13 +745,201 @@ static bool cxl_report_and_clear(struct cxl_dev_state *cxlds) fe = status; } - header_log_copy(cxlds, hl); + header_log_copy(ras_base, hl); trace_cxl_aer_uncorrectable_error(cxlds->cxlmd, status, fe, hl); writel(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK, addr); return true; } +static bool cxl_handle_endpoint_ras(struct cxl_dev_state *cxlds) +{ + return __cxl_handle_ras(cxlds, cxlds->regs.ras); +} + +#ifdef CONFIG_PCIEAER_CXL + +static void cxl_dport_map_rch_aer(struct cxl_dport *dport) +{ + struct cxl_rcrb_info *ri = &dport->rcrb; + void __iomem *dport_aer = NULL; + resource_size_t aer_phys; + struct device *host; + + if (dport->rch && ri->aer_cap) { + host = dport->reg_map.host; + aer_phys = ri->aer_cap + ri->base; + dport_aer = devm_cxl_iomap_block(host, aer_phys, + sizeof(struct aer_capability_regs)); + } + + dport->regs.dport_aer = dport_aer; +} + +static void cxl_dport_map_regs(struct cxl_dport *dport) +{ + struct cxl_register_map *map = &dport->reg_map; + struct device *dev = dport->dport_dev; + + if (!map->component_map.ras.valid) + dev_dbg(dev, "RAS registers not found\n"); + else if (cxl_map_component_regs(map, &dport->regs.component, + BIT(CXL_CM_CAP_CAP_ID_RAS))) + dev_dbg(dev, "Failed to map RAS capability.\n"); + + if (dport->rch) + cxl_dport_map_rch_aer(dport); +} + +static void cxl_disable_rch_root_ints(struct cxl_dport *dport) +{ + void __iomem *aer_base = dport->regs.dport_aer; + struct pci_host_bridge *bridge; + u32 aer_cmd_mask, aer_cmd; + + if (!aer_base) + return; + + bridge = to_pci_host_bridge(dport->dport_dev); + + /* + * Disable RCH root port command interrupts. + * CXL 3.0 12.2.1.1 - RCH Downstream Port-detected Errors + * + * This sequence may not be necessary. CXL spec states disabling + * the root cmd register's interrupts is required. But, PCI spec + * shows these are disabled by default on reset. 
+ */ + if (bridge->native_aer) { + aer_cmd_mask = (PCI_ERR_ROOT_CMD_COR_EN | + PCI_ERR_ROOT_CMD_NONFATAL_EN | + PCI_ERR_ROOT_CMD_FATAL_EN); + aer_cmd = readl(aer_base + PCI_ERR_ROOT_COMMAND); + aer_cmd &= ~aer_cmd_mask; + writel(aer_cmd, aer_base + PCI_ERR_ROOT_COMMAND); + } +} + +void cxl_setup_parent_dport(struct device *host, struct cxl_dport *dport) +{ + struct device *dport_dev = dport->dport_dev; + struct pci_host_bridge *host_bridge; + + host_bridge = to_pci_host_bridge(dport_dev); + if (host_bridge->native_aer) + dport->rcrb.aer_cap = cxl_rcrb_to_aer(dport_dev, dport->rcrb.base); + + dport->reg_map.host = host; + cxl_dport_map_regs(dport); + + if (dport->rch) + cxl_disable_rch_root_ints(dport); +} +EXPORT_SYMBOL_NS_GPL(cxl_setup_parent_dport, CXL); + +static void cxl_handle_rdport_cor_ras(struct cxl_dev_state *cxlds, + struct cxl_dport *dport) +{ + return __cxl_handle_cor_ras(cxlds, dport->regs.ras); +} + +static bool cxl_handle_rdport_ras(struct cxl_dev_state *cxlds, + struct cxl_dport *dport) +{ + return __cxl_handle_ras(cxlds, dport->regs.ras); +} + +/* + * Copy the AER capability registers using 32 bit read accesses. + * This is necessary because RCRB AER capability is MMIO mapped. Clear the + * status after copying. + * + * @aer_base: base address of AER capability block in RCRB + * @aer_regs: destination for copying AER capability + */ +static bool cxl_rch_get_aer_info(void __iomem *aer_base, + struct aer_capability_regs *aer_regs) +{ + int read_cnt = sizeof(struct aer_capability_regs) / sizeof(u32); + u32 *aer_regs_buf = (u32 *)aer_regs; + int n; + + if (!aer_base) + return false; + + /* Use readl() to guarantee 32-bit accesses */ + for (n = 0; n < read_cnt; n++) + aer_regs_buf[n] = readl(aer_base + n * sizeof(u32)); + + writel(aer_regs->uncor_status, aer_base + PCI_ERR_UNCOR_STATUS); + writel(aer_regs->cor_status, aer_base + PCI_ERR_COR_STATUS); + + return true; +} + +/* Get AER severity. Return false if there is no error. 
*/ +static bool cxl_rch_get_aer_severity(struct aer_capability_regs *aer_regs, + int *severity) +{ + if (aer_regs->uncor_status & ~aer_regs->uncor_mask) { + if (aer_regs->uncor_status & PCI_ERR_ROOT_FATAL_RCV) + *severity = AER_FATAL; + else + *severity = AER_NONFATAL; + return true; + } + + if (aer_regs->cor_status & ~aer_regs->cor_mask) { + *severity = AER_CORRECTABLE; + return true; + } + + return false; +} + +static void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds) +{ + struct pci_dev *pdev = to_pci_dev(cxlds->dev); + struct aer_capability_regs aer_regs; + struct cxl_dport *dport; + struct cxl_port *port; + int severity; + + port = cxl_pci_find_port(pdev, &dport); + if (!port) + return; + + put_device(&port->dev); + + if (!cxl_rch_get_aer_info(dport->regs.dport_aer, &aer_regs)) + return; + + if (!cxl_rch_get_aer_severity(&aer_regs, &severity)) + return; + + pci_print_aer(pdev, severity, &aer_regs); + + if (severity == AER_CORRECTABLE) + cxl_handle_rdport_cor_ras(cxlds, dport); + else + cxl_handle_rdport_ras(cxlds, dport); +} + +#else +static void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds) { } +#endif + +void cxl_cor_error_detected(struct pci_dev *pdev) +{ + struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); + + if (cxlds->rcd) + cxl_handle_rdport_errors(cxlds); + + cxl_handle_endpoint_cor_ras(cxlds); +} +EXPORT_SYMBOL_NS_GPL(cxl_cor_error_detected, CXL); + pci_ers_result_t cxl_error_detected(struct pci_dev *pdev, pci_channel_state_t state) { @@ -726,13 +948,16 @@ pci_ers_result_t cxl_error_detected(struct pci_dev *pdev, struct device *dev = &cxlmd->dev; bool ue; + if (cxlds->rcd) + cxl_handle_rdport_errors(cxlds); + /* * A frozen channel indicates an impending reset which is fatal to * CXL.mem operation, and will likely crash the system. On the off * chance the situation is recoverable dump the status of the RAS * capability registers and bounce the active state of the memdev. */ - ue = cxl_report_and_clear(cxlds); + ue = cxl_handle_endpoint_ras(cxlds); switch (state) { case pci_channel_io_normal: diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 7ca01a834e188c..38441634e4c683 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -28,9 +28,22 @@ * instantiated by the core. */ +/* + * All changes to the interleave configuration occur with this lock held + * for write. 
+ */ +DECLARE_RWSEM(cxl_region_rwsem); + static DEFINE_IDA(cxl_port_ida); static DEFINE_XARRAY(cxl_root_buses); +int cxl_num_decoders_committed(struct cxl_port *port) +{ + lockdep_assert_held(&cxl_region_rwsem); + + return port->commit_end + 1; +} + static ssize_t devtype_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -278,6 +291,15 @@ static ssize_t interleave_ways_show(struct device *dev, static DEVICE_ATTR_RO(interleave_ways); +static ssize_t qos_class_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev); + + return sysfs_emit(buf, "%d\n", cxlrd->qos_class); +} +static DEVICE_ATTR_RO(qos_class); + static struct attribute *cxl_decoder_base_attrs[] = { &dev_attr_start.attr, &dev_attr_size.attr, @@ -297,6 +319,7 @@ static struct attribute *cxl_decoder_root_attrs[] = { &dev_attr_cap_type2.attr, &dev_attr_cap_type3.attr, &dev_attr_target_list.attr, + &dev_attr_qos_class.attr, SET_CXL_REGION_ATTR(create_pmem_region) SET_CXL_REGION_ATTR(create_ram_region) SET_CXL_REGION_ATTR(delete_region) @@ -521,8 +544,33 @@ static void cxl_port_release(struct device *dev) kfree(port); } +static ssize_t decoders_committed_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct cxl_port *port = to_cxl_port(dev); + int rc; + + down_read(&cxl_region_rwsem); + rc = sysfs_emit(buf, "%d\n", cxl_num_decoders_committed(port)); + up_read(&cxl_region_rwsem); + + return rc; +} + +static DEVICE_ATTR_RO(decoders_committed); + +static struct attribute *cxl_port_attrs[] = { + &dev_attr_decoders_committed.attr, + NULL, +}; + +static struct attribute_group cxl_port_attribute_group = { + .attrs = cxl_port_attrs, +}; + static const struct attribute_group *cxl_port_attribute_groups[] = { &cxl_base_attribute_group, + &cxl_port_attribute_group, NULL, }; @@ -619,7 +667,6 @@ static int devm_cxl_link_parent_dport(struct device *host, static struct lock_class_key cxl_port_key; static struct cxl_port *cxl_port_alloc(struct device *uport_dev, - resource_size_t component_reg_phys, struct cxl_dport *parent_dport) { struct cxl_port *port; @@ -670,7 +717,6 @@ static struct cxl_port *cxl_port_alloc(struct device *uport_dev, } else dev->parent = uport_dev; - port->component_reg_phys = component_reg_phys; ida_init(&port->decoder_ida); port->hdm_end = -1; port->commit_end = -1; @@ -691,19 +737,21 @@ static struct cxl_port *cxl_port_alloc(struct device *uport_dev, return ERR_PTR(rc); } -static int cxl_setup_comp_regs(struct device *dev, struct cxl_register_map *map, +static int cxl_setup_comp_regs(struct device *host, struct cxl_register_map *map, resource_size_t component_reg_phys) { - if (component_reg_phys == CXL_RESOURCE_NONE) - return 0; - *map = (struct cxl_register_map) { - .dev = dev, - .reg_type = CXL_REGLOC_RBI_COMPONENT, + .host = host, + .reg_type = CXL_REGLOC_RBI_EMPTY, .resource = component_reg_phys, - .max_size = CXL_COMPONENT_REG_BLOCK_SIZE, }; + if (component_reg_phys == CXL_RESOURCE_NONE) + return 0; + + map->reg_type = CXL_REGLOC_RBI_COMPONENT; + map->max_size = CXL_COMPONENT_REG_BLOCK_SIZE; + return cxl_setup_regs(map); } @@ -712,17 +760,27 @@ static int cxl_port_setup_regs(struct cxl_port *port, { if (dev_is_platform(port->uport_dev)) return 0; - return cxl_setup_comp_regs(&port->dev, &port->comp_map, + return cxl_setup_comp_regs(&port->dev, &port->reg_map, component_reg_phys); } -static int cxl_dport_setup_regs(struct cxl_dport *dport, +static int cxl_dport_setup_regs(struct device *host, struct 
cxl_dport *dport, resource_size_t component_reg_phys) { + int rc; + if (dev_is_platform(dport->dport_dev)) return 0; - return cxl_setup_comp_regs(dport->dport_dev, &dport->comp_map, - component_reg_phys); + + /* + * use @dport->dport_dev for the context for error messages during + * register probing, and fixup @host after the fact, since @host may be + * NULL. + */ + rc = cxl_setup_comp_regs(dport->dport_dev, &dport->reg_map, + component_reg_phys); + dport->reg_map.host = host; + return rc; } static struct cxl_port *__devm_cxl_add_port(struct device *host, @@ -734,21 +792,36 @@ static struct cxl_port *__devm_cxl_add_port(struct device *host, struct device *dev; int rc; - port = cxl_port_alloc(uport_dev, component_reg_phys, parent_dport); + port = cxl_port_alloc(uport_dev, parent_dport); if (IS_ERR(port)) return port; dev = &port->dev; - if (is_cxl_memdev(uport_dev)) + if (is_cxl_memdev(uport_dev)) { + struct cxl_memdev *cxlmd = to_cxl_memdev(uport_dev); + struct cxl_dev_state *cxlds = cxlmd->cxlds; + rc = dev_set_name(dev, "endpoint%d", port->id); - else if (parent_dport) + if (rc) + goto err; + + /* + * The endpoint driver already enumerated the component and RAS + * registers. Reuse that enumeration while prepping them to be + * mapped by the cxl_port driver. + */ + port->reg_map = cxlds->reg_map; + port->reg_map.host = &port->dev; + } else if (parent_dport) { rc = dev_set_name(dev, "port%d", port->id); - else - rc = dev_set_name(dev, "root%d", port->id); - if (rc) - goto err; + if (rc) + goto err; - rc = cxl_port_setup_regs(port, component_reg_phys); + rc = cxl_port_setup_regs(port, component_reg_phys); + if (rc) + goto err; + } else + rc = dev_set_name(dev, "root%d", port->id); if (rc) goto err; @@ -983,7 +1056,16 @@ __devm_cxl_add_dport(struct cxl_port *port, struct device *dport_dev, if (!dport) return ERR_PTR(-ENOMEM); - if (rcrb != CXL_RESOURCE_NONE) { + dport->dport_dev = dport_dev; + dport->port_id = port_id; + dport->port = port; + + if (rcrb == CXL_RESOURCE_NONE) { + rc = cxl_dport_setup_regs(&port->dev, dport, + component_reg_phys); + if (rc) + return ERR_PTR(rc); + } else { dport->rcrb.base = rcrb; component_reg_phys = __rcrb_to_component(dport_dev, &dport->rcrb, CXL_RCRB_DOWNSTREAM); @@ -992,6 +1074,14 @@ __devm_cxl_add_dport(struct cxl_port *port, struct device *dport_dev, return ERR_PTR(-ENXIO); } + /* + * RCH @dport is not ready to map until associated with its + * memdev + */ + rc = cxl_dport_setup_regs(NULL, dport, component_reg_phys); + if (rc) + return ERR_PTR(rc); + dport->rch = true; } @@ -999,14 +1089,6 @@ __devm_cxl_add_dport(struct cxl_port *port, struct device *dport_dev, dev_dbg(dport_dev, "Component Registers found for dport: %pa\n", &component_reg_phys); - dport->dport_dev = dport_dev; - dport->port_id = port_id; - dport->port = port; - - rc = cxl_dport_setup_regs(dport, component_reg_phys); - if (rc) - return ERR_PTR(rc); - cond_cxl_root_lock(port); rc = add_dport(port, dport); cond_cxl_root_unlock(port); @@ -1217,35 +1299,39 @@ static struct device *grandparent(struct device *dev) return NULL; } +static struct device *endpoint_host(struct cxl_port *endpoint) +{ + struct cxl_port *port = to_cxl_port(endpoint->dev.parent); + + if (is_cxl_root(port)) + return port->uport_dev; + return &port->dev; +} + static void delete_endpoint(void *data) { struct cxl_memdev *cxlmd = data; struct cxl_port *endpoint = cxlmd->endpoint; - struct cxl_port *parent_port; - struct device *parent; + struct device *host = endpoint_host(endpoint); - parent_port = 
cxl_mem_find_port(cxlmd, NULL); - if (!parent_port) - goto out; - parent = &parent_port->dev; - - device_lock(parent); - if (parent->driver && !endpoint->dead) { - devm_release_action(parent, cxl_unlink_parent_dport, endpoint); - devm_release_action(parent, cxl_unlink_uport, endpoint); - devm_release_action(parent, unregister_port, endpoint); + device_lock(host); + if (host->driver && !endpoint->dead) { + devm_release_action(host, cxl_unlink_parent_dport, endpoint); + devm_release_action(host, cxl_unlink_uport, endpoint); + devm_release_action(host, unregister_port, endpoint); } cxlmd->endpoint = NULL; - device_unlock(parent); - put_device(parent); -out: + device_unlock(host); put_device(&endpoint->dev); + put_device(host); } int cxl_endpoint_autoremove(struct cxl_memdev *cxlmd, struct cxl_port *endpoint) { + struct device *host = endpoint_host(endpoint); struct device *dev = &cxlmd->dev; + get_device(host); get_device(&endpoint->dev); cxlmd->endpoint = endpoint; cxlmd->depth = endpoint->depth; @@ -1468,7 +1554,11 @@ int devm_cxl_enumerate_ports(struct cxl_memdev *cxlmd) struct cxl_dport *dport; struct cxl_port *port; - if (!dport_dev) + /* + * The terminal "grandparent" in PCI is NULL and @platform_bus + * for platform devices + */ + if (!dport_dev || dport_dev == &platform_bus) return 0; uport_dev = dport_dev->parent; @@ -1691,6 +1781,7 @@ struct cxl_root_decoder *cxl_root_decoder_alloc(struct cxl_port *port, } atomic_set(&cxlrd->region_id, rc); + cxlrd->qos_class = CXL_QOS_CLASS_INVALID; return cxlrd; } EXPORT_SYMBOL_NS_GPL(cxl_root_decoder_alloc, CXL); @@ -2062,3 +2153,4 @@ static void cxl_core_exit(void) subsys_initcall(cxl_core_init); module_exit(cxl_core_exit); MODULE_LICENSE("GPL v2"); +MODULE_IMPORT_NS(CXL); diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 6d63b8798c2992..56e575c79bb491 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -28,12 +28,6 @@ * 3. Decoder targets */ -/* - * All changes to the interleave configuration occur with this lock held - * for write. - */ -static DECLARE_RWSEM(cxl_region_rwsem); - static struct cxl_region *to_cxl_region(struct device *dev); static ssize_t uuid_show(struct device *dev, struct device_attribute *attr, @@ -129,7 +123,7 @@ static int cxl_region_invalidate_memregion(struct cxl_region *cxlr) { if (!cpu_cache_has_invalidate_memregion()) { if (IS_ENABLED(CONFIG_CXL_REGION_INVALIDATION_TEST)) { - dev_warn_once( + dev_info_once( &cxlr->dev, "Bypassing cpu_cache_invalidate_memregion() for testing!\n"); return 0; @@ -294,7 +288,7 @@ static ssize_t commit_store(struct device *dev, struct device_attribute *attr, */ rc = cxl_region_invalidate_memregion(cxlr); if (rc) - return rc; + goto out; if (commit) { rc = cxl_region_decode_commit(cxlr); @@ -1133,7 +1127,14 @@ static int cxl_port_setup_targets(struct cxl_port *port, } if (is_cxl_root(parent_port)) { - parent_ig = cxlrd->cxlsd.cxld.interleave_granularity; + /* + * Root decoder IG is always set to value in CFMWS which + * may be different than this region's IG. We can use the + * region's IG here since interleave_granularity_store() + * does not allow interleaved host-bridges with + * root IG != region IG. 
+ */ + parent_ig = p->interleave_granularity; parent_iw = cxlrd->cxlsd.cxld.interleave_ways; /* * For purposes of address bit routing, use power-of-2 math for @@ -1195,6 +1196,14 @@ static int cxl_port_setup_targets(struct cxl_port *port, return rc; } + if (iw > 8 || iw > cxlsd->nr_targets) { + dev_dbg(&cxlr->dev, + "%s:%s:%s: ways: %d overflows targets: %d\n", + dev_name(port->uport_dev), dev_name(&port->dev), + dev_name(&cxld->dev), iw, cxlsd->nr_targets); + return -ENXIO; + } + if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) { if (cxld->interleave_ways != iw || cxld->interleave_granularity != ig || @@ -1480,6 +1489,14 @@ static int cxl_region_attach_auto(struct cxl_region *cxlr, return 0; } +static int cmp_interleave_pos(const void *a, const void *b) +{ + struct cxl_endpoint_decoder *cxled_a = *(typeof(cxled_a) *)a; + struct cxl_endpoint_decoder *cxled_b = *(typeof(cxled_b) *)b; + + return cxled_a->pos - cxled_b->pos; +} + static struct cxl_port *next_port(struct cxl_port *port) { if (!port->parent_dport) @@ -1487,119 +1504,127 @@ static struct cxl_port *next_port(struct cxl_port *port) return port->parent_dport->port; } -static int decoder_match_range(struct device *dev, void *data) +static int match_switch_decoder_by_range(struct device *dev, void *data) { - struct cxl_endpoint_decoder *cxled = data; struct cxl_switch_decoder *cxlsd; + struct range *r1, *r2 = data; if (!is_switch_decoder(dev)) return 0; cxlsd = to_cxl_switch_decoder(dev); - return range_contains(&cxlsd->cxld.hpa_range, &cxled->cxld.hpa_range); -} - -static void find_positions(const struct cxl_switch_decoder *cxlsd, - const struct cxl_port *iter_a, - const struct cxl_port *iter_b, int *a_pos, - int *b_pos) -{ - int i; + r1 = &cxlsd->cxld.hpa_range; - for (i = 0, *a_pos = -1, *b_pos = -1; i < cxlsd->nr_targets; i++) { - if (cxlsd->target[i] == iter_a->parent_dport) - *a_pos = i; - else if (cxlsd->target[i] == iter_b->parent_dport) - *b_pos = i; - if (*a_pos >= 0 && *b_pos >= 0) - break; - } + if (is_root_decoder(dev)) + return range_contains(r1, r2); + return (r1->start == r2->start && r1->end == r2->end); } -static int cmp_decode_pos(const void *a, const void *b) +static int find_pos_and_ways(struct cxl_port *port, struct range *range, + int *pos, int *ways) { - struct cxl_endpoint_decoder *cxled_a = *(typeof(cxled_a) *)a; - struct cxl_endpoint_decoder *cxled_b = *(typeof(cxled_b) *)b; - struct cxl_memdev *cxlmd_a = cxled_to_memdev(cxled_a); - struct cxl_memdev *cxlmd_b = cxled_to_memdev(cxled_b); - struct cxl_port *port_a = cxled_to_port(cxled_a); - struct cxl_port *port_b = cxled_to_port(cxled_b); - struct cxl_port *iter_a, *iter_b, *port = NULL; struct cxl_switch_decoder *cxlsd; + struct cxl_port *parent; struct device *dev; - int a_pos, b_pos; - unsigned int seq; - - /* Exit early if any prior sorting failed */ - if (cxled_a->pos < 0 || cxled_b->pos < 0) - return 0; + int rc = -ENXIO; - /* - * Walk up the hierarchy to find a shared port, find the decoder that - * maps the range, compare the relative position of those dport - * mappings. 
- */ - for (iter_a = port_a; iter_a; iter_a = next_port(iter_a)) { - struct cxl_port *next_a, *next_b; + parent = next_port(port); + if (!parent) + return rc; - next_a = next_port(iter_a); - if (!next_a) - break; + dev = device_find_child(&parent->dev, range, + match_switch_decoder_by_range); + if (!dev) { + dev_err(port->uport_dev, + "failed to find decoder mapping %#llx-%#llx\n", + range->start, range->end); + return rc; + } + cxlsd = to_cxl_switch_decoder(dev); + *ways = cxlsd->cxld.interleave_ways; - for (iter_b = port_b; iter_b; iter_b = next_port(iter_b)) { - next_b = next_port(iter_b); - if (next_a != next_b) - continue; - port = next_a; + for (int i = 0; i < *ways; i++) { + if (cxlsd->target[i] == port->parent_dport) { + *pos = i; + rc = 0; break; } - - if (port) - break; } + put_device(dev); - if (!port) { - dev_err(cxlmd_a->dev.parent, - "failed to find shared port with %s\n", - dev_name(cxlmd_b->dev.parent)); - goto err; - } + return rc; +} - dev = device_find_child(&port->dev, cxled_a, decoder_match_range); - if (!dev) { - struct range *range = &cxled_a->cxld.hpa_range; +/** + * cxl_calc_interleave_pos() - calculate an endpoint position in a region + * @cxled: endpoint decoder member of given region + * + * The endpoint position is calculated by traversing the topology from + * the endpoint to the root decoder and iteratively applying this + * calculation: + * + * position = position * parent_ways + parent_pos; + * + * ...where @position is inferred from switch and root decoder target lists. + * + * Return: position >= 0 on success + * -ENXIO on failure + */ +static int cxl_calc_interleave_pos(struct cxl_endpoint_decoder *cxled) +{ + struct cxl_port *iter, *port = cxled_to_port(cxled); + struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); + struct range *range = &cxled->cxld.hpa_range; + int parent_ways = 0, parent_pos = 0, pos = 0; + int rc; - dev_err(port->uport_dev, - "failed to find decoder that maps %#llx-%#llx\n", - range->start, range->end); - goto err; - } + /* + * Example: the expected interleave order of the 4-way region shown + * below is: mem0, mem2, mem1, mem3 + * + * root_port + * / \ + * host_bridge_0 host_bridge_1 + * | | | | + * mem0 mem1 mem2 mem3 + * + * In the example the calculator will iterate twice. The first iteration + * uses the mem position in the host-bridge and the ways of the host- + * bridge to generate the first, or local, position. The second + * iteration uses the host-bridge position in the root_port and the ways + * of the root_port to refine the position. + * + * A trace of the calculation per endpoint looks like this: + * mem0: pos = 0 * 2 + 0 mem2: pos = 0 * 2 + 0 + * pos = 0 * 2 + 0 pos = 0 * 2 + 1 + * pos: 0 pos: 1 + * + * mem1: pos = 0 * 2 + 1 mem3: pos = 0 * 2 + 1 + * pos = 1 * 2 + 0 pos = 1 * 2 + 1 + * pos: 2 pos = 3 + * + * Note that while this example is simple, the method applies to more + * complex topologies, including those with switches. 
+ */ - cxlsd = to_cxl_switch_decoder(dev); - do { - seq = read_seqbegin(&cxlsd->target_lock); - find_positions(cxlsd, iter_a, iter_b, &a_pos, &b_pos); - } while (read_seqretry(&cxlsd->target_lock, seq)); + /* Iterate from endpoint to root_port refining the position */ + for (iter = port; iter; iter = next_port(iter)) { + if (is_cxl_root(iter)) + break; - put_device(dev); + rc = find_pos_and_ways(iter, range, &parent_pos, &parent_ways); + if (rc) + return rc; - if (a_pos < 0 || b_pos < 0) { - dev_err(port->uport_dev, - "failed to find shared decoder for %s and %s\n", - dev_name(cxlmd_a->dev.parent), - dev_name(cxlmd_b->dev.parent)); - goto err; + pos = pos * parent_ways + parent_pos; } - dev_dbg(port->uport_dev, "%s comes %s %s\n", - dev_name(cxlmd_a->dev.parent), - a_pos - b_pos < 0 ? "before" : "after", - dev_name(cxlmd_b->dev.parent)); + dev_dbg(&cxlmd->dev, + "decoder:%s parent:%s port:%s range:%#llx-%#llx pos:%d\n", + dev_name(&cxled->cxld.dev), dev_name(cxlmd->dev.parent), + dev_name(&port->dev), range->start, range->end, pos); - return a_pos - b_pos; -err: - cxled_a->pos = -1; - return 0; + return pos; } static int cxl_region_sort_targets(struct cxl_region *cxlr) @@ -1607,22 +1632,21 @@ static int cxl_region_sort_targets(struct cxl_region *cxlr) struct cxl_region_params *p = &cxlr->params; int i, rc = 0; - sort(p->targets, p->nr_targets, sizeof(p->targets[0]), cmp_decode_pos, - NULL); - for (i = 0; i < p->nr_targets; i++) { struct cxl_endpoint_decoder *cxled = p->targets[i]; + cxled->pos = cxl_calc_interleave_pos(cxled); /* - * Record that sorting failed, but still continue to restore - * cxled->pos with its ->targets[] position so that follow-on - * code paths can reliably do p->targets[cxled->pos] to - * self-reference their entry. + * Record that sorting failed, but still continue to calc + * cxled->pos so that follow-on code paths can reliably + * do p->targets[cxled->pos] to self-reference their entry. */ if (cxled->pos < 0) rc = -ENXIO; - cxled->pos = i; } + /* Keep the cxlr target list in interleave position order */ + sort(p->targets, p->nr_targets, sizeof(p->targets[0]), + cmp_interleave_pos, NULL); dev_dbg(&cxlr->dev, "region sort %s\n", rc ? "failed" : "successful"); return rc; @@ -1658,6 +1682,12 @@ static int cxl_region_attach(struct cxl_region *cxlr, return -ENXIO; } + if (p->nr_targets >= p->interleave_ways) { + dev_dbg(&cxlr->dev, "region already has %d endpoints\n", + p->nr_targets); + return -EINVAL; + } + ep_port = cxled_to_port(cxled); root_port = cxlrd_to_port(cxlrd); dport = cxl_find_dport_by_dev(root_port, ep_port->host_bridge); @@ -1750,7 +1780,7 @@ static int cxl_region_attach(struct cxl_region *cxlr, if (p->nr_targets == p->interleave_ways) { rc = cxl_region_setup_targets(cxlr); if (rc) - goto err_decrement; + return rc; p->state = CXL_CONFIG_ACTIVE; } @@ -1761,13 +1791,27 @@ static int cxl_region_attach(struct cxl_region *cxlr, .end = p->res->end, }; - return 0; + if (p->nr_targets != p->interleave_ways) + return 0; -err_decrement: - p->nr_targets--; - cxled->pos = -1; - p->targets[pos] = NULL; - return rc; + /* + * Test the auto-discovery position calculator function + * against this successfully created user-defined region. + * A fail message here means that this interleave config + * will fail when presented as CXL_REGION_F_AUTO. 
+ */ + for (int i = 0; i < p->nr_targets; i++) { + struct cxl_endpoint_decoder *cxled = p->targets[i]; + int test_pos; + + test_pos = cxl_calc_interleave_pos(cxled); + dev_dbg(&cxled->cxld.dev, + "Test cxl_calc_interleave_pos(): %s test_pos:%d cxled->pos:%d\n", + (test_pos == cxled->pos) ? "success" : "fail", + test_pos, cxled->pos); + } + + return 0; } static int cxl_region_detach(struct cxl_endpoint_decoder *cxled) @@ -2696,7 +2740,7 @@ static int devm_cxl_add_dax_region(struct cxl_region *cxlr) return rc; } -static int match_decoder_by_range(struct device *dev, void *data) +static int match_root_decoder_by_range(struct device *dev, void *data) { struct range *r1, *r2 = data; struct cxl_root_decoder *cxlrd; @@ -2827,7 +2871,7 @@ int cxl_add_to_region(struct cxl_port *root, struct cxl_endpoint_decoder *cxled) int rc; cxlrd_dev = device_find_child(&root->dev, &cxld->hpa_range, - match_decoder_by_range); + match_root_decoder_by_range); if (!cxlrd_dev) { dev_err(cxlmd->dev.parent, "%s:%s no CXL window for range %#llx:%#llx\n", diff --git a/drivers/cxl/core/regs.c b/drivers/cxl/core/regs.c index 6281127b3e9d97..372786f809555f 100644 --- a/drivers/cxl/core/regs.c +++ b/drivers/cxl/core/regs.c @@ -204,7 +204,7 @@ int cxl_map_component_regs(const struct cxl_register_map *map, struct cxl_component_regs *regs, unsigned long map_mask) { - struct device *dev = map->dev; + struct device *host = map->host; struct mapinfo { const struct cxl_reg_map *rmap; void __iomem **addr; @@ -216,16 +216,16 @@ int cxl_map_component_regs(const struct cxl_register_map *map, for (i = 0; i < ARRAY_SIZE(mapinfo); i++) { struct mapinfo *mi = &mapinfo[i]; - resource_size_t phys_addr; + resource_size_t addr; resource_size_t length; if (!mi->rmap->valid) continue; if (!test_bit(mi->rmap->id, &map_mask)) continue; - phys_addr = map->resource + mi->rmap->offset; + addr = map->resource + mi->rmap->offset; length = mi->rmap->size; - *(mi->addr) = devm_cxl_iomap_block(dev, phys_addr, length); + *(mi->addr) = devm_cxl_iomap_block(host, addr, length); if (!*(mi->addr)) return -ENOMEM; } @@ -237,7 +237,7 @@ EXPORT_SYMBOL_NS_GPL(cxl_map_component_regs, CXL); int cxl_map_device_regs(const struct cxl_register_map *map, struct cxl_device_regs *regs) { - struct device *dev = map->dev; + struct device *host = map->host; resource_size_t phys_addr = map->resource; struct mapinfo { const struct cxl_reg_map *rmap; @@ -259,7 +259,7 @@ int cxl_map_device_regs(const struct cxl_register_map *map, addr = phys_addr + mi->rmap->offset; length = mi->rmap->size; - *(mi->addr) = devm_cxl_iomap_block(dev, addr, length); + *(mi->addr) = devm_cxl_iomap_block(host, addr, length); if (!*(mi->addr)) return -ENOMEM; } @@ -309,7 +309,7 @@ int cxl_find_regblock_instance(struct pci_dev *pdev, enum cxl_regloc_type type, int regloc, i; *map = (struct cxl_register_map) { - .dev = &pdev->dev, + .host = &pdev->dev, .resource = CXL_RESOURCE_NONE, }; @@ -386,10 +386,9 @@ int cxl_count_regblock(struct pci_dev *pdev, enum cxl_regloc_type type) } EXPORT_SYMBOL_NS_GPL(cxl_count_regblock, CXL); -int cxl_map_pmu_regs(struct pci_dev *pdev, struct cxl_pmu_regs *regs, - struct cxl_register_map *map) +int cxl_map_pmu_regs(struct cxl_register_map *map, struct cxl_pmu_regs *regs) { - struct device *dev = &pdev->dev; + struct device *dev = map->host; resource_size_t phys_addr; phys_addr = map->resource; @@ -403,15 +402,15 @@ EXPORT_SYMBOL_NS_GPL(cxl_map_pmu_regs, CXL); static int cxl_map_regblock(struct cxl_register_map *map) { - struct device *dev = map->dev; + struct device 
*host = map->host; map->base = ioremap(map->resource, map->max_size); if (!map->base) { - dev_err(dev, "failed to map registers\n"); + dev_err(host, "failed to map registers\n"); return -ENOMEM; } - dev_dbg(dev, "Mapped CXL Memory Device resource %pa\n", &map->resource); + dev_dbg(host, "Mapped CXL Memory Device resource %pa\n", &map->resource); return 0; } @@ -425,28 +424,28 @@ static int cxl_probe_regs(struct cxl_register_map *map) { struct cxl_component_reg_map *comp_map; struct cxl_device_reg_map *dev_map; - struct device *dev = map->dev; + struct device *host = map->host; void __iomem *base = map->base; switch (map->reg_type) { case CXL_REGLOC_RBI_COMPONENT: comp_map = &map->component_map; - cxl_probe_component_regs(dev, base, comp_map); - dev_dbg(dev, "Set up component registers\n"); + cxl_probe_component_regs(host, base, comp_map); + dev_dbg(host, "Set up component registers\n"); break; case CXL_REGLOC_RBI_MEMDEV: dev_map = &map->device_map; - cxl_probe_device_regs(dev, base, dev_map); + cxl_probe_device_regs(host, base, dev_map); if (!dev_map->status.valid || !dev_map->mbox.valid || !dev_map->memdev.valid) { - dev_err(dev, "registers not found: %s%s%s\n", + dev_err(host, "registers not found: %s%s%s\n", !dev_map->status.valid ? "status " : "", !dev_map->mbox.valid ? "mbox " : "", !dev_map->memdev.valid ? "memdev " : ""); return -ENXIO; } - dev_dbg(dev, "Probing device registers...\n"); + dev_dbg(host, "Probing device registers...\n"); break; default: break; @@ -470,6 +469,42 @@ int cxl_setup_regs(struct cxl_register_map *map) } EXPORT_SYMBOL_NS_GPL(cxl_setup_regs, CXL); +u16 cxl_rcrb_to_aer(struct device *dev, resource_size_t rcrb) +{ + void __iomem *addr; + u16 offset = 0; + u32 cap_hdr; + + if (WARN_ON_ONCE(rcrb == CXL_RESOURCE_NONE)) + return 0; + + if (!request_mem_region(rcrb, SZ_4K, dev_name(dev))) + return 0; + + addr = ioremap(rcrb, SZ_4K); + if (!addr) + goto out; + + cap_hdr = readl(addr + offset); + while (PCI_EXT_CAP_ID(cap_hdr) != PCI_EXT_CAP_ID_ERR) { + offset = PCI_EXT_CAP_NEXT(cap_hdr); + + /* Offset 0 terminates capability list. 
*/ + if (!offset) + break; + cap_hdr = readl(addr + offset); + } + + if (offset) + dev_dbg(dev, "found AER extended capability (0x%x)\n", offset); + + iounmap(addr); +out: + release_mem_region(rcrb, SZ_4K); + + return offset; +} + resource_size_t __rcrb_to_component(struct device *dev, struct cxl_rcrb_info *ri, enum cxl_rcrb which) { diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 76d92561af2949..687043ece1018c 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -221,6 +221,14 @@ struct cxl_regs { struct_group_tagged(cxl_pmu_regs, pmu_regs, void __iomem *pmu; ); + + /* + * RCH downstream port specific RAS register + * @aer: CXL 3.0 8.2.1.1 RCH Downstream Port RCRB + */ + struct_group_tagged(cxl_rch_regs, rch_regs, + void __iomem *dport_aer; + ); }; struct cxl_reg_map { @@ -247,7 +255,7 @@ struct cxl_pmu_reg_map { /** * struct cxl_register_map - DVSEC harvested register block mapping parameters - * @dev: device for devm operations and logging + * @host: device for devm operations and logging * @base: virtual base of the register-block-BAR + @block_offset * @resource: physical resource base of the register block * @max_size: maximum mapping size to perform register search @@ -257,7 +265,7 @@ struct cxl_pmu_reg_map { * @pmu_map: cxl_reg_maps for CXL Performance Monitoring Units */ struct cxl_register_map { - struct device *dev; + struct device *host; void __iomem *base; resource_size_t resource; resource_size_t max_size; @@ -278,8 +286,7 @@ int cxl_map_component_regs(const struct cxl_register_map *map, unsigned long map_mask); int cxl_map_device_regs(const struct cxl_register_map *map, struct cxl_device_regs *regs); -int cxl_map_pmu_regs(struct pci_dev *pdev, struct cxl_pmu_regs *regs, - struct cxl_register_map *map); +int cxl_map_pmu_regs(struct cxl_register_map *map, struct cxl_pmu_regs *regs); enum cxl_regloc_type; int cxl_count_regblock(struct pci_dev *pdev, enum cxl_regloc_type type); @@ -321,6 +328,7 @@ enum cxl_decoder_type { */ #define CXL_DECODER_MAX_INTERLEAVE 16 +#define CXL_QOS_CLASS_INVALID -1 /** * struct cxl_decoder - Common CXL HDM Decoder Attributes @@ -432,6 +440,7 @@ typedef struct cxl_dport *(*cxl_calc_hb_fn)(struct cxl_root_decoder *cxlrd, * @calc_hb: which host bridge covers the n'th position by granularity * @platform_data: platform specific configuration data * @range_lock: sync region autodiscovery by address range + * @qos_class: QoS performance class cookie * @cxlsd: base cxl switch decoder */ struct cxl_root_decoder { @@ -440,6 +449,7 @@ struct cxl_root_decoder { cxl_calc_hb_fn calc_hb; void *platform_data; struct mutex range_lock; + int qos_class; struct cxl_switch_decoder cxlsd; }; @@ -572,11 +582,10 @@ struct cxl_dax_region { * @regions: cxl_region_ref instances, regions mapped by this port * @parent_dport: dport that points to this port in the parent * @decoder_ida: allocator for decoder ids - * @comp_map: component register capability mappings + * @reg_map: component and ras register mapping parameters * @nr_dports: number of entries in @dports * @hdm_end: track last allocated HDM decoder instance for allocation ordering * @commit_end: cursor to track highest committed decoder for commit ordering - * @component_reg_phys: component register capability base address (optional) * @dead: last ep has been removed, force port re-creation * @depth: How deep this port is relative to the root. depth 0 is the root. 
* @cdat: Cached CDAT data @@ -592,11 +601,10 @@ struct cxl_port { struct xarray regions; struct cxl_dport *parent_dport; struct ida decoder_ida; - struct cxl_register_map comp_map; + struct cxl_register_map reg_map; int nr_dports; int hdm_end; int commit_end; - resource_size_t component_reg_phys; bool dead; unsigned int depth; struct cxl_cdat { @@ -620,19 +628,21 @@ struct cxl_rcrb_info { /** * struct cxl_dport - CXL downstream port * @dport_dev: PCI bridge or firmware device representing the downstream link - * @comp_map: component register capability mappings + * @reg_map: component and ras register mapping parameters * @port_id: unique hardware identifier for dport in decoder target list * @rcrb: Data about the Root Complex Register Block layout * @rch: Indicate whether this dport was enumerated in RCH or VH mode * @port: reference to cxl_port that contains this downstream port + * @regs: Dport parsed register blocks */ struct cxl_dport { struct device *dport_dev; - struct cxl_register_map comp_map; + struct cxl_register_map reg_map; int port_id; struct cxl_rcrb_info rcrb; bool rch; struct cxl_port *port; + struct cxl_regs regs; }; /** @@ -679,6 +689,7 @@ static inline bool is_cxl_root(struct cxl_port *port) return port->uport_dev == port->dev.parent; } +int cxl_num_decoders_committed(struct cxl_port *port); bool is_cxl_port(const struct device *dev); struct cxl_port *to_cxl_port(const struct device *dev); struct pci_bus; @@ -706,6 +717,13 @@ struct cxl_dport *devm_cxl_add_rch_dport(struct cxl_port *port, struct device *dport_dev, int port_id, resource_size_t rcrb); +#ifdef CONFIG_PCIEAER_CXL +void cxl_setup_parent_dport(struct device *host, struct cxl_dport *dport); +#else +static inline void cxl_setup_parent_dport(struct device *host, + struct cxl_dport *dport) { } +#endif + struct cxl_decoder *to_cxl_decoder(struct device *dev); struct cxl_root_decoder *to_cxl_root_decoder(struct device *dev); struct cxl_switch_decoder *to_cxl_switch_decoder(struct device *dev); diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index 706f8a6d1ef43c..a2fcbca253f398 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -84,9 +84,12 @@ static inline bool is_cxl_endpoint(struct cxl_port *port) return is_cxl_memdev(port->uport_dev); } -struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds); +struct cxl_memdev *devm_cxl_add_memdev(struct device *host, + struct cxl_dev_state *cxlds); +int devm_cxl_sanitize_setup_notifier(struct device *host, + struct cxl_memdev *cxlmd); struct cxl_memdev_state; -int cxl_memdev_setup_fw_upload(struct cxl_memdev_state *mds); +int devm_cxl_setup_fw_upload(struct device *host, struct cxl_memdev_state *mds); int devm_cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled, resource_size_t base, resource_size_t len, resource_size_t skipped); @@ -360,16 +363,16 @@ struct cxl_fw_state { * * @state: state of last security operation * @enabled_cmds: All security commands enabled in the CEL - * @poll: polling for sanitization is enabled, device has no mbox irq support * @poll_tmo_secs: polling timeout + * @sanitize_active: sanitize completion pending * @poll_dwork: polling work item * @sanitize_node: sanitation sysfs file to notify */ struct cxl_security_state { unsigned long state; DECLARE_BITMAP(enabled_cmds, CXL_SEC_ENABLED_MAX); - bool poll; int poll_tmo_secs; + bool sanitize_active; struct delayed_work poll_dwork; struct kernfs_node *sanitize_node; }; @@ -397,6 +400,7 @@ enum cxl_devtype { * * @dev: The device associated with this CXL state * @cxlmd: The 
device representing the CXL.mem capabilities of @dev + * @reg_map: component and ras register mapping parameters * @regs: Parsed register blocks * @cxl_dvsec: Offset to the PCIe device DVSEC * @rcd: operating in RCD mode (CXL 3.0 9.11.8 CXL Devices Attached to an RCH) @@ -404,13 +408,13 @@ enum cxl_devtype { * @dpa_res: Overall DPA resource tree for the device * @pmem_res: Active Persistent memory capacity configuration * @ram_res: Active Volatile memory capacity configuration - * @component_reg_phys: register base of component registers * @serial: PCIe Device Serial Number * @type: Generic Memory Class device or Vendor Specific Memory device */ struct cxl_dev_state { struct device *dev; struct cxl_memdev *cxlmd; + struct cxl_register_map reg_map; struct cxl_regs regs; int cxl_dvsec; bool rcd; @@ -418,7 +422,6 @@ struct cxl_dev_state { struct resource dpa_res; struct resource pmem_res; struct resource ram_res; - resource_size_t component_reg_phys; u64 serial; enum cxl_devtype type; }; @@ -883,7 +886,7 @@ static inline void cxl_mem_active_dec(void) } #endif -int cxl_mem_sanitize(struct cxl_memdev_state *mds, u16 cmd); +int cxl_mem_sanitize(struct cxl_memdev *cxlmd, u16 cmd); struct cxl_hdm { struct cxl_component_regs regs; diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c index 317c7548e4e9b4..e087febf9af047 100644 --- a/drivers/cxl/mem.c +++ b/drivers/cxl/mem.c @@ -49,7 +49,6 @@ static int devm_cxl_add_endpoint(struct device *host, struct cxl_memdev *cxlmd, struct cxl_dport *parent_dport) { struct cxl_port *parent_port = parent_dport->port; - struct cxl_dev_state *cxlds = cxlmd->cxlds; struct cxl_port *endpoint, *iter, *down; int rc; @@ -65,8 +64,8 @@ static int devm_cxl_add_endpoint(struct device *host, struct cxl_memdev *cxlmd, ep->next = down; } - endpoint = devm_cxl_add_port(host, &cxlmd->dev, - cxlds->component_reg_phys, + /* Note: endpoint port component registers are derived from @cxlds */ + endpoint = devm_cxl_add_port(host, &cxlmd->dev, CXL_RESOURCE_NONE, parent_dport); if (IS_ERR(endpoint)) return PTR_ERR(endpoint); @@ -158,6 +157,8 @@ static int cxl_mem_probe(struct device *dev) else endpoint_parent = &parent_port->dev; + cxl_setup_parent_dport(dev, dport); + device_lock(endpoint_parent); if (!endpoint_parent->driver) { dev_err(dev, "CXL port topology %s not enabled\n", diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index 44a21ab7add51b..0155fb66b580d7 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -85,25 +85,28 @@ static int cxl_pci_mbox_wait_for_doorbell(struct cxl_dev_state *cxlds) status & CXLMDEV_DEV_FATAL ? " fatal" : "", \ status & CXLMDEV_FW_HALT ? " firmware-halt" : "") +/* + * Threaded irq dev_id's must be globally unique. cxl_dev_id provides a unique + * wrapper object for each irq within the same cxlds. 
+ */ struct cxl_dev_id { struct cxl_dev_state *cxlds; }; static int cxl_request_irq(struct cxl_dev_state *cxlds, int irq, - irq_handler_t handler, irq_handler_t thread_fn) + irq_handler_t thread_fn) { struct device *dev = cxlds->dev; struct cxl_dev_id *dev_id; - /* dev_id must be globally unique and must contain the cxlds */ dev_id = devm_kzalloc(dev, sizeof(*dev_id), GFP_KERNEL); if (!dev_id) return -ENOMEM; dev_id->cxlds = cxlds; - return devm_request_threaded_irq(dev, irq, handler, thread_fn, - IRQF_SHARED | IRQF_ONESHOT, - NULL, dev_id); + return devm_request_threaded_irq(dev, irq, NULL, thread_fn, + IRQF_SHARED | IRQF_ONESHOT, NULL, + dev_id); } static bool cxl_mbox_background_complete(struct cxl_dev_state *cxlds) @@ -128,10 +131,10 @@ static irqreturn_t cxl_pci_mbox_irq(int irq, void *id) reg = readq(cxlds->regs.mbox + CXLDEV_MBOX_BG_CMD_STATUS_OFFSET); opcode = FIELD_GET(CXLDEV_MBOX_BG_CMD_COMMAND_OPCODE_MASK, reg); if (opcode == CXL_MBOX_OP_SANITIZE) { + mutex_lock(&mds->mbox_mutex); if (mds->security.sanitize_node) - sysfs_notify_dirent(mds->security.sanitize_node); - - dev_dbg(cxlds->dev, "Sanitization operation ended\n"); + mod_delayed_work(system_wq, &mds->security.poll_dwork, 0); + mutex_unlock(&mds->mbox_mutex); } else { /* short-circuit the wait in __cxl_pci_mbox_send_cmd() */ rcuwait_wake_up(&mds->mbox_wait); @@ -152,18 +155,16 @@ static void cxl_mbox_sanitize_work(struct work_struct *work) mutex_lock(&mds->mbox_mutex); if (cxl_mbox_background_complete(cxlds)) { mds->security.poll_tmo_secs = 0; - put_device(cxlds->dev); - if (mds->security.sanitize_node) sysfs_notify_dirent(mds->security.sanitize_node); + mds->security.sanitize_active = false; dev_dbg(cxlds->dev, "Sanitization operation ended\n"); } else { int timeout = mds->security.poll_tmo_secs + 10; mds->security.poll_tmo_secs = min(15 * 60, timeout); - queue_delayed_work(system_wq, &mds->security.poll_dwork, - timeout * HZ); + schedule_delayed_work(&mds->security.poll_dwork, timeout * HZ); } mutex_unlock(&mds->mbox_mutex); } @@ -295,18 +296,15 @@ static int __cxl_pci_mbox_send_cmd(struct cxl_memdev_state *mds, * and allow userspace to poll(2) for completion. 
*/ if (mbox_cmd->opcode == CXL_MBOX_OP_SANITIZE) { - if (mds->security.poll) { - /* hold the device throughout */ - get_device(cxlds->dev); - - /* give first timeout a second */ - timeout = 1; - mds->security.poll_tmo_secs = timeout; - queue_delayed_work(system_wq, - &mds->security.poll_dwork, - timeout * HZ); - } - + if (mds->security.sanitize_active) + return -EBUSY; + + /* give first timeout a second */ + timeout = 1; + mds->security.poll_tmo_secs = timeout; + mds->security.sanitize_active = true; + schedule_delayed_work(&mds->security.poll_dwork, + timeout * HZ); dev_dbg(dev, "Sanitization operation started\n"); goto success; } @@ -389,7 +387,9 @@ static int cxl_pci_setup_mailbox(struct cxl_memdev_state *mds) const int cap = readl(cxlds->regs.mbox + CXLDEV_MBOX_CAPS_OFFSET); struct device *dev = cxlds->dev; unsigned long timeout; + int irq, msgnum; u64 md_status; + u32 ctrl; timeout = jiffies + mbox_ready_timeout * HZ; do { @@ -437,33 +437,26 @@ static int cxl_pci_setup_mailbox(struct cxl_memdev_state *mds) dev_dbg(dev, "Mailbox payload sized %zu", mds->payload_size); rcuwait_init(&mds->mbox_wait); + INIT_DELAYED_WORK(&mds->security.poll_dwork, cxl_mbox_sanitize_work); - if (cap & CXLDEV_MBOX_CAP_BG_CMD_IRQ) { - u32 ctrl; - int irq, msgnum; - struct pci_dev *pdev = to_pci_dev(cxlds->dev); - - msgnum = FIELD_GET(CXLDEV_MBOX_CAP_IRQ_MSGNUM_MASK, cap); - irq = pci_irq_vector(pdev, msgnum); - if (irq < 0) - goto mbox_poll; - - if (cxl_request_irq(cxlds, irq, cxl_pci_mbox_irq, NULL)) - goto mbox_poll; + /* background command interrupts are optional */ + if (!(cap & CXLDEV_MBOX_CAP_BG_CMD_IRQ)) + return 0; - /* enable background command mbox irq support */ - ctrl = readl(cxlds->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET); - ctrl |= CXLDEV_MBOX_CTRL_BG_CMD_IRQ; - writel(ctrl, cxlds->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET); + msgnum = FIELD_GET(CXLDEV_MBOX_CAP_IRQ_MSGNUM_MASK, cap); + irq = pci_irq_vector(to_pci_dev(cxlds->dev), msgnum); + if (irq < 0) + return 0; + if (cxl_request_irq(cxlds, irq, cxl_pci_mbox_irq)) return 0; - } -mbox_poll: - mds->security.poll = true; - INIT_DELAYED_WORK(&mds->security.poll_dwork, cxl_mbox_sanitize_work); + dev_dbg(cxlds->dev, "Mailbox interrupts enabled\n"); + /* enable background command mbox irq support */ + ctrl = readl(cxlds->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET); + ctrl |= CXLDEV_MBOX_CTRL_BG_CMD_IRQ; + writel(ctrl, cxlds->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET); - dev_dbg(cxlds->dev, "Mailbox interrupts are unsupported"); return 0; } @@ -484,7 +477,7 @@ static int cxl_rcrb_get_comp_regs(struct pci_dev *pdev, resource_size_t component_reg_phys; *map = (struct cxl_register_map) { - .dev = &pdev->dev, + .host = &pdev->dev, .resource = CXL_RESOURCE_NONE, }; @@ -653,7 +646,7 @@ static int cxl_event_req_irq(struct cxl_dev_state *cxlds, u8 setting) if (irq < 0) return irq; - return cxl_request_irq(cxlds, irq, NULL, cxl_event_thread); + return cxl_request_irq(cxlds, irq, cxl_event_thread); } static int cxl_event_get_int_policy(struct cxl_memdev_state *mds, @@ -834,16 +827,14 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) * If the component registers can't be found, the cxl_pci driver may * still be useful for management functions so don't return an error. 
*/ - cxlds->component_reg_phys = CXL_RESOURCE_NONE; - rc = cxl_pci_setup_regs(pdev, CXL_REGLOC_RBI_COMPONENT, &map); + rc = cxl_pci_setup_regs(pdev, CXL_REGLOC_RBI_COMPONENT, + &cxlds->reg_map); if (rc) dev_warn(&pdev->dev, "No component registers (%d)\n", rc); - else if (!map.component_map.ras.valid) + else if (!cxlds->reg_map.component_map.ras.valid) dev_dbg(&pdev->dev, "RAS registers not found\n"); - cxlds->component_reg_phys = map.resource; - - rc = cxl_map_component_regs(&map, &cxlds->regs.component, + rc = cxl_map_component_regs(&cxlds->reg_map, &cxlds->regs.component, BIT(CXL_CM_CAP_CAP_ID_RAS)); if (rc) dev_dbg(&pdev->dev, "Failed to map RAS capability.\n"); @@ -882,11 +873,15 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (rc) return rc; - cxlmd = devm_cxl_add_memdev(cxlds); + cxlmd = devm_cxl_add_memdev(&pdev->dev, cxlds); if (IS_ERR(cxlmd)) return PTR_ERR(cxlmd); - rc = cxl_memdev_setup_fw_upload(mds); + rc = devm_cxl_setup_fw_upload(&pdev->dev, mds); + if (rc) + return rc; + + rc = devm_cxl_sanitize_setup_notifier(&pdev->dev, cxlmd); if (rc) return rc; @@ -900,7 +895,7 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) break; } - rc = cxl_map_pmu_regs(pdev, &pmu_regs, &map); + rc = cxl_map_pmu_regs(&map, &pmu_regs); if (rc) { dev_dbg(&pdev->dev, "Could not map PMU regs\n"); break; diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c index 6240e05b95424b..47bc8e0b859077 100644 --- a/drivers/cxl/port.c +++ b/drivers/cxl/port.c @@ -62,6 +62,9 @@ static int cxl_switch_port_probe(struct cxl_port *port) struct cxl_hdm *cxlhdm; int rc; + /* Cache the data early to ensure is_visible() works */ + read_cdat_data(port); + rc = devm_cxl_port_enumerate_dports(port); if (rc < 0) return rc; diff --git a/drivers/pci/pcie/Kconfig b/drivers/pci/pcie/Kconfig index 228652a59f27e0..8999fcebde6aa8 100644 --- a/drivers/pci/pcie/Kconfig +++ b/drivers/pci/pcie/Kconfig @@ -49,6 +49,15 @@ config PCIEAER_INJECT gotten from: https://git.kernel.org/cgit/linux/kernel/git/gong.chen/aer-inject.git/ +config PCIEAER_CXL + bool "PCI Express CXL RAS support" + default y + depends on PCIEAER && CXL_PCI + help + Enables CXL error handling. + + If unsure, say Y. + # # PCI Express ECRC # diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c index dcd35993004e30..42a3bd35a3e118 100644 --- a/drivers/pci/pcie/aer.c +++ b/drivers/pci/pcie/aer.c @@ -760,9 +760,10 @@ int cper_severity_to_aer(int cper_severity) } } EXPORT_SYMBOL_GPL(cper_severity_to_aer); +#endif -void cper_print_aer(struct pci_dev *dev, int aer_severity, - struct aer_capability_regs *aer) +void pci_print_aer(struct pci_dev *dev, int aer_severity, + struct aer_capability_regs *aer) { int layer, agent, tlp_header_valid = 0; u32 status, mask; @@ -801,7 +802,7 @@ void cper_print_aer(struct pci_dev *dev, int aer_severity, trace_aer_event(dev_name(&dev->dev), (status & ~mask), aer_severity, tlp_header_valid, &aer->header_log); } -#endif +EXPORT_SYMBOL_NS_GPL(pci_print_aer, CXL); /** * add_error_device - list device to be handled @@ -934,14 +935,153 @@ static bool find_source_device(struct pci_dev *parent, return true; } +#ifdef CONFIG_PCIEAER_CXL + /** - * handle_error_source - handle logging error into an event log + * pci_aer_unmask_internal_errors - unmask internal errors + * @dev: pointer to the pcie_dev data structure + * + * Unmasks internal errors in the Uncorrectable and Correctable Error + * Mask registers. 
+ * + * Note: AER must be enabled and supported by the device which must be + * checked in advance, e.g. with pcie_aer_is_native(). + */ +static void pci_aer_unmask_internal_errors(struct pci_dev *dev) +{ + int aer = dev->aer_cap; + u32 mask; + + pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_MASK, &mask); + mask &= ~PCI_ERR_UNC_INTN; + pci_write_config_dword(dev, aer + PCI_ERR_UNCOR_MASK, mask); + + pci_read_config_dword(dev, aer + PCI_ERR_COR_MASK, &mask); + mask &= ~PCI_ERR_COR_INTERNAL; + pci_write_config_dword(dev, aer + PCI_ERR_COR_MASK, mask); +} + +static bool is_cxl_mem_dev(struct pci_dev *dev) +{ + /* + * The capability, status, and control fields in Device 0, + * Function 0 DVSEC control the CXL functionality of the + * entire device (CXL 3.0, 8.1.3). + */ + if (dev->devfn != PCI_DEVFN(0, 0)) + return false; + + /* + * CXL Memory Devices must have the 502h class code set (CXL + * 3.0, 8.1.12.1). + */ + if ((dev->class >> 8) != PCI_CLASS_MEMORY_CXL) + return false; + + return true; +} + +static bool cxl_error_is_native(struct pci_dev *dev) +{ + struct pci_host_bridge *host = pci_find_host_bridge(dev->bus); + + return (pcie_ports_native || host->native_aer); +} + +static bool is_internal_error(struct aer_err_info *info) +{ + if (info->severity == AER_CORRECTABLE) + return info->status & PCI_ERR_COR_INTERNAL; + + return info->status & PCI_ERR_UNC_INTN; +} + +static int cxl_rch_handle_error_iter(struct pci_dev *dev, void *data) +{ + struct aer_err_info *info = (struct aer_err_info *)data; + const struct pci_error_handlers *err_handler; + + if (!is_cxl_mem_dev(dev) || !cxl_error_is_native(dev)) + return 0; + + /* protect dev->driver */ + device_lock(&dev->dev); + + err_handler = dev->driver ? dev->driver->err_handler : NULL; + if (!err_handler) + goto out; + + if (info->severity == AER_CORRECTABLE) { + if (err_handler->cor_error_detected) + err_handler->cor_error_detected(dev); + } else if (err_handler->error_detected) { + if (info->severity == AER_NONFATAL) + err_handler->error_detected(dev, pci_channel_io_normal); + else if (info->severity == AER_FATAL) + err_handler->error_detected(dev, pci_channel_io_frozen); + } +out: + device_unlock(&dev->dev); + return 0; +} + +static void cxl_rch_handle_error(struct pci_dev *dev, struct aer_err_info *info) +{ + /* + * Internal errors of an RCEC indicate an AER error in an + * RCH's downstream port. Check and handle them in the CXL.mem + * device driver. 
+ */ + if (pci_pcie_type(dev) == PCI_EXP_TYPE_RC_EC && + is_internal_error(info)) + pcie_walk_rcec(dev, cxl_rch_handle_error_iter, info); +} + +static int handles_cxl_error_iter(struct pci_dev *dev, void *data) +{ + bool *handles_cxl = data; + + if (!*handles_cxl) + *handles_cxl = is_cxl_mem_dev(dev) && cxl_error_is_native(dev); + + /* Non-zero terminates iteration */ + return *handles_cxl; +} + +static bool handles_cxl_errors(struct pci_dev *rcec) +{ + bool handles_cxl = false; + + if (pci_pcie_type(rcec) == PCI_EXP_TYPE_RC_EC && + pcie_aer_is_native(rcec)) + pcie_walk_rcec(rcec, handles_cxl_error_iter, &handles_cxl); + + return handles_cxl; +} + +static void cxl_rch_enable_rcec(struct pci_dev *rcec) +{ + if (!handles_cxl_errors(rcec)) + return; + + pci_aer_unmask_internal_errors(rcec); + pci_info(rcec, "CXL: Internal errors unmasked"); +} + +#else +static inline void cxl_rch_enable_rcec(struct pci_dev *dev) { } +static inline void cxl_rch_handle_error(struct pci_dev *dev, + struct aer_err_info *info) { } +#endif + +/** + * pci_aer_handle_error - handle logging error into an event log * @dev: pointer to pci_dev data structure of error source device * @info: comprehensive error information * * Invoked when an error being detected by Root Port. */ -static void handle_error_source(struct pci_dev *dev, struct aer_err_info *info) +static void pci_aer_handle_error(struct pci_dev *dev, struct aer_err_info *info) { int aer = dev->aer_cap; @@ -965,6 +1105,12 @@ static void handle_error_source(struct pci_dev *dev, struct aer_err_info *info) pcie_do_recovery(dev, pci_channel_io_normal, aer_root_reset); else if (info->severity == AER_FATAL) pcie_do_recovery(dev, pci_channel_io_frozen, aer_root_reset); +} + +static void handle_error_source(struct pci_dev *dev, struct aer_err_info *info) +{ + cxl_rch_handle_error(dev, info); + pci_aer_handle_error(dev, info); pci_dev_put(dev); } @@ -997,7 +1143,7 @@ static void aer_recover_work_func(struct work_struct *work) PCI_SLOT(entry.devfn), PCI_FUNC(entry.devfn)); continue; } - cper_print_aer(pdev, entry.severity, entry.regs); + pci_print_aer(pdev, entry.severity, entry.regs); /* * Memory for aer_capability_regs(entry.regs) is being allocated from the * ghes_estatus_pool to protect it from overwriting when multiple sections @@ -1348,6 +1494,7 @@ static int aer_probe(struct pcie_device *dev) return status; } + cxl_rch_enable_rcec(port); aer_enable_rootport(rpc); pci_info(port, "enabled with IRQ %d\n", dev->irq); return 0; diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 24e014760328eb..d37642d8522518 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -15,6 +15,7 @@ #include #include #include +#include struct irq_domain; struct irq_domain_ops; @@ -24,6 +25,16 @@ struct irq_domain_ops; #endif #include +#ifdef CONFIG_ACPI_TABLE_LIB +#define EXPORT_SYMBOL_ACPI_LIB(x) EXPORT_SYMBOL_NS_GPL(x, ACPI) +#define __init_or_acpilib +#define __initdata_or_acpilib +#else +#define EXPORT_SYMBOL_ACPI_LIB(x) +#define __init_or_acpilib __init +#define __initdata_or_acpilib __initdata +#endif + #ifdef CONFIG_ACPI #include @@ -119,21 +130,8 @@ enum acpi_address_range_id { /* Table Handlers */ -union acpi_subtable_headers { - struct acpi_subtable_header common; - struct acpi_hmat_structure hmat; - struct acpi_prmt_module_header prmt; - struct acpi_cedt_header cedt; -}; - typedef int (*acpi_tbl_table_handler)(struct acpi_table_header *table); -typedef int (*acpi_tbl_entry_handler)(union acpi_subtable_headers *header, - const unsigned long end); - -typedef int 
(*acpi_tbl_entry_handler_arg)(union acpi_subtable_headers *header, - void *arg, const unsigned long end); - /* Debugger support */ struct acpi_debugger_ops { @@ -207,14 +205,6 @@ static inline int acpi_debugger_notify_command_complete(void) (!entry) || (unsigned long)entry + sizeof(*entry) > end || \ ((struct acpi_subtable_header *)entry)->length < sizeof(*entry)) -struct acpi_subtable_proc { - int id; - acpi_tbl_entry_handler handler; - acpi_tbl_entry_handler_arg handler_arg; - void *arg; - int count; -}; - void __iomem *__acpi_map_table(unsigned long phys, unsigned long size); void __acpi_unmap_table(void __iomem *map, unsigned long size); int early_acpi_boot_init(void); @@ -229,16 +219,6 @@ void acpi_reserve_initial_tables (void); void acpi_table_init_complete (void); int acpi_table_init (void); -#ifdef CONFIG_ACPI_TABLE_LIB -#define EXPORT_SYMBOL_ACPI_LIB(x) EXPORT_SYMBOL_NS_GPL(x, ACPI) -#define __init_or_acpilib -#define __initdata_or_acpilib -#else -#define EXPORT_SYMBOL_ACPI_LIB(x) -#define __init_or_acpilib __init -#define __initdata_or_acpilib __initdata -#endif - int acpi_table_parse(char *id, acpi_tbl_table_handler handler); int __init_or_acpilib acpi_table_parse_entries(char *id, unsigned long table_size, int entry_id, diff --git a/include/linux/aer.h b/include/linux/aer.h index 29cc10220952db..f6ea2f57d80896 100644 --- a/include/linux/aer.h +++ b/include/linux/aer.h @@ -51,7 +51,7 @@ static inline int pci_aer_clear_nonfatal_status(struct pci_dev *dev) static inline int pcie_aer_is_native(struct pci_dev *dev) { return 0; } #endif -void cper_print_aer(struct pci_dev *dev, int aer_severity, +void pci_print_aer(struct pci_dev *dev, int aer_severity, struct aer_capability_regs *aer); int cper_severity_to_aer(int cper_severity); void aer_recover_queue(int domain, unsigned int bus, unsigned int devfn, diff --git a/include/linux/fw_table.h b/include/linux/fw_table.h new file mode 100644 index 00000000000000..ff8fa58d5818b3 --- /dev/null +++ b/include/linux/fw_table.h @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * fw_tables.h - Parsing support for ACPI and ACPI-like tables provided by + * platform or device firmware + * + * Copyright (C) 2001 Paul Diefenbaugh + * Copyright (C) 2023 Intel Corp. 
+ */ +#ifndef _FW_TABLE_H_ +#define _FW_TABLE_H_ + +union acpi_subtable_headers; + +typedef int (*acpi_tbl_entry_handler)(union acpi_subtable_headers *header, + const unsigned long end); + +typedef int (*acpi_tbl_entry_handler_arg)(union acpi_subtable_headers *header, + void *arg, const unsigned long end); + +struct acpi_subtable_proc { + int id; + acpi_tbl_entry_handler handler; + acpi_tbl_entry_handler_arg handler_arg; + void *arg; + int count; +}; + +#include <linux/acpi.h> +#include <acpi/acpi.h> + +union acpi_subtable_headers { + struct acpi_subtable_header common; + struct acpi_hmat_structure hmat; + struct acpi_prmt_module_header prmt; + struct acpi_cedt_header cedt; +}; + +int acpi_parse_entries_array(char *id, unsigned long table_size, + struct acpi_table_header *table_header, + struct acpi_subtable_proc *proc, + int proc_num, unsigned int max_entries); + +#endif diff --git a/lib/Kconfig b/lib/Kconfig index 2d90935d5a2105..3ea1c830efabc3 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -772,3 +772,6 @@ config ASN1_ENCODER config POLYNOMIAL tristate + +config FIRMWARE_TABLE + bool diff --git a/lib/Makefile b/lib/Makefile index 13455f47f9df61..6b09731d8e6195 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -409,6 +409,8 @@ obj-$(CONFIG_SIPHASH_KUNIT_TEST) += siphash_kunit.o obj-$(CONFIG_GENERIC_LIB_DEVMEM_IS_ALLOWED) += devmem_is_allowed.o +obj-$(CONFIG_FIRMWARE_TABLE) += fw_table.o + # FORTIFY_SOURCE compile-time behavior tests TEST_FORTIFY_SRCS = $(wildcard $(srctree)/$(src)/test_fortify/*-*.c) TEST_FORTIFY_LOGS = $(patsubst $(srctree)/$(src)/%.c, %.log, $(TEST_FORTIFY_SRCS)) diff --git a/lib/fw_table.c b/lib/fw_table.c new file mode 100644 index 00000000000000..b51f30a28e47b9 --- /dev/null +++ b/lib/fw_table.c @@ -0,0 +1,188 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * fw_tables.c - Parsing support for ACPI and ACPI-like tables provided by + * platform or device firmware + * + * Copyright (C) 2001 Paul Diefenbaugh + * Copyright (C) 2023 Intel Corp.
+ */ +#include <linux/errno.h> +#include <linux/acpi.h> +#include <linux/fw_table.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/string.h> + +enum acpi_subtable_type { + ACPI_SUBTABLE_COMMON, + ACPI_SUBTABLE_HMAT, + ACPI_SUBTABLE_PRMT, + ACPI_SUBTABLE_CEDT, +}; + +struct acpi_subtable_entry { + union acpi_subtable_headers *hdr; + enum acpi_subtable_type type; +}; + +static unsigned long __init_or_acpilib +acpi_get_entry_type(struct acpi_subtable_entry *entry) +{ + switch (entry->type) { + case ACPI_SUBTABLE_COMMON: + return entry->hdr->common.type; + case ACPI_SUBTABLE_HMAT: + return entry->hdr->hmat.type; + case ACPI_SUBTABLE_PRMT: + return 0; + case ACPI_SUBTABLE_CEDT: + return entry->hdr->cedt.type; + } + return 0; +} + +static unsigned long __init_or_acpilib +acpi_get_entry_length(struct acpi_subtable_entry *entry) +{ + switch (entry->type) { + case ACPI_SUBTABLE_COMMON: + return entry->hdr->common.length; + case ACPI_SUBTABLE_HMAT: + return entry->hdr->hmat.length; + case ACPI_SUBTABLE_PRMT: + return entry->hdr->prmt.length; + case ACPI_SUBTABLE_CEDT: + return entry->hdr->cedt.length; + } + return 0; +} + +static unsigned long __init_or_acpilib +acpi_get_subtable_header_length(struct acpi_subtable_entry *entry) +{ + switch (entry->type) { + case ACPI_SUBTABLE_COMMON: + return sizeof(entry->hdr->common); + case ACPI_SUBTABLE_HMAT: + return sizeof(entry->hdr->hmat); + case ACPI_SUBTABLE_PRMT: + return sizeof(entry->hdr->prmt); + case ACPI_SUBTABLE_CEDT: + return sizeof(entry->hdr->cedt); + } + return 0; +} + +static enum acpi_subtable_type __init_or_acpilib +acpi_get_subtable_type(char *id) +{ + if (strncmp(id, ACPI_SIG_HMAT, 4) == 0) + return ACPI_SUBTABLE_HMAT; + if (strncmp(id, ACPI_SIG_PRMT, 4) == 0) + return ACPI_SUBTABLE_PRMT; + if (strncmp(id, ACPI_SIG_CEDT, 4) == 0) + return ACPI_SUBTABLE_CEDT; + return ACPI_SUBTABLE_COMMON; +} + +static __init_or_acpilib bool has_handler(struct acpi_subtable_proc *proc) +{ + return proc->handler || proc->handler_arg; +} + +static __init_or_acpilib int call_handler(struct acpi_subtable_proc *proc, + union acpi_subtable_headers *hdr, + unsigned long end) +{ + if (proc->handler) + return proc->handler(hdr, end); + if (proc->handler_arg) + return proc->handler_arg(hdr, proc->arg, end); + return -EINVAL; +} + +/** + * acpi_parse_entries_array - for each proc_num find a suitable subtable + * + * @id: table id (for debugging purposes) + * @table_size: size of the root table + * @table_header: where does the table start? + * @proc: array of acpi_subtable_proc struct containing entry id + * and associated handler with it + * @proc_num: how big proc is? + * @max_entries: how many entries can we process? + * + * For each proc_num find a subtable with proc->id and run proc->handler + * on it. Assumption is that there's only single handler for particular + * entry id. + * + * The table_size is not the size of the complete ACPI table (the length + * field in the header struct), but only the size of the root table; i.e., + * the offset from the very first byte of the complete ACPI table, to the + * first byte of the very first subtable. + * + * On success returns sum of all matching entries for all proc handlers. + * Otherwise, -ENODEV or -EINVAL is returned.
+ */ +int __init_or_acpilib +acpi_parse_entries_array(char *id, unsigned long table_size, + struct acpi_table_header *table_header, + struct acpi_subtable_proc *proc, + int proc_num, unsigned int max_entries) +{ + unsigned long table_end, subtable_len, entry_len; + struct acpi_subtable_entry entry; + int count = 0; + int errs = 0; + int i; + + table_end = (unsigned long)table_header + table_header->length; + + /* Parse all entries looking for a match. */ + + entry.type = acpi_get_subtable_type(id); + entry.hdr = (union acpi_subtable_headers *) + ((unsigned long)table_header + table_size); + subtable_len = acpi_get_subtable_header_length(&entry); + + while (((unsigned long)entry.hdr) + subtable_len < table_end) { + if (max_entries && count >= max_entries) + break; + + for (i = 0; i < proc_num; i++) { + if (acpi_get_entry_type(&entry) != proc[i].id) + continue; + if (!has_handler(&proc[i]) || + (!errs && + call_handler(&proc[i], entry.hdr, table_end))) { + errs++; + continue; + } + + proc[i].count++; + break; + } + if (i != proc_num) + count++; + + /* + * If entry->length is 0, break from this loop to avoid + * infinite loop. + */ + entry_len = acpi_get_entry_length(&entry); + if (entry_len == 0) { + pr_err("[%4.4s:0x%02x] Invalid zero length\n", id, proc->id); + return -EINVAL; + } + + entry.hdr = (union acpi_subtable_headers *) + ((unsigned long)entry.hdr + entry_len); + } + + if (max_entries && count > max_entries) { + pr_warn("[%4.4s:0x%02x] found the maximum %i entries\n", + id, proc->id, count); + } + + return errs ? -EINVAL : count; +} diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c index fb6ab9cef84f77..b8854629990227 100644 --- a/tools/testing/cxl/test/cxl.c +++ b/tools/testing/cxl/test/cxl.c @@ -831,7 +831,7 @@ static void mock_init_hdm_decoder(struct cxl_decoder *cxld) cxld->interleave_ways = 2; else cxld->interleave_ways = 1; - cxld->interleave_granularity = 256; + cxld->interleave_granularity = 4096; cxld->hpa_range = (struct range) { .start = base, .end = base + size - 1, diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c index 464fc39ed2776b..ee61fa3a2411f8 100644 --- a/tools/testing/cxl/test/mem.c +++ b/tools/testing/cxl/test/mem.c @@ -89,6 +89,12 @@ static struct cxl_cel_entry mock_cel[] = { .effect = cpu_to_le16(EFFECT(CONF_CHANGE_COLD_RESET) | EFFECT(CONF_CHANGE_IMMEDIATE)), }, + { + .opcode = cpu_to_le16(CXL_MBOX_OP_SANITIZE), + .effect = cpu_to_le16(EFFECT(DATA_CHANGE_IMMEDIATE) | + EFFECT(SECURITY_CHANGE_IMMEDIATE) | + EFFECT(BACKGROUND_OP)), + }, }; /* See CXL 2.0 Table 181 Get Health Info Output Payload */ @@ -133,7 +139,6 @@ struct mock_event_log { }; struct mock_event_store { - struct cxl_memdev_state *mds; struct mock_event_log mock_logs[CXL_EVENT_TYPE_MAX]; u32 ev_status; }; @@ -150,8 +155,10 @@ struct cxl_mockmem_data { int user_limit; int master_limit; struct mock_event_store mes; + struct cxl_memdev_state *mds; u8 event_buf[SZ_4K]; u64 timestamp; + unsigned long sanitize_timeout; }; static struct mock_event_log *event_find_log(struct device *dev, int log_type) @@ -326,7 +333,7 @@ static void cxl_mock_event_trigger(struct device *dev) event_reset_log(log); } - cxl_mem_get_event_records(mes->mds, mes->ev_status); + cxl_mem_get_event_records(mdata->mds, mes->ev_status); } struct cxl_event_record_raw maint_needed = { @@ -567,9 +574,26 @@ static int mock_partition_info(struct cxl_mbox_cmd *cmd) return 0; } +void cxl_mockmem_sanitize_work(struct work_struct *work) +{ + struct cxl_memdev_state *mds = + container_of(work, 
typeof(*mds), security.poll_dwork.work); + + mutex_lock(&mds->mbox_mutex); + if (mds->security.sanitize_node) + sysfs_notify_dirent(mds->security.sanitize_node); + mds->security.sanitize_active = false; + mutex_unlock(&mds->mbox_mutex); + + dev_dbg(mds->cxlds.dev, "sanitize complete\n"); +} + static int mock_sanitize(struct cxl_mockmem_data *mdata, struct cxl_mbox_cmd *cmd) { + struct cxl_memdev_state *mds = mdata->mds; + int rc = 0; + if (cmd->size_in != 0) return -EINVAL; @@ -585,7 +609,16 @@ static int mock_sanitize(struct cxl_mockmem_data *mdata, return -ENXIO; } - return 0; /* assume less than 2 secs, no bg */ + mutex_lock(&mds->mbox_mutex); + if (schedule_delayed_work(&mds->security.poll_dwork, + msecs_to_jiffies(mdata->sanitize_timeout))) { + mds->security.sanitize_active = true; + dev_dbg(mds->cxlds.dev, "sanitize issued\n"); + } else + rc = -EBUSY; + mutex_unlock(&mds->mbox_mutex); + + return rc; } static int mock_secure_erase(struct cxl_mockmem_data *mdata, @@ -1237,6 +1270,7 @@ static int mock_transfer_fw(struct cxl_mockmem_data *mdata, } memcpy(fw + offset, transfer->data, length); + usleep_range(1500, 2000); return 0; } @@ -1415,16 +1449,16 @@ static int cxl_mock_mem_probe(struct platform_device *pdev) if (IS_ERR(mds)) return PTR_ERR(mds); + mdata->mds = mds; mds->mbox_send = cxl_mock_mbox_send; mds->payload_size = SZ_4K; mds->event.buf = (struct cxl_get_event_payload *) mdata->event_buf; + INIT_DELAYED_WORK(&mds->security.poll_dwork, cxl_mockmem_sanitize_work); cxlds = &mds->cxlds; cxlds->serial = pdev->id; - if (is_rcd(pdev)) { + if (is_rcd(pdev)) cxlds->rcd = true; - cxlds->component_reg_phys = CXL_RESOURCE_NONE; - } rc = cxl_enumerate_cmds(mds); if (rc) @@ -1447,14 +1481,17 @@ static int cxl_mock_mem_probe(struct platform_device *pdev) if (rc) return rc; - mdata->mes.mds = mds; cxl_mock_add_event_logs(&mdata->mes); - cxlmd = devm_cxl_add_memdev(cxlds); + cxlmd = devm_cxl_add_memdev(&pdev->dev, cxlds); if (IS_ERR(cxlmd)) return PTR_ERR(cxlmd); - rc = cxl_memdev_setup_fw_upload(mds); + rc = devm_cxl_setup_fw_upload(&pdev->dev, mds); + if (rc) + return rc; + + rc = devm_cxl_sanitize_setup_notifier(&pdev->dev, cxlmd); if (rc) return rc; @@ -1526,10 +1563,38 @@ static ssize_t fw_buf_checksum_show(struct device *dev, static DEVICE_ATTR_RO(fw_buf_checksum); +static ssize_t sanitize_timeout_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct cxl_mockmem_data *mdata = dev_get_drvdata(dev); + + return sysfs_emit(buf, "%lu\n", mdata->sanitize_timeout); +} + +static ssize_t sanitize_timeout_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct cxl_mockmem_data *mdata = dev_get_drvdata(dev); + unsigned long val; + int rc; + + rc = kstrtoul(buf, 0, &val); + if (rc) + return rc; + + mdata->sanitize_timeout = val; + + return count; +} + +static DEVICE_ATTR_RW(sanitize_timeout); + static struct attribute *cxl_mock_mem_attrs[] = { &dev_attr_security_lock.attr, &dev_attr_event_trigger.attr, &dev_attr_fw_buf_checksum.attr, + &dev_attr_sanitize_timeout.attr, NULL }; ATTRIBUTE_GROUPS(cxl_mock_mem);