From a9da291f25f014c8ee999f498305949332d58cd6 Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Thu, 4 Aug 2016 04:30:37 +0000 Subject: [PATCH 01/42] dt-bindings: irqchip: Add J-Core interrupt controller bindings Signed-off-by: Rich Felker Acked-by: Rob Herring Link: https://lkml.kernel.org/r/c8aae4597153595cf965efe96422f699639c9d51.147018b6529.git.dalias@libc.org Signed-off-by: Jason Cooper --- .../interrupt-controller/jcore,aic.txt | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 Documentation/devicetree/bindings/interrupt-controller/jcore,aic.txt diff --git a/Documentation/devicetree/bindings/interrupt-controller/jcore,aic.txt b/Documentation/devicetree/bindings/interrupt-controller/jcore,aic.txt new file mode 100644 index 00000000000000..ee2ad36f8df8c8 --- /dev/null +++ b/Documentation/devicetree/bindings/interrupt-controller/jcore,aic.txt @@ -0,0 +1,26 @@ +J-Core Advanced Interrupt Controller + +Required properties: + +- compatible: Should be "jcore,aic1" for the (obsolete) first-generation aic + with 8 interrupt lines with programmable priorities, or "jcore,aic2" for + the "aic2" core with 64 interrupts. + +- reg: Memory region(s) for configuration. For SMP, there should be one + region per cpu, indexed by the sequential, zero-based hardware cpu + number. + +- interrupt-controller: Identifies the node as an interrupt controller + +- #interrupt-cells: Specifies the number of cells needed to encode an + interrupt source. The value shall be 1. + + +Example: + +aic: interrupt-controller@200 { + compatible = "jcore,aic2"; + reg = < 0x200 0x30 0x500 0x30 >; + interrupt-controller; + #interrupt-cells = <1>; +}; From 981b58f66cfcd32dc4ebbaeef8451daf393b6c94 Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Thu, 4 Aug 2016 04:30:37 +0000 Subject: [PATCH 02/42] irqchip/jcore-aic: Add J-Core AIC driver There are two versions of the J-Core interrupt controller in use, aic1 which generates interrupts with programmable priorities, but only supports 8 irq lines and maps them to cpu traps in the range 17 to 24, and aic2 which uses traps in the range 64-127 and supports up to 128 irqs, with priorities dependent on the interrupt number. The Linux driver does not make use of priorities anyway. For simplicity, there is no aic1-specific logic in the driver beyond setting the priority register, which is necessary for interrupts to work at all. Eventually aic1 will likely be phased out, but it's currently in use in deployments and all released bitstream binaries. Signed-off-by: Rich Felker Link: https://lkml.kernel.org/r/c3b89ef74aaa6477575dbe2d410eb1d182503243.147018b6529.git.dalias@libc.org Signed-off-by: Jason Cooper --- drivers/irqchip/Kconfig | 7 +++ drivers/irqchip/Makefile | 1 + drivers/irqchip/irq-jcore-aic.c | 94 +++++++++++++++++++++++++++++++++ 3 files changed, 102 insertions(+) create mode 100644 drivers/irqchip/irq-jcore-aic.c diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig index 7f8728984f4472..43bed4e5c7ae4e 100644 --- a/drivers/irqchip/Kconfig +++ b/drivers/irqchip/Kconfig @@ -156,6 +156,13 @@ config PIC32_EVIC select GENERIC_IRQ_CHIP select IRQ_DOMAIN +config JCORE_AIC + bool "J-Core integrated AIC" + depends on OF && (SUPERH || COMPILE_TEST) + select IRQ_DOMAIN + help + Support for the J-Core integrated AIC. + config RENESAS_INTC_IRQPIN bool select IRQ_DOMAIN diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile index 4c203b6b816357..ee7e3ca0ac2383 100644 --- a/drivers/irqchip/Makefile +++ b/drivers/irqchip/Makefile @@ -40,6 +40,7 @@ obj-$(CONFIG_I8259) += irq-i8259.o obj-$(CONFIG_IMGPDC_IRQ) += irq-imgpdc.o obj-$(CONFIG_IRQ_MIPS_CPU) += irq-mips-cpu.o obj-$(CONFIG_SIRF_IRQ) += irq-sirfsoc.o +obj-$(CONFIG_JCORE_AIC) += irq-jcore-aic.o obj-$(CONFIG_RENESAS_INTC_IRQPIN) += irq-renesas-intc-irqpin.o obj-$(CONFIG_RENESAS_IRQC) += irq-renesas-irqc.o obj-$(CONFIG_VERSATILE_FPGA_IRQ) += irq-versatile-fpga.o diff --git a/drivers/irqchip/irq-jcore-aic.c b/drivers/irqchip/irq-jcore-aic.c new file mode 100644 index 00000000000000..5e5e3bb7d3c704 --- /dev/null +++ b/drivers/irqchip/irq-jcore-aic.c @@ -0,0 +1,94 @@ +/* + * J-Core SoC AIC driver + * + * Copyright (C) 2015-2016 Smart Energy Instruments, Inc. + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define JCORE_AIC_MAX_HWIRQ 127 +#define JCORE_AIC1_MIN_HWIRQ 16 +#define JCORE_AIC2_MIN_HWIRQ 64 + +#define JCORE_AIC1_INTPRI_REG 8 + +static struct irq_chip jcore_aic; + +static int jcore_aic_irqdomain_map(struct irq_domain *d, unsigned int irq, + irq_hw_number_t hwirq) +{ + struct irq_chip *aic = d->host_data; + + irq_set_chip_and_handler(irq, aic, handle_simple_irq); + + return 0; +} + +static const struct irq_domain_ops jcore_aic_irqdomain_ops = { + .map = jcore_aic_irqdomain_map, + .xlate = irq_domain_xlate_onecell, +}; + +static void noop(struct irq_data *data) +{ +} + +int __init aic_irq_of_init(struct device_node *node, struct device_node *parent) +{ + unsigned min_irq = JCORE_AIC2_MIN_HWIRQ; + unsigned dom_sz = JCORE_AIC_MAX_HWIRQ+1; + struct irq_domain *domain; + + pr_info("Initializing J-Core AIC\n"); + + /* AIC1 needs priority initialization to receive interrupts. */ + if (of_device_is_compatible(node, "jcore,aic1")) { + unsigned cpu; + + for_each_present_cpu(cpu) { + void __iomem *base = of_iomap(node, cpu); + + if (!base) { + pr_err("Unable to map AIC for cpu %u\n", cpu); + return -ENOMEM; + } + __raw_writel(0xffffffff, base + JCORE_AIC1_INTPRI_REG); + iounmap(base); + } + min_irq = JCORE_AIC1_MIN_HWIRQ; + } + + /* + * The irq chip framework requires either mask/unmask or enable/disable + * function pointers to be provided, but the hardware does not have any + * such mechanism; the only interrupt masking is at the cpu level and + * it affects all interrupts. We provide dummy mask/unmask. The hardware + * handles all interrupt control and clears pending status when the cpu + * accepts the interrupt. + */ + jcore_aic.irq_mask = noop; + jcore_aic.irq_unmask = noop; + jcore_aic.name = "AIC"; + + domain = irq_domain_add_linear(node, dom_sz, &jcore_aic_irqdomain_ops, + &jcore_aic); + if (!domain) + return -ENOMEM; + irq_create_strict_mappings(domain, min_irq, min_irq, dom_sz - min_irq); + + return 0; +} + +IRQCHIP_DECLARE(jcore_aic2, "jcore,aic2", aic_irq_of_init); +IRQCHIP_DECLARE(jcore_aic1, "jcore,aic1", aic_irq_of_init); From 33a6c324a7266462f933ab25a92383c882e4b4f1 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Fri, 5 Aug 2016 16:55:18 +0200 Subject: [PATCH 03/42] dt-bindings: interrupt-controller: add DT binding for Marvell 7K/8K PIC This commit adds the Device Tree binding description for the PIC interrupt controller available in the ARM64 Marvell Armada 7K/8K SoCs. Signed-off-by: Thomas Petazzoni Acked-by: Rob Herring Link: https://lkml.kernel.org/r/1470408921-447-2-git-send-email-thomas.petazzoni@free-electrons.com Signed-off-by: Jason Cooper --- .../marvell,armada-8k-pic.txt | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 Documentation/devicetree/bindings/interrupt-controller/marvell,armada-8k-pic.txt diff --git a/Documentation/devicetree/bindings/interrupt-controller/marvell,armada-8k-pic.txt b/Documentation/devicetree/bindings/interrupt-controller/marvell,armada-8k-pic.txt new file mode 100644 index 00000000000000..86a7b4cd03f5df --- /dev/null +++ b/Documentation/devicetree/bindings/interrupt-controller/marvell,armada-8k-pic.txt @@ -0,0 +1,25 @@ +Marvell Armada 7K/8K PIC Interrupt controller +--------------------------------------------- + +This is the Device Tree binding for the PIC, a secondary interrupt +controller available on the Marvell Armada 7K/8K ARM64 SoCs, and +typically connected to the GIC as the primary interrupt controller. + +Required properties: +- compatible: should be "marvell,armada-8k-pic" +- interrupt-controller: identifies the node as an interrupt controller +- #interrupt-cells: the number of cells to define interrupts on this + controller. Should be 1 +- reg: the register area for the PIC interrupt controller +- interrupts: the interrupt to the primary interrupt controller, + typically the GIC + +Example: + + pic: interrupt-controller@3f0100 { + compatible = "marvell,armada-8k-pic"; + reg = <0x3f0100 0x10>; + #interrupt-cells = <1>; + interrupt-controller; + interrupts = ; + }; From a109893bd3e71912b376a731b27de8c45fded9b3 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Fri, 5 Aug 2016 16:55:19 +0200 Subject: [PATCH 04/42] irqchip/mvebu-pic: New driver for Marvell Armada 7K/8K PIC The Marvell Armada 7K/8K integrates a secondary interrupt controller very originally named "PIC". It is connected to the main GIC via a PPI. Amongst other things, this PIC is used for the ARM PMU. This commit adds a simple irqchip driver for this interrupt controller. Since this interrupt controller is not needed early at boot time, we make the driver a proper platform driver rather than use the IRQCHIP_DECLARE() mechanism. Signed-off-by: Yehuda Yitschak Signed-off-by: Thomas Petazzoni Link: https://lkml.kernel.org/r/1470408921-447-3-git-send-email-thomas.petazzoni@free-electrons.com Signed-off-by: Jason Cooper --- drivers/irqchip/Kconfig | 3 + drivers/irqchip/Makefile | 1 + drivers/irqchip/irq-mvebu-pic.c | 197 ++++++++++++++++++++++++++++++++ 3 files changed, 201 insertions(+) create mode 100644 drivers/irqchip/irq-mvebu-pic.c diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig index 7f8728984f4472..5c08cdb510d055 100644 --- a/drivers/irqchip/Kconfig +++ b/drivers/irqchip/Kconfig @@ -251,6 +251,9 @@ config IRQ_MXS config MVEBU_ODMI bool +config MVEBU_PIC + bool + config LS_SCFG_MSI def_bool y if SOC_LS1021A || ARCH_LAYERSCAPE depends on PCI && PCI_MSI diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile index 4c203b6b816357..93e61bad814213 100644 --- a/drivers/irqchip/Makefile +++ b/drivers/irqchip/Makefile @@ -68,6 +68,7 @@ obj-$(CONFIG_INGENIC_IRQ) += irq-ingenic.o obj-$(CONFIG_IMX_GPCV2) += irq-imx-gpcv2.o obj-$(CONFIG_PIC32_EVIC) += irq-pic32-evic.o obj-$(CONFIG_MVEBU_ODMI) += irq-mvebu-odmi.o +obj-$(CONFIG_MVEBU_PIC) += irq-mvebu-pic.o obj-$(CONFIG_LS_SCFG_MSI) += irq-ls-scfg-msi.o obj-$(CONFIG_EZNPS_GIC) += irq-eznps.o obj-$(CONFIG_ARCH_ASPEED) += irq-aspeed-vic.o diff --git a/drivers/irqchip/irq-mvebu-pic.c b/drivers/irqchip/irq-mvebu-pic.c new file mode 100644 index 00000000000000..eec63951129ad4 --- /dev/null +++ b/drivers/irqchip/irq-mvebu-pic.c @@ -0,0 +1,197 @@ +/* + * Copyright (C) 2016 Marvell + * + * Yehuda Yitschak + * Thomas Petazzoni + * + * This file is licensed under the terms of the GNU General Public + * License version 2. This program is licensed "as is" without any + * warranty of any kind, whether express or implied. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define PIC_CAUSE 0x0 +#define PIC_MASK 0x4 + +#define PIC_MAX_IRQS 32 +#define PIC_MAX_IRQ_MASK ((1UL << PIC_MAX_IRQS) - 1) + +struct mvebu_pic { + void __iomem *base; + u32 parent_irq; + struct irq_domain *domain; + struct irq_chip irq_chip; +}; + +static void mvebu_pic_reset(struct mvebu_pic *pic) +{ + /* ACK and mask all interrupts */ + writel(0, pic->base + PIC_MASK); + writel(PIC_MAX_IRQ_MASK, pic->base + PIC_CAUSE); +} + +static void mvebu_pic_eoi_irq(struct irq_data *d) +{ + struct mvebu_pic *pic = irq_data_get_irq_chip_data(d); + + writel(1 << d->hwirq, pic->base + PIC_CAUSE); +} + +static void mvebu_pic_mask_irq(struct irq_data *d) +{ + struct mvebu_pic *pic = irq_data_get_irq_chip_data(d); + u32 reg; + + reg = readl(pic->base + PIC_MASK); + reg |= (1 << d->hwirq); + writel(reg, pic->base + PIC_MASK); +} + +static void mvebu_pic_unmask_irq(struct irq_data *d) +{ + struct mvebu_pic *pic = irq_data_get_irq_chip_data(d); + u32 reg; + + reg = readl(pic->base + PIC_MASK); + reg &= ~(1 << d->hwirq); + writel(reg, pic->base + PIC_MASK); +} + +static int mvebu_pic_irq_map(struct irq_domain *domain, unsigned int virq, + irq_hw_number_t hwirq) +{ + struct mvebu_pic *pic = domain->host_data; + + irq_set_percpu_devid(virq); + irq_set_chip_data(virq, pic); + irq_set_chip_and_handler(virq, &pic->irq_chip, + handle_percpu_devid_irq); + irq_set_status_flags(virq, IRQ_LEVEL); + irq_set_probe(virq); + + return 0; +} + +static const struct irq_domain_ops mvebu_pic_domain_ops = { + .map = mvebu_pic_irq_map, + .xlate = irq_domain_xlate_onecell, +}; + +static void mvebu_pic_handle_cascade_irq(struct irq_desc *desc) +{ + struct mvebu_pic *pic = irq_desc_get_handler_data(desc); + struct irq_chip *chip = irq_desc_get_chip(desc); + unsigned long irqmap, irqn; + unsigned int cascade_irq; + + irqmap = readl_relaxed(pic->base + PIC_CAUSE); + chained_irq_enter(chip, desc); + + for_each_set_bit(irqn, &irqmap, BITS_PER_LONG) { + cascade_irq = irq_find_mapping(pic->domain, irqn); + generic_handle_irq(cascade_irq); + } + + chained_irq_exit(chip, desc); +} + +static void mvebu_pic_enable_percpu_irq(void *data) +{ + struct mvebu_pic *pic = data; + + mvebu_pic_reset(pic); + enable_percpu_irq(pic->parent_irq, IRQ_TYPE_NONE); +} + +static void mvebu_pic_disable_percpu_irq(void *data) +{ + struct mvebu_pic *pic = data; + + disable_percpu_irq(pic->parent_irq); +} + +static int mvebu_pic_probe(struct platform_device *pdev) +{ + struct device_node *node = pdev->dev.of_node; + struct mvebu_pic *pic; + struct irq_chip *irq_chip; + struct resource *res; + + pic = devm_kzalloc(&pdev->dev, sizeof(struct mvebu_pic), GFP_KERNEL); + if (!pic) + return -ENOMEM; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + pic->base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(pic->base)) + return PTR_ERR(pic->base); + + irq_chip = &pic->irq_chip; + irq_chip->name = dev_name(&pdev->dev); + irq_chip->irq_mask = mvebu_pic_mask_irq; + irq_chip->irq_unmask = mvebu_pic_unmask_irq; + irq_chip->irq_eoi = mvebu_pic_eoi_irq; + + pic->parent_irq = irq_of_parse_and_map(node, 0); + if (pic->parent_irq <= 0) { + dev_err(&pdev->dev, "Failed to parse parent interrupt\n"); + return -EINVAL; + } + + pic->domain = irq_domain_add_linear(node, PIC_MAX_IRQS, + &mvebu_pic_domain_ops, pic); + if (!pic->domain) { + dev_err(&pdev->dev, "Failed to allocate irq domain\n"); + return -ENOMEM; + } + + irq_set_chained_handler(pic->parent_irq, mvebu_pic_handle_cascade_irq); + irq_set_handler_data(pic->parent_irq, pic); + + on_each_cpu(mvebu_pic_enable_percpu_irq, pic, 1); + + platform_set_drvdata(pdev, pic); + + return 0; +} + +static int mvebu_pic_remove(struct platform_device *pdev) +{ + struct mvebu_pic *pic = platform_get_drvdata(pdev); + + on_each_cpu(mvebu_pic_disable_percpu_irq, pic, 1); + irq_domain_remove(pic->domain); + + return 0; +} + +static const struct of_device_id mvebu_pic_of_match[] = { + { .compatible = "marvell,armada-8k-pic", }, + {}, +}; +MODULE_DEVICE_TABLE(of, mvebu_pic_of_match); + +static struct platform_driver mvebu_pic_driver = { + .probe = mvebu_pic_probe, + .remove = mvebu_pic_remove, + .driver = { + .name = "mvebu-pic", + .of_match_table = mvebu_pic_of_match, + }, +}; +module_platform_driver(mvebu_pic_driver); + +MODULE_AUTHOR("Yehuda Yitschak "); +MODULE_AUTHOR("Thomas Petazzoni "); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS("platform:mvebu_pic"); + From 04208a24b9d2f46f07f4400a4829d5372d0a3661 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Fri, 5 Aug 2016 16:55:20 +0200 Subject: [PATCH 05/42] arm64: marvell: enable the Marvell PIC driver This commit makes sure the driver for the Marvell PIC interrupt controller (used on Marvell Armada 7K/8K) is enabled. Signed-off-by: Thomas Petazzoni Link: https://lkml.kernel.org/r/1470408921-447-4-git-send-email-thomas.petazzoni@free-electrons.com Signed-off-by: Jason Cooper --- arch/arm64/Kconfig.platforms | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms index bb2616b161576b..0b988f315c40d4 100644 --- a/arch/arm64/Kconfig.platforms +++ b/arch/arm64/Kconfig.platforms @@ -93,6 +93,7 @@ config ARCH_MVEBU select ARMADA_CP110_SYSCON select ARMADA_37XX_CLK select MVEBU_ODMI + select MVEBU_PIC help This enables support for Marvell EBU familly, including: - Armada 3700 SoC Family From 21118df66c198d6ebb23e6827e2e92ab1e148e78 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sat, 20 Aug 2016 15:26:28 +0000 Subject: [PATCH 06/42] irqchip/jcore-aic: Fix non static symbol warning Fixes the following sparse warning: drivers/irqchip/irq-jcore-aic.c:47:12: warning: symbol 'aic_irq_of_init' was not declared. Should it be static? Signed-off-by: Wei Yongjun Link: https://lkml.kernel.org/r/1471706788-27587-1-git-send-email-weiyj.lk@gmail.com Signed-off-by: Jason Cooper --- drivers/irqchip/irq-jcore-aic.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-jcore-aic.c b/drivers/irqchip/irq-jcore-aic.c index 5e5e3bb7d3c704..84b01dec277dfe 100644 --- a/drivers/irqchip/irq-jcore-aic.c +++ b/drivers/irqchip/irq-jcore-aic.c @@ -44,7 +44,8 @@ static void noop(struct irq_data *data) { } -int __init aic_irq_of_init(struct device_node *node, struct device_node *parent) +static int __init aic_irq_of_init(struct device_node *node, + struct device_node *parent) { unsigned min_irq = JCORE_AIC2_MIN_HWIRQ; unsigned dom_sz = JCORE_AIC_MAX_HWIRQ+1; From cae750bae4e488c138eb436175201a60943eb3dc Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Fri, 19 Aug 2016 18:11:19 +0100 Subject: [PATCH 07/42] irqchip/mips-gic: Use for_each_set_bit to iterate over IRQs The MIPS GIC driver has previously iterated over bits set in a bitmap representing pending IRQs by calling find_first_bit, clearing that bit then calling find_first_bit again until all bits are clear. If multiple interrupts are pending then this is wasteful, as find_first_bit will have to loop over the whole bitmap from the start. Use the for_each_set_bit macro which performs exactly what we need here instead. It will use find_next_bit and thus only scan over the relevant part of the bitmap, and it makes the intent of the code more clear. Signed-off-by: Paul Burton Link: https://lkml.kernel.org/r/20160819171119.28121-1-paul.burton@imgtec.com Signed-off-by: Jason Cooper --- drivers/irqchip/irq-mips-gic.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c index c5f33c3bd22851..a376fc63226373 100644 --- a/drivers/irqchip/irq-mips-gic.c +++ b/drivers/irqchip/irq-mips-gic.c @@ -371,18 +371,13 @@ static void gic_handle_shared_int(bool chained) bitmap_and(pending, pending, intrmask, gic_shared_intrs); bitmap_and(pending, pending, pcpu_mask, gic_shared_intrs); - intr = find_first_bit(pending, gic_shared_intrs); - while (intr != gic_shared_intrs) { + for_each_set_bit(intr, pending, gic_shared_intrs) { virq = irq_linear_revmap(gic_irq_domain, GIC_SHARED_TO_HWIRQ(intr)); if (chained) generic_handle_irq(virq); else do_IRQ(virq); - - /* go to next pending bit */ - bitmap_clear(pending, intr, 1); - intr = find_first_bit(pending, gic_shared_intrs); } } From 2349f205df22c51fd717cd52b56df0e63c6c3ff2 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 1 Sep 2016 11:44:54 +0100 Subject: [PATCH 08/42] irqchip/keystone: Fix typo "sporious" -> "spurious" Trivial fix to typo in dev_warn message. Signed-off-by: Colin Ian King Link: https://lkml.kernel.org/r/20160901104454.26092-1-colin.king@canonical.com Signed-off-by: Jason Cooper --- drivers/irqchip/irq-keystone.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-keystone.c b/drivers/irqchip/irq-keystone.c index deb89d63a728d2..54a5e870a8f56d 100644 --- a/drivers/irqchip/irq-keystone.c +++ b/drivers/irqchip/irq-keystone.c @@ -109,7 +109,7 @@ static void keystone_irq_handler(struct irq_desc *desc) dev_dbg(kirq->dev, "dispatch bit %d, virq %d\n", src, virq); if (!virq) - dev_warn(kirq->dev, "sporious irq detected hwirq %d, virq %d\n", + dev_warn(kirq->dev, "spurious irq detected hwirq %d, virq %d\n", src, virq); generic_handle_irq(virq); } From fc590c22f9f056ab50190b797f6cacead29f9b75 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 2 Sep 2016 14:45:19 +0200 Subject: [PATCH 09/42] genirq: Robustify handle_percpu_devid_irq() The percpu_devid handler is not robust against spurious interrupts. If a spurious interrupt happens and no action is installed then the handler crashes with a NULL pointer dereference. Add a sanity check for this and log the wreckage once in dmesg. Reported-by: Majun Signed-off-by: Thomas Gleixner Cc: Mark Rutland Cc: Marc Zyngier Cc: guohanjun@huawei.com Cc: dingtianhong@huawei.com Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1609021436160.5647@nanos --- kernel/irq/chip.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index b4c1bc7c9ca204..93c373a8b12b44 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -756,7 +756,6 @@ void handle_percpu_devid_irq(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); struct irqaction *action = desc->action; - void *dev_id = raw_cpu_ptr(action->percpu_dev_id); unsigned int irq = irq_desc_get_irq(desc); irqreturn_t res; @@ -765,9 +764,20 @@ void handle_percpu_devid_irq(struct irq_desc *desc) if (chip->irq_ack) chip->irq_ack(&desc->irq_data); - trace_irq_handler_entry(irq, action); - res = action->handler(irq, dev_id); - trace_irq_handler_exit(irq, action, res); + if (likely(action)) { + trace_irq_handler_entry(irq, action); + res = action->handler(irq, raw_cpu_ptr(action->percpu_dev_id)); + trace_irq_handler_exit(irq, action, res); + } else { + unsigned int cpu = smp_processor_id(); + bool enabled = cpumask_test_cpu(cpu, desc->percpu_enabled); + + if (enabled) + irq_percpu_disable(desc, cpu); + + pr_err_once("Spurious%s percpu IRQ%u on CPU%u\n", + enabled ? " and unmasked" : "", irq, cpu); + } if (chip->irq_eoi) chip->irq_eoi(&desc->irq_data); From 48e0fba842c7daab80f3351398146368c5504a27 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 2 Sep 2016 17:30:35 +0200 Subject: [PATCH 10/42] genirq: Remove export of irq_map_generic_chip() No module users. Signed-off-by: Thomas Gleixner --- kernel/irq/generic-chip.c | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c index abd286afbd2732..5fbb94b077b375 100644 --- a/kernel/irq/generic-chip.c +++ b/kernel/irq/generic-chip.c @@ -409,7 +409,6 @@ int irq_map_generic_chip(struct irq_domain *d, unsigned int virq, irq_modify_status(virq, dgc->irq_flags_to_clear, dgc->irq_flags_to_set); return 0; } -EXPORT_SYMBOL_GPL(irq_map_generic_chip); struct irq_domain_ops irq_generic_chip_ops = { .map = irq_map_generic_chip, From f0c450eaa364cb77c778f2a46ee2aa3ff464b332 Mon Sep 17 00:00:00 2001 From: Sebastian Frias Date: Mon, 1 Aug 2016 16:27:53 +0200 Subject: [PATCH 11/42] genirq/generic_chip: Get rid of code duplication irq_map_generic_chip() contains about the same code as irq_get_domain_generic_chip() except for the return values. Split out the irq_get_domain_generic_chip() implementation so it can be reused. [ tglx: Removed the extra churn in irq_get_domain_generic_chip() callers and massaged changelog ] Signed-off-by: Sebastian Frias Cc: Marc Zyngier Cc: Mason Cc: Jason Cooper Link: http://lkml.kernel.org/r/579F5C69.8070006@laposte.net Signed-off-by: Thomas Gleixner --- kernel/irq/generic-chip.c | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c index 5fbb94b077b375..11ad73b39d2eaa 100644 --- a/kernel/irq/generic-chip.c +++ b/kernel/irq/generic-chip.c @@ -328,6 +328,20 @@ int irq_alloc_domain_generic_chips(struct irq_domain *d, int irqs_per_chip, } EXPORT_SYMBOL_GPL(irq_alloc_domain_generic_chips); +static struct irq_chip_generic * +__irq_get_domain_generic_chip(struct irq_domain *d, unsigned int hw_irq) +{ + struct irq_domain_chip_generic *dgc = d->gc; + int idx; + + if (!dgc) + return ERR_PTR(-ENODEV); + idx = hw_irq / dgc->irqs_per_chip; + if (idx >= dgc->num_chips) + return ERR_PTR(-EINVAL); + return dgc->gc[idx]; +} + /** * irq_get_domain_generic_chip - Get a pointer to the generic chip of a hw_irq * @d: irq domain pointer @@ -336,15 +350,9 @@ EXPORT_SYMBOL_GPL(irq_alloc_domain_generic_chips); struct irq_chip_generic * irq_get_domain_generic_chip(struct irq_domain *d, unsigned int hw_irq) { - struct irq_domain_chip_generic *dgc = d->gc; - int idx; + struct irq_chip_generic *gc = __irq_get_domain_generic_chip(d, hw_irq); - if (!dgc) - return NULL; - idx = hw_irq / dgc->irqs_per_chip; - if (idx >= dgc->num_chips) - return NULL; - return dgc->gc[idx]; + return !IS_ERR(gc) ? gc : NULL; } EXPORT_SYMBOL_GPL(irq_get_domain_generic_chip); @@ -368,13 +376,9 @@ int irq_map_generic_chip(struct irq_domain *d, unsigned int virq, unsigned long flags; int idx; - if (!d->gc) - return -ENODEV; - - idx = hw_irq / dgc->irqs_per_chip; - if (idx >= dgc->num_chips) - return -EINVAL; - gc = dgc->gc[idx]; + gc = __irq_get_domain_generic_chip(d, hw_irq); + if (IS_ERR(gc)) + return PTR_ERR(gc); idx = hw_irq % dgc->irqs_per_chip; From ee26c013cdee0b947e29d6cadfb9ff3341c69ff9 Mon Sep 17 00:00:00 2001 From: Sebastian Frias Date: Mon, 1 Aug 2016 16:27:38 +0200 Subject: [PATCH 12/42] genirq/generic_chip: Add irq_unmap callback Without this patch irq_domain_disassociate() cannot properly release the interrupt. In fact, irq_map_generic_chip() checks a bit on 'gc->installed' but said bit is never cleared, only set. Commit 088f40b7b027 ("genirq: Generic chip: Add linear irq domain support") added irq_map_generic_chip() function and also stated "This lacks a removal function for now". This commit provides an implementation of an unmap function that can be called by irq_domain_disassociate(). [ tglx: Made the function static and removed the export as we have neither a prototype nor a modular user. ] Fixes: 088f40b7b027 ("genirq: Generic chip: Add linear irq domain support") Signed-off-by: Sebastian Frias Cc: Marc Zyngier Cc: Mason Cc: Jason Cooper Link: http://lkml.kernel.org/r/579F5C5A.2070507@laposte.net Signed-off-by: Thomas Gleixner --- kernel/irq/generic-chip.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c index 11ad73b39d2eaa..a3a392097804b1 100644 --- a/kernel/irq/generic-chip.c +++ b/kernel/irq/generic-chip.c @@ -414,8 +414,29 @@ int irq_map_generic_chip(struct irq_domain *d, unsigned int virq, return 0; } +static void irq_unmap_generic_chip(struct irq_domain *d, unsigned int virq) +{ + struct irq_data *data = irq_domain_get_irq_data(d, virq); + struct irq_domain_chip_generic *dgc = d->gc; + unsigned int hw_irq = data->hwirq; + struct irq_chip_generic *gc; + int irq_idx; + + gc = irq_get_domain_generic_chip(d, hw_irq); + if (!gc) + return; + + irq_idx = hw_irq % dgc->irqs_per_chip; + + clear_bit(irq_idx, &gc->installed); + irq_domain_set_info(d, virq, hw_irq, &no_irq_chip, NULL, NULL, NULL, + NULL); + +} + struct irq_domain_ops irq_generic_chip_ops = { .map = irq_map_generic_chip, + .unmap = irq_unmap_generic_chip, .xlate = irq_domain_xlate_onetwocell, }; EXPORT_SYMBOL_GPL(irq_generic_chip_ops); From 0c228919e04ddec195402296e7ebf2472ed6caef Mon Sep 17 00:00:00 2001 From: Sebastian Frias Date: Tue, 2 Aug 2016 10:52:45 +0200 Subject: [PATCH 13/42] irqdomain: Mask irq type in irq_domain_xlate_onetwocell() According to the xlate() callback definition, the 'out_type' parameter needs to be the "linux irq type". A mask for such bits exists, IRQ_TYPE_SENSE_MASK, which is correctly applied in irq_domain_xlate_twocell() So use it for irq_domain_xlate_onetwocell() as well. Signed-off-by: Sebastian Frias Cc: Grant Likely Cc: Marc Zyngier Cc: Mason Cc: Jason Cooper Link: http://lkml.kernel.org/r/57A05F5D.103@laposte.net Signed-off-by: Thomas Gleixner --- kernel/irq/irqdomain.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index 4752b43662e001..f10cffe8aefb66 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -868,7 +868,10 @@ int irq_domain_xlate_onetwocell(struct irq_domain *d, if (WARN_ON(intsize < 1)) return -EINVAL; *out_hwirq = intspec[0]; - *out_type = (intsize > 1) ? intspec[1] : IRQ_TYPE_NONE; + if (intsize > 1) + *out_type = intspec[1] & IRQ_TYPE_SENSE_MASK; + else + *out_type = IRQ_TYPE_NONE; return 0; } EXPORT_SYMBOL_GPL(irq_domain_xlate_onetwocell); From f88eecfe2f22b2790e7527c0aaec14ea175919de Mon Sep 17 00:00:00 2001 From: Sebastian Frias Date: Tue, 16 Aug 2016 16:05:08 +0200 Subject: [PATCH 14/42] genirq/generic_chip: Verify irqs_per_chip <= 32 Most (if not all) code here implicitly assumes that the maximum number of IRQs per chip will be 32, and thus uses 'u32' or 'unsigned long' for many tasks (for example "struct irq_data" declares its 'mask' field as 'u32', and "struct irq_chip_generic" declares its 'installed' field as 'unsigned long') However, there is no check to verify that irqs_per_chip is <= 32. Hence, calling irq_alloc_domain_generic_chips() with a bigger value will result in unexpected results. Provide a wrapper with a MAYBE_BUILD_BUG_ON(nrirqs >= 32) to catch such cases. [ tglx: Reduced changelog to the essential information ] Signed-off-by: Sebastian Frias Cc: Marc Zyngier Cc: Mason Cc: Jason Cooper Link: http://lkml.kernel.org/r/57B31D94.5040701@laposte.net Signed-off-by: Thomas Gleixner --- include/linux/irq.h | 18 +++++++++++++----- kernel/irq/generic-chip.c | 16 ++++++++-------- 2 files changed, 21 insertions(+), 13 deletions(-) diff --git a/include/linux/irq.h b/include/linux/irq.h index b52424eaa0ed38..603986741f2cca 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -916,12 +916,20 @@ void irq_remove_generic_chip(struct irq_chip_generic *gc, u32 msk, unsigned int clr, unsigned int set); struct irq_chip_generic *irq_get_domain_generic_chip(struct irq_domain *d, unsigned int hw_irq); -int irq_alloc_domain_generic_chips(struct irq_domain *d, int irqs_per_chip, - int num_ct, const char *name, - irq_flow_handler_t handler, - unsigned int clr, unsigned int set, - enum irq_gc_flags flags); +int __irq_alloc_domain_generic_chips(struct irq_domain *d, int irqs_per_chip, + int num_ct, const char *name, + irq_flow_handler_t handler, + unsigned int clr, unsigned int set, + enum irq_gc_flags flags); + +#define irq_alloc_domain_generic_chips(d, irqs_per_chip, num_ct, name, \ + handler, clr, set, flags) \ +({ \ + MAYBE_BUILD_BUG_ON(irqs_per_chip > 32); \ + __irq_alloc_domain_generic_chips(d, irqs_per_chip, num_ct, name,\ + handler, clr, set, flags); \ +}) static inline struct irq_chip_type *irq_data_get_chip_type(struct irq_data *d) { diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c index a3a392097804b1..ee32870079c9c9 100644 --- a/kernel/irq/generic-chip.c +++ b/kernel/irq/generic-chip.c @@ -260,9 +260,9 @@ irq_gc_init_mask_cache(struct irq_chip_generic *gc, enum irq_gc_flags flags) } /** - * irq_alloc_domain_generic_chip - Allocate generic chips for an irq domain + * __irq_alloc_domain_generic_chip - Allocate generic chips for an irq domain * @d: irq domain for which to allocate chips - * @irqs_per_chip: Number of interrupts each chip handles + * @irqs_per_chip: Number of interrupts each chip handles (max 32) * @num_ct: Number of irq_chip_type instances associated with this * @name: Name of the irq chip * @handler: Default flow handler associated with these chips @@ -270,11 +270,11 @@ irq_gc_init_mask_cache(struct irq_chip_generic *gc, enum irq_gc_flags flags) * @set: IRQ_* bits to set in the mapping function * @gcflags: Generic chip specific setup flags */ -int irq_alloc_domain_generic_chips(struct irq_domain *d, int irqs_per_chip, - int num_ct, const char *name, - irq_flow_handler_t handler, - unsigned int clr, unsigned int set, - enum irq_gc_flags gcflags) +int __irq_alloc_domain_generic_chips(struct irq_domain *d, int irqs_per_chip, + int num_ct, const char *name, + irq_flow_handler_t handler, + unsigned int clr, unsigned int set, + enum irq_gc_flags gcflags) { struct irq_domain_chip_generic *dgc; struct irq_chip_generic *gc; @@ -326,7 +326,7 @@ int irq_alloc_domain_generic_chips(struct irq_domain *d, int irqs_per_chip, d->name = name; return 0; } -EXPORT_SYMBOL_GPL(irq_alloc_domain_generic_chips); +EXPORT_SYMBOL_GPL(__irq_alloc_domain_generic_chips); static struct irq_chip_generic * __irq_get_domain_generic_chip(struct irq_domain *d, unsigned int hw_irq) From 545d5d657b720e9c4dc773265bb7e9d88e34b269 Mon Sep 17 00:00:00 2001 From: Punit Agrawal Date: Tue, 31 May 2016 13:56:48 +0100 Subject: [PATCH 15/42] genirq: Update stale comment for __irq_domain_add Commit 1bf4ddc46c5d ("irqdomain: Introduce irq_domain_create_{linear, tree}") introduced the use of fwnode_handle to identify the interrupt controller when calling __irq_domain_add but missed updating the kernel doc parameters for the function. Update this comment. While we are touching this code, also consolidate the declaration and assignment of of_node. Signed-off-by: Punit Agrawal Acked-by: Marc Zygnier Link: http://lkml.kernel.org/r/1464699409-23113-1-git-send-email-punit.agrawal@arm.com Signed-off-by: Thomas Gleixner --- kernel/irq/irqdomain.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index f10cffe8aefb66..8c0a0ae43521c7 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -80,7 +80,7 @@ EXPORT_SYMBOL_GPL(irq_domain_free_fwnode); /** * __irq_domain_add() - Allocate a new irq_domain data structure - * @of_node: optional device-tree node of the interrupt controller + * @fwnode: firmware node for the interrupt controller * @size: Size of linear map; 0 for radix mapping only * @hwirq_max: Maximum number of interrupts supported by controller * @direct_max: Maximum value of direct maps; Use ~0 for no limit; 0 for no @@ -96,10 +96,8 @@ struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, int size, const struct irq_domain_ops *ops, void *host_data) { + struct device_node *of_node = to_of_node(fwnode); struct irq_domain *domain; - struct device_node *of_node; - - of_node = to_of_node(fwnode); domain = kzalloc_node(sizeof(*domain) + (sizeof(unsigned int) * size), GFP_KERNEL, of_node_to_nid(of_node)); From 00b992deaa08495ab958da5950c9ebbba27d0ddc Mon Sep 17 00:00:00 2001 From: Alexander Kuleshov Date: Tue, 19 Jul 2016 15:54:08 +0600 Subject: [PATCH 16/42] genirq: No need to mask non trigger mode flags before __irq_set_trigger() Some callers of __irq_set_trigger() masks all flags except trigger mode flags. This is unnecessary, ase __irq_set_trigger() already does this before usage of flags. [ tglx: Moved the flag mask and adjusted comment. Removed the hunk in enable_percpu_irq() as it is required there ] Signed-off-by: Alexander Kuleshov Link: http://lkml.kernel.org/r/20160719095408.13778-1-kuleshovmail@gmail.com Signed-off-by: Thomas Gleixner --- kernel/irq/chip.c | 1 - kernel/irq/manage.c | 5 ++--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 93c373a8b12b44..e11e8afcf209b7 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -76,7 +76,6 @@ int irq_set_irq_type(unsigned int irq, unsigned int type) if (!desc) return -EINVAL; - type &= IRQ_TYPE_SENSE_MASK; ret = __irq_set_trigger(desc, type); irq_put_desc_busunlock(desc, flags); return ret; diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 73a2b786b5e99b..4908617dee2877 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -669,8 +669,6 @@ int __irq_set_trigger(struct irq_desc *desc, unsigned long flags) return 0; } - flags &= IRQ_TYPE_SENSE_MASK; - if (chip->flags & IRQCHIP_SET_TYPE_MASKED) { if (!irqd_irq_masked(&desc->irq_data)) mask_irq(desc); @@ -678,7 +676,8 @@ int __irq_set_trigger(struct irq_desc *desc, unsigned long flags) unmask = 1; } - /* caller masked out all except trigger mode flags */ + /* Mask all flags except trigger mode */ + flags &= IRQ_TYPE_SENSE_MASK; ret = chip->irq_set_type(&desc->irq_data, flags); switch (ret) { From 23299b8c64f8c50e83eb345b835077f3c29588b8 Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Mon, 5 Sep 2016 18:05:13 +0300 Subject: [PATCH 17/42] dt-bindings: mvebu-odmi: Fix example typo Make the example compatible string match its definition. Signed-off-by: Baruch Siach Reviewed-by: Thomas Petazzoni Link: https://lkml.kernel.org/r/0743fef6fe390bc4ae7cabd15c4836bbed98f7cf.1473087913.git.baruch@tkos.co.il Signed-off-by: Jason Cooper --- .../bindings/interrupt-controller/marvell,odmi-controller.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/interrupt-controller/marvell,odmi-controller.txt b/Documentation/devicetree/bindings/interrupt-controller/marvell,odmi-controller.txt index 8af0a8e613abeb..3f6442c7f867fb 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/marvell,odmi-controller.txt +++ b/Documentation/devicetree/bindings/interrupt-controller/marvell,odmi-controller.txt @@ -31,7 +31,7 @@ Required properties: Example: odmi: odmi@300000 { - compatible = "marvell,ap806-odm-controller", + compatible = "marvell,ap806-odmi-controller", "marvell,odmi-controller"; interrupt-controller; msi-controller; From 04c8b0f82c7d5a9a1c296eef914ae3bb820bcb85 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 27 Jun 2016 18:11:43 +0100 Subject: [PATCH 18/42] irqchip/gic: Make locking a BL_SWITCHER only feature The BL switcher code manipulates the logical/physical CPU mapping, forcing a lock to be taken on the IPI path. With an IPI heavy load, this single lock becomes contended. But when CONFIG_BL_SWITCHER is not enabled, there is no reason to take this lock at all since the CPU mapping is immutable. This patch allows the lock to be entierely removed when BL_SWITCHER is not enabled (which is the case in most configurations), leading to a small improvement of "perf bench sched pipe" (measured on an 8 core AMD Seattle system): Before: 101370 ops/sec After: 103680 ops/sec Take this opportunity to remove a useless lock being taken when handling an interrupt on a secondary GIC. Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic.c | 36 +++++++++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index 390fac59c6bca6..d108fe6a32d9ed 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -91,7 +91,27 @@ struct gic_chip_data { #endif }; -static DEFINE_RAW_SPINLOCK(irq_controller_lock); +#ifdef CONFIG_BL_SWITCHER + +static DEFINE_RAW_SPINLOCK(cpu_map_lock); + +#define gic_lock_irqsave(f) \ + raw_spin_lock_irqsave(&cpu_map_lock, (f)) +#define gic_unlock_irqrestore(f) \ + raw_spin_unlock_irqrestore(&cpu_map_lock, (f)) + +#define gic_lock() raw_spin_lock(&cpu_map_lock) +#define gic_unlock() raw_spin_unlock(&cpu_map_lock) + +#else + +#define gic_lock_irqsave(f) do { (void)(f); } while(0) +#define gic_unlock_irqrestore(f) do { (void)(f); } while(0) + +#define gic_lock() do { } while(0) +#define gic_unlock() do { } while(0) + +#endif /* * The GIC mapping of CPU interfaces does not necessarily match @@ -317,12 +337,12 @@ static int gic_set_affinity(struct irq_data *d, const struct cpumask *mask_val, if (cpu >= NR_GIC_CPU_IF || cpu >= nr_cpu_ids) return -EINVAL; - raw_spin_lock_irqsave(&irq_controller_lock, flags); + gic_lock_irqsave(flags); mask = 0xff << shift; bit = gic_cpu_map[cpu] << shift; val = readl_relaxed(reg) & ~mask; writel_relaxed(val | bit, reg); - raw_spin_unlock_irqrestore(&irq_controller_lock, flags); + gic_unlock_irqrestore(flags); return IRQ_SET_MASK_OK_DONE; } @@ -374,9 +394,7 @@ static void gic_handle_cascade_irq(struct irq_desc *desc) chained_irq_enter(chip, desc); - raw_spin_lock(&irq_controller_lock); status = readl_relaxed(gic_data_cpu_base(chip_data) + GIC_CPU_INTACK); - raw_spin_unlock(&irq_controller_lock); gic_irq = (status & GICC_IAR_INT_ID_MASK); if (gic_irq == GICC_INT_SPURIOUS) @@ -776,7 +794,7 @@ static void gic_raise_softirq(const struct cpumask *mask, unsigned int irq) return; } - raw_spin_lock_irqsave(&irq_controller_lock, flags); + gic_lock_irqsave(flags); /* Convert our logical CPU mask into a physical one. */ for_each_cpu(cpu, mask) @@ -791,7 +809,7 @@ static void gic_raise_softirq(const struct cpumask *mask, unsigned int irq) /* this always happens on GIC0 */ writel_relaxed(map << 16 | irq, gic_data_dist_base(&gic_data[0]) + GIC_DIST_SOFTINT); - raw_spin_unlock_irqrestore(&irq_controller_lock, flags); + gic_unlock_irqrestore(flags); } #endif @@ -859,7 +877,7 @@ void gic_migrate_target(unsigned int new_cpu_id) cur_target_mask = 0x01010101 << cur_cpu_id; ror_val = (cur_cpu_id - new_cpu_id) & 31; - raw_spin_lock(&irq_controller_lock); + gic_lock(); /* Update the target interface for this logical CPU */ gic_cpu_map[cpu] = 1 << new_cpu_id; @@ -879,7 +897,7 @@ void gic_migrate_target(unsigned int new_cpu_id) } } - raw_spin_unlock(&irq_controller_lock); + gic_unlock(); /* * Now let's migrate and clear any potential SGIs that might be From 91ef84428a86b75a52e15c6fe4f56b446ba75f93 Mon Sep 17 00:00:00 2001 From: Daniel Thompson Date: Fri, 19 Aug 2016 17:13:09 +0100 Subject: [PATCH 19/42] irqchip/gic-v3: Reset BPR during initialization Currently, when running on FVP, CPU 0 boots up with its BPR changed from the reset value. This renders it impossible to (preemptively) prioritize interrupts on CPU 0. This is harmless on normal systems since Linux typically does not support preemptive interrupts. It does however cause problems in systems with additional changes (such as patches for NMI simulation). Many thanks to Andrew Thoelke for suggesting the BPR as having the potential to harm preemption. Suggested-by: Andrew Thoelke Signed-off-by: Daniel Thompson Signed-off-by: Marc Zyngier --- arch/arm/include/asm/arch_gicv3.h | 6 ++++++ arch/arm64/include/asm/arch_gicv3.h | 6 ++++++ drivers/irqchip/irq-gic-v3.c | 8 ++++++++ 3 files changed, 20 insertions(+) diff --git a/arch/arm/include/asm/arch_gicv3.h b/arch/arm/include/asm/arch_gicv3.h index e08d151840567d..dfe4002812dad6 100644 --- a/arch/arm/include/asm/arch_gicv3.h +++ b/arch/arm/include/asm/arch_gicv3.h @@ -34,6 +34,7 @@ #define ICC_CTLR __ACCESS_CP15(c12, 0, c12, 4) #define ICC_SRE __ACCESS_CP15(c12, 0, c12, 5) #define ICC_IGRPEN1 __ACCESS_CP15(c12, 0, c12, 7) +#define ICC_BPR1 __ACCESS_CP15(c12, 0, c12, 3) #define ICC_HSRE __ACCESS_CP15(c12, 4, c9, 5) @@ -157,6 +158,11 @@ static inline void gic_write_sre(u32 val) isb(); } +static inline void gic_write_bpr1(u32 val) +{ + asm volatile("mcr " __stringify(ICC_BPR1) : : "r" (val)); +} + /* * Even in 32bit systems that use LPAE, there is no guarantee that the I/O * interface provides true 64bit atomic accesses, so using strd/ldrd doesn't diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h index 8ec88e5b290f9b..fc2a0cb47b2c80 100644 --- a/arch/arm64/include/asm/arch_gicv3.h +++ b/arch/arm64/include/asm/arch_gicv3.h @@ -28,6 +28,7 @@ #define ICC_CTLR_EL1 sys_reg(3, 0, 12, 12, 4) #define ICC_SRE_EL1 sys_reg(3, 0, 12, 12, 5) #define ICC_GRPEN1_EL1 sys_reg(3, 0, 12, 12, 7) +#define ICC_BPR1_EL1 sys_reg(3, 0, 12, 12, 3) #define ICC_SRE_EL2 sys_reg(3, 4, 12, 9, 5) @@ -165,6 +166,11 @@ static inline void gic_write_sre(u32 val) isb(); } +static inline void gic_write_bpr1(u32 val) +{ + asm volatile("msr_s " __stringify(ICC_BPR1_EL1) ", %0" : : "r" (val)); +} + #define gic_read_typer(c) readq_relaxed(c) #define gic_write_irouter(v, c) writeq_relaxed(v, c) diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index ede5672ab34d45..ecc5b2360c7aa0 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -495,6 +495,14 @@ static void gic_cpu_sys_reg_init(void) /* Set priority mask register */ gic_write_pmr(DEFAULT_PMR_VALUE); + /* + * Some firmwares hand over to the kernel with the BPR changed from + * its reset value (and with a value large enough to prevent + * any pre-emptive interrupts from working at all). Writing a zero + * to BPR restores is reset value. + */ + gic_write_bpr1(0); + if (static_key_true(&supports_deactivate)) { /* EOI drops priority only (mode 1) */ gic_write_ctlr(ICC_CTLR_EL1_EOImode_drop); From 39a342b25252f34d1dbe758b2b99bd055ff3b885 Mon Sep 17 00:00:00 2001 From: Jon Hunter Date: Tue, 16 Aug 2016 11:14:10 +0100 Subject: [PATCH 20/42] irqchip/gic-pm: Update driver to use of_pm_clk_add_clk Commit 498b5fdd40dd ("PM / clk: Add support for adding a specific clock from device-tree") add a new helper function for adding a clock from device-tree to a device. Update the GIC-PM driver to use this new function to simplify the driver. Signed-off-by: Jon Hunter Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic-pm.c | 23 +++++------------------ 1 file changed, 5 insertions(+), 18 deletions(-) diff --git a/drivers/irqchip/irq-gic-pm.c b/drivers/irqchip/irq-gic-pm.c index 4cbffba3ff1357..ecafd295c31ced 100644 --- a/drivers/irqchip/irq-gic-pm.c +++ b/drivers/irqchip/irq-gic-pm.c @@ -64,7 +64,6 @@ static int gic_runtime_suspend(struct device *dev) static int gic_get_clocks(struct device *dev, const struct gic_clk_data *data) { - struct clk *clk; unsigned int i; int ret; @@ -76,28 +75,16 @@ static int gic_get_clocks(struct device *dev, const struct gic_clk_data *data) return ret; for (i = 0; i < data->num_clocks; i++) { - clk = of_clk_get_by_name(dev->of_node, data->clocks[i]); - if (IS_ERR(clk)) { - dev_err(dev, "failed to get clock %s\n", - data->clocks[i]); - ret = PTR_ERR(clk); - goto error; - } - - ret = pm_clk_add_clk(dev, clk); + ret = of_pm_clk_add_clk(dev, data->clocks[i]); if (ret) { - dev_err(dev, "failed to add clock at index %d\n", i); - clk_put(clk); - goto error; + dev_err(dev, "failed to add clock %s\n", + data->clocks[i]); + pm_clk_destroy(dev); + return ret; } } return 0; - -error: - pm_clk_destroy(dev); - - return ret; } static int gic_probe(struct platform_device *pdev) From 89c59cca48f04ede7e63944418af1794a8e31da6 Mon Sep 17 00:00:00 2001 From: Baoyou Xie Date: Wed, 7 Sep 2016 19:26:45 +0800 Subject: [PATCH 21/42] irqchip/gic: Mark gic_init_physaddr() static We get 1 warning when building kernel with W=1: drivers/irqchip/irq-gic.c:917:13: warning: no previous prototype for 'gic_init_physaddr' [-Wmissing-prototypes] In fact, this function is only used in the file in which it is declared and don't need a declaration, but can be made static. so this patch marks this function with 'static'. Signed-off-by: Baoyou Xie Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index d108fe6a32d9ed..58e5b4e870561c 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -939,7 +939,7 @@ unsigned long gic_get_sgir_physaddr(void) return gic_dist_physaddr + GIC_DIST_SOFTINT; } -void __init gic_init_physaddr(struct device_node *node) +static void __init gic_init_physaddr(struct device_node *node) { struct resource res; if (of_address_to_resource(node, 0, &res) == 0) { From 88ef16d888a094587b2ac77de60927df5da5d56d Mon Sep 17 00:00:00 2001 From: Tomasz Nowicki Date: Mon, 12 Sep 2016 20:54:20 +0200 Subject: [PATCH 22/42] ACPI: I/O Remapping Table (IORT) initial support IORT shows representation of IO topology for ARM based systems. It describes how various components are connected together on parent-child basis e.g. PCI RC -> SMMU -> ITS. Also see IORT spec. http://infocenter.arm.com/help/topic/com.arm.doc.den0049b/DEN0049B_IO_Remapping_Table.pdf Initial support allows to detect IORT table presence and save its root pointer obtained through acpi_get_table(). The pointer validity depends on acpi_gbl_permanent_mmap because if acpi_gbl_permanent_mmap is not set while using IORT nodes we would dereference unmapped pointers. For the aforementioned reason call acpi_iort_init() from acpi_init() which guarantees acpi_gbl_permanent_mmap to be set at that point. Add generic helpers which are helpful for scanning and retrieving information from IORT table content. List of the most important helpers: - iort_find_dev_node() finds IORT node for a given device - iort_node_map_rid() maps device RID and returns IORT node which provides final translation IORT support is placed under drivers/acpi/arm64/ new directory due to its ARM64 specific nature. The code there is considered only for ARM64. The long term plan is to keep all ARM64 specific tables support in this place e.g. GTDT table. Signed-off-by: Tomasz Nowicki Acked-by: Rafael J. Wysocki Reviewed-by: Hanjun Guo Reviewed-by: Lorenzo Pieralisi Signed-off-by: Marc Zyngier --- drivers/acpi/Kconfig | 4 + drivers/acpi/Makefile | 2 + drivers/acpi/arm64/Kconfig | 6 + drivers/acpi/arm64/Makefile | 1 + drivers/acpi/arm64/iort.c | 216 ++++++++++++++++++++++++++++++++++++ drivers/acpi/bus.c | 2 + include/linux/acpi_iort.h | 30 +++++ 7 files changed, 261 insertions(+) create mode 100644 drivers/acpi/arm64/Kconfig create mode 100644 drivers/acpi/arm64/Makefile create mode 100644 drivers/acpi/arm64/iort.c create mode 100644 include/linux/acpi_iort.h diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index 445ce28475b306..d5c06145d07f43 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -521,4 +521,8 @@ config ACPI_CONFIGFS userspace. The configurable ACPI groups will be visible under /config/acpi, assuming configfs is mounted under /config. +if ARM64 +source "drivers/acpi/arm64/Kconfig" +endif + endif # ACPI diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile index 5ae9d85c5159b6..e5ada7895697b4 100644 --- a/drivers/acpi/Makefile +++ b/drivers/acpi/Makefile @@ -105,3 +105,5 @@ obj-$(CONFIG_ACPI_CONFIGFS) += acpi_configfs.o video-objs += acpi_video.o video_detect.o obj-y += dptf/ + +obj-$(CONFIG_ARM64) += arm64/ diff --git a/drivers/acpi/arm64/Kconfig b/drivers/acpi/arm64/Kconfig new file mode 100644 index 00000000000000..4616da4c15be05 --- /dev/null +++ b/drivers/acpi/arm64/Kconfig @@ -0,0 +1,6 @@ +# +# ACPI Configuration for ARM64 +# + +config ACPI_IORT + bool diff --git a/drivers/acpi/arm64/Makefile b/drivers/acpi/arm64/Makefile new file mode 100644 index 00000000000000..72331f2ce0e983 --- /dev/null +++ b/drivers/acpi/arm64/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_ACPI_IORT) += iort.o diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c new file mode 100644 index 00000000000000..5279a358924a3d --- /dev/null +++ b/drivers/acpi/arm64/iort.c @@ -0,0 +1,216 @@ +/* + * Copyright (C) 2016, Semihalf + * Author: Tomasz Nowicki + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * This file implements early detection/parsing of I/O mapping + * reported to OS through firmware via I/O Remapping Table (IORT) + * IORT document number: ARM DEN 0049A + */ + +#define pr_fmt(fmt) "ACPI: IORT: " fmt + +#include +#include +#include + +typedef acpi_status (*iort_find_node_callback) + (struct acpi_iort_node *node, void *context); + +/* Root pointer to the mapped IORT table */ +static struct acpi_table_header *iort_table; + +static LIST_HEAD(iort_msi_chip_list); +static DEFINE_SPINLOCK(iort_msi_chip_lock); + +static struct acpi_iort_node *iort_scan_node(enum acpi_iort_node_type type, + iort_find_node_callback callback, + void *context) +{ + struct acpi_iort_node *iort_node, *iort_end; + struct acpi_table_iort *iort; + int i; + + if (!iort_table) + return NULL; + + /* Get the first IORT node */ + iort = (struct acpi_table_iort *)iort_table; + iort_node = ACPI_ADD_PTR(struct acpi_iort_node, iort, + iort->node_offset); + iort_end = ACPI_ADD_PTR(struct acpi_iort_node, iort_table, + iort_table->length); + + for (i = 0; i < iort->node_count; i++) { + if (WARN_TAINT(iort_node >= iort_end, TAINT_FIRMWARE_WORKAROUND, + "IORT node pointer overflows, bad table!\n")) + return NULL; + + if (iort_node->type == type && + ACPI_SUCCESS(callback(iort_node, context))) + return iort_node; + + iort_node = ACPI_ADD_PTR(struct acpi_iort_node, iort_node, + iort_node->length); + } + + return NULL; +} + +static acpi_status iort_match_node_callback(struct acpi_iort_node *node, + void *context) +{ + struct device *dev = context; + acpi_status status; + + if (node->type == ACPI_IORT_NODE_NAMED_COMPONENT) { + struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER, NULL }; + struct acpi_device *adev = to_acpi_device_node(dev->fwnode); + struct acpi_iort_named_component *ncomp; + + if (!adev) { + status = AE_NOT_FOUND; + goto out; + } + + status = acpi_get_name(adev->handle, ACPI_FULL_PATHNAME, &buf); + if (ACPI_FAILURE(status)) { + dev_warn(dev, "Can't get device full path name\n"); + goto out; + } + + ncomp = (struct acpi_iort_named_component *)node->node_data; + status = !strcmp(ncomp->device_name, buf.pointer) ? + AE_OK : AE_NOT_FOUND; + acpi_os_free(buf.pointer); + } else if (node->type == ACPI_IORT_NODE_PCI_ROOT_COMPLEX) { + struct acpi_iort_root_complex *pci_rc; + struct pci_bus *bus; + + bus = to_pci_bus(dev); + pci_rc = (struct acpi_iort_root_complex *)node->node_data; + + /* + * It is assumed that PCI segment numbers maps one-to-one + * with root complexes. Each segment number can represent only + * one root complex. + */ + status = pci_rc->pci_segment_number == pci_domain_nr(bus) ? + AE_OK : AE_NOT_FOUND; + } else { + status = AE_NOT_FOUND; + } +out: + return status; +} + +static int iort_id_map(struct acpi_iort_id_mapping *map, u8 type, u32 rid_in, + u32 *rid_out) +{ + /* Single mapping does not care for input id */ + if (map->flags & ACPI_IORT_ID_SINGLE_MAPPING) { + if (type == ACPI_IORT_NODE_NAMED_COMPONENT || + type == ACPI_IORT_NODE_PCI_ROOT_COMPLEX) { + *rid_out = map->output_base; + return 0; + } + + pr_warn(FW_BUG "[map %p] SINGLE MAPPING flag not allowed for node type %d, skipping ID map\n", + map, type); + return -ENXIO; + } + + if (rid_in < map->input_base || + (rid_in >= map->input_base + map->id_count)) + return -ENXIO; + + *rid_out = map->output_base + (rid_in - map->input_base); + return 0; +} + +static struct acpi_iort_node *iort_node_map_rid(struct acpi_iort_node *node, + u32 rid_in, u32 *rid_out, + u8 type) +{ + u32 rid = rid_in; + + /* Parse the ID mapping tree to find specified node type */ + while (node) { + struct acpi_iort_id_mapping *map; + int i; + + if (node->type == type) { + if (rid_out) + *rid_out = rid; + return node; + } + + if (!node->mapping_offset || !node->mapping_count) + goto fail_map; + + map = ACPI_ADD_PTR(struct acpi_iort_id_mapping, node, + node->mapping_offset); + + /* Firmware bug! */ + if (!map->output_reference) { + pr_err(FW_BUG "[node %p type %d] ID map has NULL parent reference\n", + node, node->type); + goto fail_map; + } + + /* Do the RID translation */ + for (i = 0; i < node->mapping_count; i++, map++) { + if (!iort_id_map(map, node->type, rid, &rid)) + break; + } + + if (i == node->mapping_count) + goto fail_map; + + node = ACPI_ADD_PTR(struct acpi_iort_node, iort_table, + map->output_reference); + } + +fail_map: + /* Map input RID to output RID unchanged on mapping failure*/ + if (rid_out) + *rid_out = rid_in; + + return NULL; +} + +static struct acpi_iort_node *iort_find_dev_node(struct device *dev) +{ + struct pci_bus *pbus; + + if (!dev_is_pci(dev)) + return iort_scan_node(ACPI_IORT_NODE_NAMED_COMPONENT, + iort_match_node_callback, dev); + + /* Find a PCI root bus */ + pbus = to_pci_dev(dev)->bus; + while (!pci_is_root_bus(pbus)) + pbus = pbus->parent; + + return iort_scan_node(ACPI_IORT_NODE_PCI_ROOT_COMPLEX, + iort_match_node_callback, &pbus->dev); +} + +void __init acpi_iort_init(void) +{ + acpi_status status; + + status = acpi_get_table(ACPI_SIG_IORT, 0, &iort_table); + if (ACPI_FAILURE(status) && status != AE_NOT_FOUND) { + const char *msg = acpi_format_exception(status); + pr_err("Failed to get table, %s\n", msg); + } +} diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index 85b7d07fe5c816..e56e6438515ac4 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -36,6 +36,7 @@ #ifdef CONFIG_X86 #include #endif +#include #include #include #include @@ -1186,6 +1187,7 @@ static int __init acpi_init(void) } pci_mmcfg_late_init(); + acpi_iort_init(); acpi_scan_init(); acpi_ec_init(); acpi_debugfs_init(); diff --git a/include/linux/acpi_iort.h b/include/linux/acpi_iort.h new file mode 100644 index 00000000000000..fcacaf7ed64dfc --- /dev/null +++ b/include/linux/acpi_iort.h @@ -0,0 +1,30 @@ +/* + * Copyright (C) 2016, Semihalf + * Author: Tomasz Nowicki + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + */ + +#ifndef __ACPI_IORT_H__ +#define __ACPI_IORT_H__ + +#include + +#ifdef CONFIG_ACPI_IORT +void acpi_iort_init(void); +#else +static inline void acpi_iort_init(void) { } +#endif + +#endif /* __ACPI_IORT_H__ */ From 4bf2efd26d7624372fb7adff8745b4c2e8407004 Mon Sep 17 00:00:00 2001 From: Tomasz Nowicki Date: Mon, 12 Sep 2016 20:32:21 +0200 Subject: [PATCH 23/42] ACPI: Add new IORT functions to support MSI domain handling For ITS, MSI functionality consists on building domain stack and during that process we need to reference to domain stack components e.g. before we create new DOMAIN_BUS_PCI_MSI domain we need to specify its DOMAIN_BUS_NEXUS parent domain. In order to manage that process properly, maintain list which elements contain domain token (unique for MSI domain stack) and ITS ID: iort_register_domain_token() and iort_deregister_domain_token(). Then retrieve domain token any time later with ITS ID being key off: iort_find_domain_token(). With domain token and domain type we are able to find corresponding IRQ domain. Since IORT is prepared to describe MSI domain on a per-device basis, use existing IORT helpers and implement two calls: 1. iort_msi_map_rid() to map MSI RID for a device 2. iort_get_device_domain() to find domain token for a device Signed-off-by: Tomasz Nowicki Acked-by: Rafael J. Wysocki Reviewed-by: Hanjun Guo Signed-off-by: Marc Zyngier --- drivers/acpi/arm64/iort.c | 152 ++++++++++++++++++++++++++++++++++++++ include/linux/acpi_iort.h | 12 +++ 2 files changed, 164 insertions(+) diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index 5279a358924a3d..6b81746cd13c83 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -22,6 +22,12 @@ #include #include +struct iort_its_msi_chip { + struct list_head list; + struct fwnode_handle *fw_node; + u32 translation_id; +}; + typedef acpi_status (*iort_find_node_callback) (struct acpi_iort_node *node, void *context); @@ -31,6 +37,76 @@ static struct acpi_table_header *iort_table; static LIST_HEAD(iort_msi_chip_list); static DEFINE_SPINLOCK(iort_msi_chip_lock); +/** + * iort_register_domain_token() - register domain token and related ITS ID + * to the list from where we can get it back later on. + * @trans_id: ITS ID. + * @fw_node: Domain token. + * + * Returns: 0 on success, -ENOMEM if no memory when allocating list element + */ +int iort_register_domain_token(int trans_id, struct fwnode_handle *fw_node) +{ + struct iort_its_msi_chip *its_msi_chip; + + its_msi_chip = kzalloc(sizeof(*its_msi_chip), GFP_KERNEL); + if (!its_msi_chip) + return -ENOMEM; + + its_msi_chip->fw_node = fw_node; + its_msi_chip->translation_id = trans_id; + + spin_lock(&iort_msi_chip_lock); + list_add(&its_msi_chip->list, &iort_msi_chip_list); + spin_unlock(&iort_msi_chip_lock); + + return 0; +} + +/** + * iort_deregister_domain_token() - Deregister domain token based on ITS ID + * @trans_id: ITS ID. + * + * Returns: none. + */ +void iort_deregister_domain_token(int trans_id) +{ + struct iort_its_msi_chip *its_msi_chip, *t; + + spin_lock(&iort_msi_chip_lock); + list_for_each_entry_safe(its_msi_chip, t, &iort_msi_chip_list, list) { + if (its_msi_chip->translation_id == trans_id) { + list_del(&its_msi_chip->list); + kfree(its_msi_chip); + break; + } + } + spin_unlock(&iort_msi_chip_lock); +} + +/** + * iort_find_domain_token() - Find domain token based on given ITS ID + * @trans_id: ITS ID. + * + * Returns: domain token when find on the list, NULL otherwise + */ +struct fwnode_handle *iort_find_domain_token(int trans_id) +{ + struct fwnode_handle *fw_node = NULL; + struct iort_its_msi_chip *its_msi_chip; + + spin_lock(&iort_msi_chip_lock); + list_for_each_entry(its_msi_chip, &iort_msi_chip_list, list) { + if (its_msi_chip->translation_id == trans_id) { + fw_node = its_msi_chip->fw_node; + break; + } + } + spin_unlock(&iort_msi_chip_lock); + + return fw_node; +} + static struct acpi_iort_node *iort_scan_node(enum acpi_iort_node_type type, iort_find_node_callback callback, void *context) @@ -204,6 +280,82 @@ static struct acpi_iort_node *iort_find_dev_node(struct device *dev) iort_match_node_callback, &pbus->dev); } +/** + * iort_msi_map_rid() - Map a MSI requester ID for a device + * @dev: The device for which the mapping is to be done. + * @req_id: The device requester ID. + * + * Returns: mapped MSI RID on success, input requester ID otherwise + */ +u32 iort_msi_map_rid(struct device *dev, u32 req_id) +{ + struct acpi_iort_node *node; + u32 dev_id; + + node = iort_find_dev_node(dev); + if (!node) + return req_id; + + iort_node_map_rid(node, req_id, &dev_id, ACPI_IORT_NODE_ITS_GROUP); + return dev_id; +} + +/** + * iort_dev_find_its_id() - Find the ITS identifier for a device + * @dev: The device. + * @idx: Index of the ITS identifier list. + * @its_id: ITS identifier. + * + * Returns: 0 on success, appropriate error value otherwise + */ +static int iort_dev_find_its_id(struct device *dev, u32 req_id, + unsigned int idx, int *its_id) +{ + struct acpi_iort_its_group *its; + struct acpi_iort_node *node; + + node = iort_find_dev_node(dev); + if (!node) + return -ENXIO; + + node = iort_node_map_rid(node, req_id, NULL, ACPI_IORT_NODE_ITS_GROUP); + if (!node) + return -ENXIO; + + /* Move to ITS specific data */ + its = (struct acpi_iort_its_group *)node->node_data; + if (idx > its->its_count) { + dev_err(dev, "requested ITS ID index [%d] is greater than available [%d]\n", + idx, its->its_count); + return -ENXIO; + } + + *its_id = its->identifiers[idx]; + return 0; +} + +/** + * iort_get_device_domain() - Find MSI domain related to a device + * @dev: The device. + * @req_id: Requester ID for the device. + * + * Returns: the MSI domain for this device, NULL otherwise + */ +struct irq_domain *iort_get_device_domain(struct device *dev, u32 req_id) +{ + struct fwnode_handle *handle; + int its_id; + + if (iort_dev_find_its_id(dev, req_id, 0, &its_id)) + return NULL; + + handle = iort_find_domain_token(its_id); + if (!handle) + return NULL; + + return irq_find_matching_fwnode(handle, DOMAIN_BUS_PCI_MSI); +} + void __init acpi_iort_init(void) { acpi_status status; diff --git a/include/linux/acpi_iort.h b/include/linux/acpi_iort.h index fcacaf7ed64dfc..0e32dac8fd0389 100644 --- a/include/linux/acpi_iort.h +++ b/include/linux/acpi_iort.h @@ -20,11 +20,23 @@ #define __ACPI_IORT_H__ #include +#include +#include +int iort_register_domain_token(int trans_id, struct fwnode_handle *fw_node); +void iort_deregister_domain_token(int trans_id); +struct fwnode_handle *iort_find_domain_token(int trans_id); #ifdef CONFIG_ACPI_IORT void acpi_iort_init(void); +u32 iort_msi_map_rid(struct device *dev, u32 req_id); +struct irq_domain *iort_get_device_domain(struct device *dev, u32 req_id); #else static inline void acpi_iort_init(void) { } +static inline u32 iort_msi_map_rid(struct device *dev, u32 req_id) +{ return req_id; } +static inline struct irq_domain *iort_get_device_domain(struct device *dev, + u32 req_id) +{ return NULL; } #endif #endif /* __ACPI_IORT_H__ */ From be2021baeed64d8947a56529fc383308918ecc41 Mon Sep 17 00:00:00 2001 From: Tomasz Nowicki Date: Mon, 12 Sep 2016 20:32:22 +0200 Subject: [PATCH 24/42] PCI/MSI: Setup MSI domain on a per-device basis using IORT ACPI table It is possible to provide information about which MSI controller to use on a per-device basis for DT. This patch supply this with ACPI support. Currently, IORT is the only one ACPI table which can provide such mapping. In order to plug IORT into MSI infrastructure we are adding ACPI equivalents for finding PCI device domain and its RID translation (pci_msi_domain_get_msi_rid and pci_msi_domain_get_msi_rid calls). Signed-off-by: Tomasz Nowicki Reviewed-by: Hanjun Guo Acked-by: Marc Zyngier Acked-by: Bjorn Helgaas Signed-off-by: Marc Zyngier --- drivers/pci/msi.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 98f12223c734f1..137b4c5fb638a2 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -1502,8 +1503,8 @@ u32 pci_msi_domain_get_msi_rid(struct irq_domain *domain, struct pci_dev *pdev) pci_for_each_dma_alias(pdev, get_msi_id_cb, &rid); of_node = irq_domain_get_of_node(domain); - if (of_node) - rid = of_msi_map_rid(&pdev->dev, of_node, rid); + rid = of_node ? of_msi_map_rid(&pdev->dev, of_node, rid) : + iort_msi_map_rid(&pdev->dev, rid); return rid; } @@ -1519,9 +1520,13 @@ u32 pci_msi_domain_get_msi_rid(struct irq_domain *domain, struct pci_dev *pdev) */ struct irq_domain *pci_msi_get_device_domain(struct pci_dev *pdev) { + struct irq_domain *dom; u32 rid = 0; pci_for_each_dma_alias(pdev, get_msi_id_cb, &rid); - return of_msi_map_get_device_domain(&pdev->dev, rid); + dom = of_msi_map_get_device_domain(&pdev->dev, rid); + if (!dom) + dom = iort_get_device_domain(&pdev->dev, rid); + return dom; } #endif /* CONFIG_PCI_MSI_IRQ_DOMAIN */ From d14ae5e6bac36e88cd5deeee411104da424bc73d Mon Sep 17 00:00:00 2001 From: Tomasz Nowicki Date: Mon, 12 Sep 2016 20:32:23 +0200 Subject: [PATCH 25/42] irqchip/gicv3-its: Cleanup for ITS domain initialization There is no point to initialize ITS without having msi-controller property in corresponding DT node. However, its_probe is checking msi-controller presence at the end, so we can save our time and do that check prior to its_probe call. Also, for the code clarity purpose, we put domain initialization to separate function. Signed-off-by: Tomasz Nowicki Acked-by: Marc Zyngier Reviewed-by: Hanjun Guo Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic-v3-its.c | 57 +++++++++++++++++++------------- 1 file changed, 34 insertions(+), 23 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 36b9c28a5c917b..943442d689d80b 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -1614,13 +1614,37 @@ static void its_enable_quirks(struct its_node *its) gic_enable_quirks(iidr, its_quirks, its); } +static int its_init_domain(struct device_node *node, struct its_node *its, + struct irq_domain *parent) +{ + struct irq_domain *inner_domain; + struct msi_domain_info *info; + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) + return -ENOMEM; + + inner_domain = irq_domain_add_tree(node, &its_domain_ops, its); + if (!inner_domain) { + kfree(info); + return -ENOMEM; + } + + inner_domain->parent = parent; + inner_domain->bus_token = DOMAIN_BUS_NEXUS; + info->ops = &its_msi_domain_ops; + info->data = its; + inner_domain->host_data = info; + + return 0; +} + static int __init its_probe(struct device_node *node, struct irq_domain *parent) { struct resource res; struct its_node *its; void __iomem *its_base; - struct irq_domain *inner_domain; u32 val; u64 baser, tmp; int err; @@ -1712,28 +1736,9 @@ static int __init its_probe(struct device_node *node, writeq_relaxed(0, its->base + GITS_CWRITER); writel_relaxed(GITS_CTLR_ENABLE, its->base + GITS_CTLR); - if (of_property_read_bool(node, "msi-controller")) { - struct msi_domain_info *info; - - info = kzalloc(sizeof(*info), GFP_KERNEL); - if (!info) { - err = -ENOMEM; - goto out_free_tables; - } - - inner_domain = irq_domain_add_tree(node, &its_domain_ops, its); - if (!inner_domain) { - err = -ENOMEM; - kfree(info); - goto out_free_tables; - } - - inner_domain->parent = parent; - inner_domain->bus_token = DOMAIN_BUS_NEXUS; - info->ops = &its_msi_domain_ops; - info->data = its; - inner_domain->host_data = info; - } + err = its_init_domain(node, its, parent); + if (err) + goto out_free_tables; spin_lock(&its_lock); list_add(&its->entry, &its_nodes); @@ -1784,6 +1789,12 @@ int __init its_init(struct device_node *node, struct rdists *rdists, for (np = of_find_matching_node(node, its_device_id); np; np = of_find_matching_node(np, its_device_id)) { + if (!of_property_read_bool(np, "msi-controller")) { + pr_warn("%s: no msi-controller property, ITS ignored\n", + np->full_name); + continue; + } + its_probe(np, parent_domain); } From db40f0a7aea5e03ef044ef5dbc51a364e1ff7991 Mon Sep 17 00:00:00 2001 From: Tomasz Nowicki Date: Mon, 12 Sep 2016 20:32:24 +0200 Subject: [PATCH 26/42] irqchip/gicv3-its: Refactor ITS DT init code to prepare for ACPI In order to add ACPI support we need to isolate ACPI&DT common code and move DT logic to corresponding functions. To achieve this we are using firmware agnostic handle which can be unpacked to either DT or ACPI node. No functional changes other than a very minor one: 1. Terminate its_init call with -ENODEV for non-DT case which allows to remove hack from its-gic-v3.c. 2. Fix ITS base register address type (from 'unsigned long' to 'phys_addr_t'), as a bonus we get nice string formatting. 3. Since there is only one of ITS parent domain convert it to static global variable and drop the parameter from its_probe_one. Users can refer to it in more convenient way then. Signed-off-by: Hanjun Guo Signed-off-by: Tomasz Nowicki Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic-v3-its.c | 65 +++++++++++++++++------------- drivers/irqchip/irq-gic-v3.c | 7 +--- include/linux/irqchip/arm-gic-v3.h | 4 +- 3 files changed, 42 insertions(+), 34 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 943442d689d80b..c7518c7b48bcfe 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -75,7 +75,7 @@ struct its_node { raw_spinlock_t lock; struct list_head entry; void __iomem *base; - unsigned long phys_base; + phys_addr_t phys_base; struct its_cmd_block *cmd_base; struct its_cmd_block *cmd_write; struct its_baser tables[GITS_BASER_NR_REGS]; @@ -115,6 +115,7 @@ struct its_device { static LIST_HEAD(its_nodes); static DEFINE_SPINLOCK(its_lock); static struct rdists *gic_rdists; +static struct irq_domain *its_parent; #define gic_data_rdist() (raw_cpu_ptr(gic_rdists->rdist)) #define gic_data_rdist_rd_base() (gic_data_rdist()->rd_base) @@ -1614,8 +1615,7 @@ static void its_enable_quirks(struct its_node *its) gic_enable_quirks(iidr, its_quirks, its); } -static int its_init_domain(struct device_node *node, struct its_node *its, - struct irq_domain *parent) +static int its_init_domain(struct fwnode_handle *handle, struct its_node *its) { struct irq_domain *inner_domain; struct msi_domain_info *info; @@ -1624,13 +1624,13 @@ static int its_init_domain(struct device_node *node, struct its_node *its, if (!info) return -ENOMEM; - inner_domain = irq_domain_add_tree(node, &its_domain_ops, its); + inner_domain = irq_domain_create_tree(handle, &its_domain_ops, its); if (!inner_domain) { kfree(info); return -ENOMEM; } - inner_domain->parent = parent; + inner_domain->parent = its_parent; inner_domain->bus_token = DOMAIN_BUS_NEXUS; info->ops = &its_msi_domain_ops; info->data = its; @@ -1639,43 +1639,35 @@ static int its_init_domain(struct device_node *node, struct its_node *its, return 0; } -static int __init its_probe(struct device_node *node, - struct irq_domain *parent) +static int __init its_probe_one(struct resource *res, + struct fwnode_handle *handle, int numa_node) { - struct resource res; struct its_node *its; void __iomem *its_base; u32 val; u64 baser, tmp; int err; - err = of_address_to_resource(node, 0, &res); - if (err) { - pr_warn("%s: no regs?\n", node->full_name); - return -ENXIO; - } - - its_base = ioremap(res.start, resource_size(&res)); + its_base = ioremap(res->start, resource_size(res)); if (!its_base) { - pr_warn("%s: unable to map registers\n", node->full_name); + pr_warn("ITS@%pa: Unable to map ITS registers\n", &res->start); return -ENOMEM; } val = readl_relaxed(its_base + GITS_PIDR2) & GIC_PIDR2_ARCH_MASK; if (val != 0x30 && val != 0x40) { - pr_warn("%s: no ITS detected, giving up\n", node->full_name); + pr_warn("ITS@%pa: No ITS detected, giving up\n", &res->start); err = -ENODEV; goto out_unmap; } err = its_force_quiescent(its_base); if (err) { - pr_warn("%s: failed to quiesce, giving up\n", - node->full_name); + pr_warn("ITS@%pa: Failed to quiesce, giving up\n", &res->start); goto out_unmap; } - pr_info("ITS: %s\n", node->full_name); + pr_info("ITS %pR\n", res); its = kzalloc(sizeof(*its), GFP_KERNEL); if (!its) { @@ -1687,9 +1679,9 @@ static int __init its_probe(struct device_node *node, INIT_LIST_HEAD(&its->entry); INIT_LIST_HEAD(&its->its_device_list); its->base = its_base; - its->phys_base = res.start; + its->phys_base = res->start; its->ite_size = ((readl_relaxed(its_base + GITS_TYPER) >> 4) & 0xf) + 1; - its->numa_node = of_node_to_nid(node); + its->numa_node = numa_node; its->cmd_base = kzalloc(ITS_CMD_QUEUE_SZ, GFP_KERNEL); if (!its->cmd_base) { @@ -1736,7 +1728,7 @@ static int __init its_probe(struct device_node *node, writeq_relaxed(0, its->base + GITS_CWRITER); writel_relaxed(GITS_CTLR_ENABLE, its->base + GITS_CTLR); - err = its_init_domain(node, its, parent); + err = its_init_domain(handle, its); if (err) goto out_free_tables; @@ -1754,7 +1746,7 @@ static int __init its_probe(struct device_node *node, kfree(its); out_unmap: iounmap(its_base); - pr_err("ITS: failed probing %s (%d)\n", node->full_name, err); + pr_err("ITS@%pa: failed probing (%d)\n", &res->start, err); return err; } @@ -1782,10 +1774,10 @@ static struct of_device_id its_device_id[] = { {}, }; -int __init its_init(struct device_node *node, struct rdists *rdists, - struct irq_domain *parent_domain) +static int __init its_of_probe(struct device_node *node) { struct device_node *np; + struct resource res; for (np = of_find_matching_node(node, its_device_id); np; np = of_find_matching_node(np, its_device_id)) { @@ -1795,8 +1787,27 @@ int __init its_init(struct device_node *node, struct rdists *rdists, continue; } - its_probe(np, parent_domain); + if (of_address_to_resource(np, 0, &res)) { + pr_warn("%s: no regs?\n", np->full_name); + continue; + } + + its_probe_one(&res, &np->fwnode, of_node_to_nid(np)); } + return 0; +} + +int __init its_init(struct fwnode_handle *handle, struct rdists *rdists, + struct irq_domain *parent_domain) +{ + struct device_node *of_node; + + its_parent = parent_domain; + of_node = to_of_node(handle); + if (of_node) + its_of_probe(of_node); + else + return -ENODEV; if (list_empty(&its_nodes)) { pr_warn("ITS: No ITS available, not enabling LPIs\n"); diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index ecc5b2360c7aa0..850f9c422f2426 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -918,7 +918,6 @@ static int __init gic_init_bases(void __iomem *dist_base, u64 redist_stride, struct fwnode_handle *handle) { - struct device_node *node; u32 typer; int gic_irqs; int err; @@ -959,10 +958,8 @@ static int __init gic_init_bases(void __iomem *dist_base, set_handle_irq(gic_handle_irq); - node = to_of_node(handle); - if (IS_ENABLED(CONFIG_ARM_GIC_V3_ITS) && gic_dist_supports_lpis() && - node) /* Temp hack to prevent ITS init for ACPI */ - its_init(node, &gic_data.rdists, gic_data.domain); + if (IS_ENABLED(CONFIG_ARM_GIC_V3_ITS) && gic_dist_supports_lpis()) + its_init(handle, &gic_data.rdists, gic_data.domain); gic_smp_init(); gic_dist_init(); diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 99ac022edc6063..8361c8d3edd10d 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -430,9 +430,9 @@ struct rdists { }; struct irq_domain; -struct device_node; +struct fwnode_handle; int its_cpu_init(void); -int its_init(struct device_node *node, struct rdists *rdists, +int its_init(struct fwnode_handle *handle, struct rdists *rdists, struct irq_domain *domain); static inline bool gic_enable_sre(void) From 3f010cf197324b6c1e87f472e64b87c5f909735e Mon Sep 17 00:00:00 2001 From: Tomasz Nowicki Date: Mon, 12 Sep 2016 20:32:25 +0200 Subject: [PATCH 27/42] irqchip/gicv3-its: Probe ITS in the ACPI way ITS is prepared for being initialized different than DT, therefore we can initialize it in ACPI way. We collect register base address from MADT table and pass mandatory info to firmware-agnostic ITS init call. Use here IORT lib to register ITS domain which then can be found and used on to build another PCI MSI domain in hierarchical stack domain. NOTE: Waiting for proper ITS and NUMA node relation description in IORT table, we pass around NUMA_NO_NODE to the its_probe_one init call. This means that Cavium ThunderX erratum 23144 (pass1.1 only) is not supported for ACPI boot method yet. Signed-off-by: Tomasz Nowicki Acked-by: Marc Zyngier Reviewed-by: Hanjun Guo Signed-off-by: Marc Zyngier --- drivers/irqchip/Kconfig | 1 + drivers/irqchip/irq-gic-v3-its.c | 61 +++++++++++++++++++++++++++++++- 2 files changed, 61 insertions(+), 1 deletion(-) diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig index 7f8728984f4472..9aeea1d8a57966 100644 --- a/drivers/irqchip/Kconfig +++ b/drivers/irqchip/Kconfig @@ -39,6 +39,7 @@ config ARM_GIC_V3_ITS bool depends on PCI depends on PCI_MSI + select ACPI_IORT if ACPI config ARM_NVIC bool diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index c7518c7b48bcfe..35c851c14e497e 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -15,10 +15,13 @@ * along with this program. If not, see . */ +#include #include #include #include #include +#include +#include #include #include #include @@ -1438,6 +1441,11 @@ static int its_irq_gic_domain_alloc(struct irq_domain *domain, fwspec.param[0] = GIC_IRQ_TYPE_LPI; fwspec.param[1] = hwirq; fwspec.param[2] = IRQ_TYPE_EDGE_RISING; + } else if (is_fwnode_irqchip(domain->parent->fwnode)) { + fwspec.fwnode = domain->parent->fwnode; + fwspec.param_count = 2; + fwspec.param[0] = hwirq; + fwspec.param[1] = IRQ_TYPE_EDGE_RISING; } else { return -EINVAL; } @@ -1797,6 +1805,57 @@ static int __init its_of_probe(struct device_node *node) return 0; } +#ifdef CONFIG_ACPI + +#define ACPI_GICV3_ITS_MEM_SIZE (SZ_128K) + +static int __init gic_acpi_parse_madt_its(struct acpi_subtable_header *header, + const unsigned long end) +{ + struct acpi_madt_generic_translator *its_entry; + struct fwnode_handle *dom_handle; + struct resource res; + int err; + + its_entry = (struct acpi_madt_generic_translator *)header; + memset(&res, 0, sizeof(res)); + res.start = its_entry->base_address; + res.end = its_entry->base_address + ACPI_GICV3_ITS_MEM_SIZE - 1; + res.flags = IORESOURCE_MEM; + + dom_handle = irq_domain_alloc_fwnode((void *)its_entry->base_address); + if (!dom_handle) { + pr_err("ITS@%pa: Unable to allocate GICv3 ITS domain token\n", + &res.start); + return -ENOMEM; + } + + err = iort_register_domain_token(its_entry->translation_id, dom_handle); + if (err) { + pr_err("ITS@%pa: Unable to register GICv3 ITS domain token (ITS ID %d) to IORT\n", + &res.start, its_entry->translation_id); + goto dom_err; + } + + err = its_probe_one(&res, dom_handle, NUMA_NO_NODE); + if (!err) + return 0; + + iort_deregister_domain_token(its_entry->translation_id); +dom_err: + irq_domain_free_fwnode(dom_handle); + return err; +} + +static void __init its_acpi_probe(void) +{ + acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_TRANSLATOR, + gic_acpi_parse_madt_its, 0); +} +#else +static void __init its_acpi_probe(void) { } +#endif + int __init its_init(struct fwnode_handle *handle, struct rdists *rdists, struct irq_domain *parent_domain) { @@ -1807,7 +1866,7 @@ int __init its_init(struct fwnode_handle *handle, struct rdists *rdists, if (of_node) its_of_probe(of_node); else - return -ENODEV; + its_acpi_probe(); if (list_empty(&its_nodes)) { pr_warn("ITS: No ITS available, not enabling LPIs\n"); From db744aaa279fa1d2a06dd6b95f9599acf3557885 Mon Sep 17 00:00:00 2001 From: Tomasz Nowicki Date: Mon, 12 Sep 2016 20:32:26 +0200 Subject: [PATCH 28/42] irqchip/gicv3-its: Factor out PCI-MSI part that might be reused for ACPI Firmware agnostic code lands in common functions which do necessary domain initialization based on unique domain handler. DT specific code goes to DT specific init call. Signed-off-by: Tomasz Nowicki Acked-by: Marc Zyngier Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic-v3-its-pci-msi.c | 44 ++++++++++++++++-------- 1 file changed, 29 insertions(+), 15 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3-its-pci-msi.c b/drivers/irqchip/irq-gic-v3-its-pci-msi.c index aee60ed025dccf..d2c2496d61e9da 100644 --- a/drivers/irqchip/irq-gic-v3-its-pci-msi.c +++ b/drivers/irqchip/irq-gic-v3-its-pci-msi.c @@ -106,34 +106,48 @@ static struct of_device_id its_device_id[] = { {}, }; -static int __init its_pci_msi_init(void) +static int __init its_pci_msi_init_one(struct fwnode_handle *handle, + const char *name) { - struct device_node *np; struct irq_domain *parent; + parent = irq_find_matching_fwnode(handle, DOMAIN_BUS_NEXUS); + if (!parent || !msi_get_domain_info(parent)) { + pr_err("%s: Unable to locate ITS domain\n", name); + return -ENXIO; + } + + if (!pci_msi_create_irq_domain(handle, &its_pci_msi_domain_info, + parent)) { + pr_err("%s: Unable to create PCI domain\n", name); + return -ENOMEM; + } + + return 0; +} + +static int __init its_pci_of_msi_init(void) +{ + struct device_node *np; + for (np = of_find_matching_node(NULL, its_device_id); np; np = of_find_matching_node(np, its_device_id)) { if (!of_property_read_bool(np, "msi-controller")) continue; - parent = irq_find_matching_host(np, DOMAIN_BUS_NEXUS); - if (!parent || !msi_get_domain_info(parent)) { - pr_err("%s: unable to locate ITS domain\n", - np->full_name); + if (its_pci_msi_init_one(of_node_to_fwnode(np), np->full_name)) continue; - } - - if (!pci_msi_create_irq_domain(of_node_to_fwnode(np), - &its_pci_msi_domain_info, - parent)) { - pr_err("%s: unable to create PCI domain\n", - np->full_name); - continue; - } pr_info("PCI/MSI: %s domain created\n", np->full_name); } return 0; } + +static int __init its_pci_msi_init(void) +{ + its_pci_of_msi_init(); + + return 0; +} early_initcall(its_pci_msi_init); From 723344dd0b2aa10ef9d28fe7f35d594d3e64f0f9 Mon Sep 17 00:00:00 2001 From: Tomasz Nowicki Date: Mon, 12 Sep 2016 20:32:27 +0200 Subject: [PATCH 29/42] irqchip/gicv3-its: Use MADT ITS subtable to do PCI/MSI domain initialization Let ACPI build ITS PCI MSI domain. ACPI code is responsible for retrieving inner domain token and passing it on to its_pci_msi_init_one generic init call. IORT maintains list of registered domain tokens and allows to find corresponding domain based on MADT ITS subtable ID info. Signed-off-by: Tomasz Nowicki Acked-by: Marc Zyngier Reviewed-by: Hanjun Guo Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic-v3-its-pci-msi.c | 44 ++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/drivers/irqchip/irq-gic-v3-its-pci-msi.c b/drivers/irqchip/irq-gic-v3-its-pci-msi.c index d2c2496d61e9da..aee1c60d7ab568 100644 --- a/drivers/irqchip/irq-gic-v3-its-pci-msi.c +++ b/drivers/irqchip/irq-gic-v3-its-pci-msi.c @@ -15,6 +15,7 @@ * along with this program. If not, see . */ +#include #include #include #include @@ -144,9 +145,52 @@ static int __init its_pci_of_msi_init(void) return 0; } +#ifdef CONFIG_ACPI + +static int __init +its_pci_msi_parse_madt(struct acpi_subtable_header *header, + const unsigned long end) +{ + struct acpi_madt_generic_translator *its_entry; + struct fwnode_handle *dom_handle; + const char *node_name; + int err = -ENXIO; + + its_entry = (struct acpi_madt_generic_translator *)header; + node_name = kasprintf(GFP_KERNEL, "ITS@0x%lx", + (long)its_entry->base_address); + dom_handle = iort_find_domain_token(its_entry->translation_id); + if (!dom_handle) { + pr_err("%s: Unable to locate ITS domain handle\n", node_name); + goto out; + } + + err = its_pci_msi_init_one(dom_handle, node_name); + if (!err) + pr_info("PCI/MSI: %s domain created\n", node_name); + +out: + kfree(node_name); + return err; +} + +static int __init its_pci_acpi_msi_init(void) +{ + acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_TRANSLATOR, + its_pci_msi_parse_madt, 0); + return 0; +} +#else +static int __init its_pci_acpi_msi_init(void) +{ + return 0; +} +#endif + static int __init its_pci_msi_init(void) { its_pci_of_msi_init(); + its_pci_acpi_msi_init(); return 0; } From ecb3f394c5dba897d215a5422f1b363e93e2ce4e Mon Sep 17 00:00:00 2001 From: Craig Gallek Date: Tue, 13 Sep 2016 12:14:51 -0400 Subject: [PATCH 30/42] genirq: Expose interrupt information through sysfs Information about interrupts is exposed via /proc/interrupts, but the format of that file has changed over kernel versions and differs across architectures. It also has varying column numbers depending on hardware. That all makes it hard for tools to parse. To solve this, expose the information through sysfs so each irq attribute is in a separate file in a consistent, machine parsable way. This feature is only available when both CONFIG_SPARSE_IRQ and CONFIG_SYSFS are enabled. Examples: /sys/kernel/irq/18/actions: i801_smbus,ehci_hcd:usb1,uhci_hcd:usb7 /sys/kernel/irq/18/chip_name: IR-IO-APIC /sys/kernel/irq/18/hwirq: 18 /sys/kernel/irq/18/name: fasteoi /sys/kernel/irq/18/per_cpu_count: 0,0 /sys/kernel/irq/18/type: level /sys/kernel/irq/25/actions: ahci0 /sys/kernel/irq/25/chip_name: IR-PCI-MSI /sys/kernel/irq/25/hwirq: 512000 /sys/kernel/irq/25/name: edge /sys/kernel/irq/25/per_cpu_count: 29036,0 /sys/kernel/irq/25/type: edge [ tglx: Moved kobject_del() under sparse_irq_lock, massaged code comments and changelog ] Signed-off-by: Craig Gallek Cc: David Decotigny Link: http://lkml.kernel.org/r/1473783291-122873-1-git-send-email-kraigatgoog@gmail.com Signed-off-by: Thomas Gleixner --- Documentation/ABI/testing/sysfs-kernel-irq | 53 ++++++ include/linux/irqdesc.h | 3 + kernel/irq/irqdesc.c | 193 ++++++++++++++++++++- 3 files changed, 247 insertions(+), 2 deletions(-) create mode 100644 Documentation/ABI/testing/sysfs-kernel-irq diff --git a/Documentation/ABI/testing/sysfs-kernel-irq b/Documentation/ABI/testing/sysfs-kernel-irq new file mode 100644 index 00000000000000..eb074b10098667 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-kernel-irq @@ -0,0 +1,53 @@ +What: /sys/kernel/irq +Date: September 2016 +KernelVersion: 4.9 +Contact: Craig Gallek +Description: Directory containing information about the system's IRQs. + Specifically, data from the associated struct irq_desc. + The information here is similar to that in /proc/interrupts + but in a more machine-friendly format. This directory contains + one subdirectory for each Linux IRQ number. + +What: /sys/kernel/irq//actions +Date: September 2016 +KernelVersion: 4.9 +Contact: Craig Gallek +Description: The IRQ action chain. A comma-separated list of zero or more + device names associated with this interrupt. + +What: /sys/kernel/irq//chip_name +Date: September 2016 +KernelVersion: 4.9 +Contact: Craig Gallek +Description: Human-readable chip name supplied by the associated device + driver. + +What: /sys/kernel/irq//hwirq +Date: September 2016 +KernelVersion: 4.9 +Contact: Craig Gallek +Description: When interrupt translation domains are used, this file contains + the underlying hardware IRQ number used for this Linux IRQ. + +What: /sys/kernel/irq//name +Date: September 2016 +KernelVersion: 4.9 +Contact: Craig Gallek +Description: Human-readable flow handler name as defined by the irq chip + driver. + +What: /sys/kernel/irq//per_cpu_count +Date: September 2016 +KernelVersion: 4.9 +Contact: Craig Gallek +Description: The number of times the interrupt has fired since boot. This + is a comma-separated list of counters; one per CPU in CPU id + order. NOTE: This file consistently shows counters for all + CPU ids. This differs from the behavior of /proc/interrupts + which only shows counters for online CPUs. + +What: /sys/kernel/irq//type +Date: September 2016 +KernelVersion: 4.9 +Contact: Craig Gallek +Description: The type of the interrupt. Either the string 'level' or 'edge'. diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index b51beebf9804c1..c9be57931b5811 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -2,6 +2,7 @@ #define _LINUX_IRQDESC_H #include +#include /* * Core internal functions to deal with irq descriptors @@ -43,6 +44,7 @@ struct pt_regs; * @force_resume_depth: number of irqactions on a irq descriptor with * IRQF_FORCE_RESUME set * @rcu: rcu head for delayed free + * @kobj: kobject used to represent this struct in sysfs * @dir: /proc/irq/ procfs entry * @name: flow handler name for /proc/interrupts output */ @@ -88,6 +90,7 @@ struct irq_desc { #endif #ifdef CONFIG_SPARSE_IRQ struct rcu_head rcu; + struct kobject kobj; #endif int parent_irq; struct module *owner; diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index a623b44f2d4bec..93b51727abaa19 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -15,6 +15,7 @@ #include #include #include +#include #include "internals.h" @@ -123,6 +124,181 @@ static DECLARE_BITMAP(allocated_irqs, IRQ_BITMAP_BITS); #ifdef CONFIG_SPARSE_IRQ +static void irq_kobj_release(struct kobject *kobj); + +#ifdef CONFIG_SYSFS +static struct kobject *irq_kobj_base; + +#define IRQ_ATTR_RO(_name) \ +static struct kobj_attribute _name##_attr = __ATTR_RO(_name) + +static ssize_t per_cpu_count_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj); + int cpu, irq = desc->irq_data.irq; + ssize_t ret = 0; + char *p = ""; + + for_each_possible_cpu(cpu) { + unsigned int c = kstat_irqs_cpu(irq, cpu); + + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s%u", p, c); + p = ","; + } + + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n"); + return ret; +} +IRQ_ATTR_RO(per_cpu_count); + +static ssize_t chip_name_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj); + ssize_t ret = 0; + + raw_spin_lock_irq(&desc->lock); + if (desc->irq_data.chip && desc->irq_data.chip->name) { + ret = scnprintf(buf, PAGE_SIZE, "%s\n", + desc->irq_data.chip->name); + } + raw_spin_unlock_irq(&desc->lock); + + return ret; +} +IRQ_ATTR_RO(chip_name); + +static ssize_t hwirq_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj); + ssize_t ret = 0; + + raw_spin_lock_irq(&desc->lock); + if (desc->irq_data.domain) + ret = sprintf(buf, "%d\n", (int)desc->irq_data.hwirq); + raw_spin_unlock_irq(&desc->lock); + + return ret; +} +IRQ_ATTR_RO(hwirq); + +static ssize_t type_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj); + ssize_t ret = 0; + + raw_spin_lock_irq(&desc->lock); + ret = sprintf(buf, "%s\n", + irqd_is_level_type(&desc->irq_data) ? "level" : "edge"); + raw_spin_unlock_irq(&desc->lock); + + return ret; + +} +IRQ_ATTR_RO(type); + +static ssize_t name_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj); + ssize_t ret = 0; + + raw_spin_lock_irq(&desc->lock); + if (desc->name) + ret = scnprintf(buf, PAGE_SIZE, "%s\n", desc->name); + raw_spin_unlock_irq(&desc->lock); + + return ret; +} +IRQ_ATTR_RO(name); + +static ssize_t actions_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj); + struct irqaction *action; + ssize_t ret = 0; + char *p = ""; + + raw_spin_lock_irq(&desc->lock); + for (action = desc->action; action != NULL; action = action->next) { + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s%s", + p, action->name); + p = ","; + } + raw_spin_unlock_irq(&desc->lock); + + if (ret) + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n"); + + return ret; +} +IRQ_ATTR_RO(actions); + +static struct attribute *irq_attrs[] = { + &per_cpu_count_attr.attr, + &chip_name_attr.attr, + &hwirq_attr.attr, + &type_attr.attr, + &name_attr.attr, + &actions_attr.attr, + NULL +}; + +static struct kobj_type irq_kobj_type = { + .release = irq_kobj_release, + .sysfs_ops = &kobj_sysfs_ops, + .default_attrs = irq_attrs, +}; + +static void irq_sysfs_add(int irq, struct irq_desc *desc) +{ + if (irq_kobj_base) { + /* + * Continue even in case of failure as this is nothing + * crucial. + */ + if (kobject_add(&desc->kobj, irq_kobj_base, "%d", irq)) + pr_warn("Failed to add kobject for irq %d\n", irq); + } +} + +static int __init irq_sysfs_init(void) +{ + struct irq_desc *desc; + int irq; + + /* Prevent concurrent irq alloc/free */ + irq_lock_sparse(); + + irq_kobj_base = kobject_create_and_add("irq", kernel_kobj); + if (!irq_kobj_base) { + irq_unlock_sparse(); + return -ENOMEM; + } + + /* Add the already allocated interrupts */ + for_each_irq_desc(irq, desc) + irq_sysfs_add(irq, desc); + irq_unlock_sparse(); + + return 0; +} +postcore_initcall(irq_sysfs_init); + +#else /* !CONFIG_SYSFS */ + +static struct kobj_type irq_kobj_type = { + .release = irq_kobj_release, +}; + +static void irq_sysfs_add(int irq, struct irq_desc *desc) {} + +#endif /* CONFIG_SYSFS */ + static RADIX_TREE(irq_desc_tree, GFP_KERNEL); static void irq_insert_desc(unsigned int irq, struct irq_desc *desc) @@ -187,6 +363,7 @@ static struct irq_desc *alloc_desc(int irq, int node, unsigned int flags, desc_set_defaults(irq, desc, node, affinity, owner); irqd_set(&desc->irq_data, flags); + kobject_init(&desc->kobj, &irq_kobj_type); return desc; @@ -197,15 +374,22 @@ static struct irq_desc *alloc_desc(int irq, int node, unsigned int flags, return NULL; } -static void delayed_free_desc(struct rcu_head *rhp) +static void irq_kobj_release(struct kobject *kobj) { - struct irq_desc *desc = container_of(rhp, struct irq_desc, rcu); + struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj); free_masks(desc); free_percpu(desc->kstat_irqs); kfree(desc); } +static void delayed_free_desc(struct rcu_head *rhp) +{ + struct irq_desc *desc = container_of(rhp, struct irq_desc, rcu); + + kobject_put(&desc->kobj); +} + static void free_desc(unsigned int irq) { struct irq_desc *desc = irq_to_desc(irq); @@ -217,8 +401,12 @@ static void free_desc(unsigned int irq) * kstat_irq_usr(). Once we deleted the descriptor from the * sparse tree we can free it. Access in proc will fail to * lookup the descriptor. + * + * The sysfs entry must be serialized against a concurrent + * irq_sysfs_init() as well. */ mutex_lock(&sparse_irq_lock); + kobject_del(&desc->kobj); delete_irq_desc(irq); mutex_unlock(&sparse_irq_lock); @@ -261,6 +449,7 @@ static int alloc_descs(unsigned int start, unsigned int cnt, int node, goto err; mutex_lock(&sparse_irq_lock); irq_insert_desc(start + i, desc); + irq_sysfs_add(start + i, desc); mutex_unlock(&sparse_irq_lock); } return start; From 28f4b04143c56135b1ca742fc64b664ed04de6a4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 14 Sep 2016 16:18:47 +0200 Subject: [PATCH 31/42] genirq/msi: Add cpumask allocation to alloc_msi_entry For irq spreading want to store affinity masks in the msi_entry. Add the infrastructure for it. We allocate an array of cpumasks with an array size of the number of used vectors in the entry, so we can hand in the information per linux interrupt later. As we hand in the number of used vectors, we assign them right away. Convert all the call sites. Signed-off-by: Thomas Gleixner Cc: axboe@fb.com Cc: keith.busch@intel.com Cc: agordeev@redhat.com Cc: linux-block@vger.kernel.org Cc: Christoph Hellwig Link: http://lkml.kernel.org/r/1473862739-15032-2-git-send-email-hch@lst.de --- drivers/base/platform-msi.c | 3 +-- drivers/pci/msi.c | 6 ++---- drivers/staging/fsl-mc/bus/mc-msi.c | 3 +-- include/linux/msi.h | 5 +++-- kernel/irq/msi.c | 26 ++++++++++++++++++++++++-- 5 files changed, 31 insertions(+), 12 deletions(-) diff --git a/drivers/base/platform-msi.c b/drivers/base/platform-msi.c index 279e53989374f0..be6a599bc0c1a8 100644 --- a/drivers/base/platform-msi.c +++ b/drivers/base/platform-msi.c @@ -142,13 +142,12 @@ static int platform_msi_alloc_descs_with_irq(struct device *dev, int virq, } for (i = 0; i < nvec; i++) { - desc = alloc_msi_entry(dev); + desc = alloc_msi_entry(dev, 1, NULL); if (!desc) break; desc->platform.msi_priv_data = data; desc->platform.msi_index = base + i; - desc->nvec_used = 1; desc->irq = virq ? virq + i : 0; list_add_tail(&desc->list, dev_to_msi_list(dev)); diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 98f12223c734f1..0db72ba24003e2 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -555,7 +555,7 @@ static struct msi_desc *msi_setup_entry(struct pci_dev *dev, int nvec) struct msi_desc *entry; /* MSI Entry Initialization */ - entry = alloc_msi_entry(&dev->dev); + entry = alloc_msi_entry(&dev->dev, nvec, NULL); if (!entry) return NULL; @@ -568,7 +568,6 @@ static struct msi_desc *msi_setup_entry(struct pci_dev *dev, int nvec) entry->msi_attrib.default_irq = dev->irq; /* Save IOAPIC IRQ */ entry->msi_attrib.multi_cap = (control & PCI_MSI_FLAGS_QMASK) >> 1; entry->msi_attrib.multiple = ilog2(__roundup_pow_of_two(nvec)); - entry->nvec_used = nvec; entry->affinity = dev->irq_affinity; if (control & PCI_MSI_FLAGS_64BIT) @@ -693,7 +692,7 @@ static int msix_setup_entries(struct pci_dev *dev, void __iomem *base, mask = cpumask_of(cpu); } - entry = alloc_msi_entry(&dev->dev); + entry = alloc_msi_entry(&dev->dev, 1, NULL); if (!entry) { if (!i) iounmap(base); @@ -711,7 +710,6 @@ static int msix_setup_entries(struct pci_dev *dev, void __iomem *base, entry->msi_attrib.entry_nr = i; entry->msi_attrib.default_irq = dev->irq; entry->mask_base = base; - entry->nvec_used = 1; entry->affinity = mask; list_add_tail(&entry->list, dev_to_msi_list(&dev->dev)); diff --git a/drivers/staging/fsl-mc/bus/mc-msi.c b/drivers/staging/fsl-mc/bus/mc-msi.c index c7be156ae5e06c..4fd8e41ef46853 100644 --- a/drivers/staging/fsl-mc/bus/mc-msi.c +++ b/drivers/staging/fsl-mc/bus/mc-msi.c @@ -213,7 +213,7 @@ static int fsl_mc_msi_alloc_descs(struct device *dev, unsigned int irq_count) struct msi_desc *msi_desc; for (i = 0; i < irq_count; i++) { - msi_desc = alloc_msi_entry(dev); + msi_desc = alloc_msi_entry(dev, 1, NULL); if (!msi_desc) { dev_err(dev, "Failed to allocate msi entry\n"); error = -ENOMEM; @@ -221,7 +221,6 @@ static int fsl_mc_msi_alloc_descs(struct device *dev, unsigned int irq_count) } msi_desc->fsl_mc.msi_index = i; - msi_desc->nvec_used = 1; INIT_LIST_HEAD(&msi_desc->list); list_add_tail(&msi_desc->list, dev_to_msi_list(dev)); } diff --git a/include/linux/msi.h b/include/linux/msi.h index e8c81fbd5f9cd1..0db320b7bb15d9 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -68,7 +68,7 @@ struct msi_desc { unsigned int nvec_used; struct device *dev; struct msi_msg msg; - const struct cpumask *affinity; + struct cpumask *affinity; union { /* PCI MSI/X specific data */ @@ -123,7 +123,8 @@ static inline void *msi_desc_to_pci_sysdata(struct msi_desc *desc) } #endif /* CONFIG_PCI_MSI */ -struct msi_desc *alloc_msi_entry(struct device *dev); +struct msi_desc *alloc_msi_entry(struct device *dev, int nvec, + const struct cpumask *affinity); void free_msi_entry(struct msi_desc *entry); void __pci_read_msi_msg(struct msi_desc *entry, struct msi_msg *msg); void __pci_write_msi_msg(struct msi_desc *entry, struct msi_msg *msg); diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c index 19e9dfbe97fa53..8a3e872798f340 100644 --- a/kernel/irq/msi.c +++ b/kernel/irq/msi.c @@ -18,20 +18,42 @@ /* Temparory solution for building, will be removed later */ #include -struct msi_desc *alloc_msi_entry(struct device *dev) +/** + * alloc_msi_entry - Allocate an initialize msi_entry + * @dev: Pointer to the device for which this is allocated + * @nvec: The number of vectors used in this entry + * @affinity: Optional pointer to an affinity mask array size of @nvec + * + * If @affinity is not NULL then a an affinity array[@nvec] is allocated + * and the affinity masks from @affinity are copied. + */ +struct msi_desc * +alloc_msi_entry(struct device *dev, int nvec, const struct cpumask *affinity) { - struct msi_desc *desc = kzalloc(sizeof(*desc), GFP_KERNEL); + struct msi_desc *desc; + + desc = kzalloc(sizeof(*desc), GFP_KERNEL); if (!desc) return NULL; INIT_LIST_HEAD(&desc->list); desc->dev = dev; + desc->nvec_used = nvec; + if (affinity) { + desc->affinity = kmemdup(affinity, + nvec * sizeof(*desc->affinity), GFP_KERNEL); + if (!desc->affinity) { + kfree(desc); + return NULL; + } + } return desc; } void free_msi_entry(struct msi_desc *entry) { + kfree(entry->affinity); kfree(entry); } From 34c3d9819fda464be4f1bec59b63353814f76c73 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 14 Sep 2016 16:18:48 +0200 Subject: [PATCH 32/42] genirq/affinity: Provide smarter irq spreading infrastructure The current irq spreading infrastructure is just looking at a cpumask and tries to spread the interrupts over the mask. Thats suboptimal as it does not take numa nodes into account. Change the logic so the interrupts are spread across numa nodes and inside the nodes. If there are more cpus than vectors per node, then we set the affinity to several cpus. If HT siblings are available we take that into account and try to set all siblings to a single vector. Signed-off-by: Thomas Gleixner Cc: Christoph Hellwig Cc: axboe@fb.com Cc: keith.busch@intel.com Cc: agordeev@redhat.com Cc: linux-block@vger.kernel.org Link: http://lkml.kernel.org/r/1473862739-15032-3-git-send-email-hch@lst.de --- include/linux/interrupt.h | 15 ++++ kernel/irq/affinity.c | 149 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 164 insertions(+) diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index b6683f0ffc9f6b..4e59d122cad99c 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -279,6 +279,8 @@ extern int irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify); struct cpumask *irq_create_affinity_mask(unsigned int *nr_vecs); +struct cpumask *irq_create_affinity_masks(const struct cpumask *affinity, int nvec); +int irq_calc_affinity_vectors(const struct cpumask *affinity, int maxvec); #else /* CONFIG_SMP */ @@ -316,6 +318,19 @@ static inline struct cpumask *irq_create_affinity_mask(unsigned int *nr_vecs) *nr_vecs = 1; return NULL; } + +static inline struct cpumask * +irq_create_affinity_masks(const struct cpumask *affinity, int nvec) +{ + return NULL; +} + +static inline int +irq_calc_affinity_vectors(const struct cpumask *affinity, int maxvec) +{ + return maxvec; +} + #endif /* CONFIG_SMP */ /* diff --git a/kernel/irq/affinity.c b/kernel/irq/affinity.c index 32f6cfcff21244..7812fecc6e2fb0 100644 --- a/kernel/irq/affinity.c +++ b/kernel/irq/affinity.c @@ -4,6 +4,155 @@ #include #include +static void irq_spread_init_one(struct cpumask *irqmsk, struct cpumask *nmsk, + int cpus_per_vec) +{ + const struct cpumask *siblmsk; + int cpu, sibl; + + for ( ; cpus_per_vec > 0; ) { + cpu = cpumask_first(nmsk); + + /* Should not happen, but I'm too lazy to think about it */ + if (cpu >= nr_cpu_ids) + return; + + cpumask_clear_cpu(cpu, nmsk); + cpumask_set_cpu(cpu, irqmsk); + cpus_per_vec--; + + /* If the cpu has siblings, use them first */ + siblmsk = topology_sibling_cpumask(cpu); + for (sibl = -1; cpus_per_vec > 0; ) { + sibl = cpumask_next(sibl, siblmsk); + if (sibl >= nr_cpu_ids) + break; + if (!cpumask_test_and_clear_cpu(sibl, nmsk)) + continue; + cpumask_set_cpu(sibl, irqmsk); + cpus_per_vec--; + } + } +} + +static int get_nodes_in_cpumask(const struct cpumask *mask, nodemask_t *nodemsk) +{ + int n, nodes; + + /* Calculate the number of nodes in the supplied affinity mask */ + for (n = 0, nodes = 0; n < num_online_nodes(); n++) { + if (cpumask_intersects(mask, cpumask_of_node(n))) { + node_set(n, *nodemsk); + nodes++; + } + } + return nodes; +} + +/** + * irq_create_affinity_masks - Create affinity masks for multiqueue spreading + * @affinity: The affinity mask to spread. If NULL cpu_online_mask + * is used + * @nvecs: The number of vectors + * + * Returns the masks pointer or NULL if allocation failed. + */ +struct cpumask *irq_create_affinity_masks(const struct cpumask *affinity, + int nvec) +{ + int n, nodes, vecs_per_node, cpus_per_vec, extra_vecs, curvec = 0; + nodemask_t nodemsk = NODE_MASK_NONE; + struct cpumask *masks; + cpumask_var_t nmsk; + + if (!zalloc_cpumask_var(&nmsk, GFP_KERNEL)) + return NULL; + + masks = kzalloc(nvec * sizeof(*masks), GFP_KERNEL); + if (!masks) + goto out; + + /* Stabilize the cpumasks */ + get_online_cpus(); + /* If the supplied affinity mask is NULL, use cpu online mask */ + if (!affinity) + affinity = cpu_online_mask; + + nodes = get_nodes_in_cpumask(affinity, &nodemsk); + + /* + * If the number of nodes in the mask is less than or equal the + * number of vectors we just spread the vectors across the nodes. + */ + if (nvec <= nodes) { + for_each_node_mask(n, nodemsk) { + cpumask_copy(masks + curvec, cpumask_of_node(n)); + if (++curvec == nvec) + break; + } + goto outonl; + } + + /* Spread the vectors per node */ + vecs_per_node = nvec / nodes; + /* Account for rounding errors */ + extra_vecs = nvec - (nodes * vecs_per_node); + + for_each_node_mask(n, nodemsk) { + int ncpus, v, vecs_to_assign = vecs_per_node; + + /* Get the cpus on this node which are in the mask */ + cpumask_and(nmsk, affinity, cpumask_of_node(n)); + + /* Calculate the number of cpus per vector */ + ncpus = cpumask_weight(nmsk); + + for (v = 0; curvec < nvec && v < vecs_to_assign; curvec++, v++) { + cpus_per_vec = ncpus / vecs_to_assign; + + /* Account for extra vectors to compensate rounding errors */ + if (extra_vecs) { + cpus_per_vec++; + if (!--extra_vecs) + vecs_per_node++; + } + irq_spread_init_one(masks + curvec, nmsk, cpus_per_vec); + } + + if (curvec >= nvec) + break; + } + +outonl: + put_online_cpus(); +out: + free_cpumask_var(nmsk); + return masks; +} + +/** + * irq_calc_affinity_vectors - Calculate to optimal number of vectors for a given affinity mask + * @affinity: The affinity mask to spread. If NULL cpu_online_mask + * is used + * @maxvec: The maximum number of vectors available + */ +int irq_calc_affinity_vectors(const struct cpumask *affinity, int maxvec) +{ + int cpus, ret; + + /* Stabilize the cpumasks */ + get_online_cpus(); + /* If the supplied affinity mask is NULL, use cpu online mask */ + if (!affinity) + affinity = cpu_online_mask; + + cpus = cpumask_weight(affinity); + ret = (cpus < maxvec) ? cpus : maxvec; + + put_online_cpus(); + return ret; +} + static int get_first_sibling(unsigned int cpu) { unsigned int ret; From e75eafb9b0395c338230b0eef2cc92ca8d20dee2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 14 Sep 2016 16:18:49 +0200 Subject: [PATCH 33/42] genirq/msi: Switch to new irq spreading infrastructure Switch MSI over to the new spreading code. If a pci device contains a valid pointer to a cpumask, then this mask is used for spreading otherwise the online cpu mask is used. This allows a driver to restrict the spread to a subset of CPUs, e.g. cpus on a particular node. Signed-off-by: Thomas Gleixner Cc: Christoph Hellwig Cc: axboe@fb.com Cc: keith.busch@intel.com Cc: agordeev@redhat.com Cc: linux-block@vger.kernel.org Link: http://lkml.kernel.org/r/1473862739-15032-4-git-send-email-hch@lst.de Signed-off-by: Thomas Gleixner --- drivers/pci/msi.c | 128 ++++++++++++++++++++++++------------------- kernel/irq/irqdesc.c | 31 +++++------ 2 files changed, 87 insertions(+), 72 deletions(-) diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 0db72ba24003e2..06100dde0e86cf 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -549,15 +549,23 @@ static int populate_msi_sysfs(struct pci_dev *pdev) return ret; } -static struct msi_desc *msi_setup_entry(struct pci_dev *dev, int nvec) +static struct msi_desc * +msi_setup_entry(struct pci_dev *dev, int nvec, bool affinity) { - u16 control; + struct cpumask *masks = NULL; struct msi_desc *entry; + u16 control; + + if (affinity) { + masks = irq_create_affinity_masks(dev->irq_affinity, nvec); + if (!masks) + pr_err("Unable to allocate affinity masks, ignoring\n"); + } /* MSI Entry Initialization */ - entry = alloc_msi_entry(&dev->dev, nvec, NULL); + entry = alloc_msi_entry(&dev->dev, nvec, masks); if (!entry) - return NULL; + goto out; pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control); @@ -568,7 +576,6 @@ static struct msi_desc *msi_setup_entry(struct pci_dev *dev, int nvec) entry->msi_attrib.default_irq = dev->irq; /* Save IOAPIC IRQ */ entry->msi_attrib.multi_cap = (control & PCI_MSI_FLAGS_QMASK) >> 1; entry->msi_attrib.multiple = ilog2(__roundup_pow_of_two(nvec)); - entry->affinity = dev->irq_affinity; if (control & PCI_MSI_FLAGS_64BIT) entry->mask_pos = dev->msi_cap + PCI_MSI_MASK_64; @@ -579,6 +586,8 @@ static struct msi_desc *msi_setup_entry(struct pci_dev *dev, int nvec) if (entry->msi_attrib.maskbit) pci_read_config_dword(dev, entry->mask_pos, &entry->masked); +out: + kfree(masks); return entry; } @@ -607,7 +616,7 @@ static int msi_verify_entries(struct pci_dev *dev) * an error, and a positive return value indicates the number of interrupts * which could have been allocated. */ -static int msi_capability_init(struct pci_dev *dev, int nvec) +static int msi_capability_init(struct pci_dev *dev, int nvec, bool affinity) { struct msi_desc *entry; int ret; @@ -615,7 +624,7 @@ static int msi_capability_init(struct pci_dev *dev, int nvec) pci_msi_set_enable(dev, 0); /* Disable MSI during set up */ - entry = msi_setup_entry(dev, nvec); + entry = msi_setup_entry(dev, nvec, affinity); if (!entry) return -ENOMEM; @@ -678,28 +687,29 @@ static void __iomem *msix_map_region(struct pci_dev *dev, unsigned nr_entries) } static int msix_setup_entries(struct pci_dev *dev, void __iomem *base, - struct msix_entry *entries, int nvec) + struct msix_entry *entries, int nvec, + bool affinity) { - const struct cpumask *mask = NULL; + struct cpumask *curmsk, *masks = NULL; struct msi_desc *entry; - int cpu = -1, i; - - for (i = 0; i < nvec; i++) { - if (dev->irq_affinity) { - cpu = cpumask_next(cpu, dev->irq_affinity); - if (cpu >= nr_cpu_ids) - cpu = cpumask_first(dev->irq_affinity); - mask = cpumask_of(cpu); - } + int ret, i; - entry = alloc_msi_entry(&dev->dev, 1, NULL); + if (affinity) { + masks = irq_create_affinity_masks(dev->irq_affinity, nvec); + if (!masks) + pr_err("Unable to allocate affinity masks, ignoring\n"); + } + + for (i = 0, curmsk = masks; i < nvec; i++) { + entry = alloc_msi_entry(&dev->dev, 1, curmsk); if (!entry) { if (!i) iounmap(base); else free_msi_irqs(dev); /* No enough memory. Don't try again */ - return -ENOMEM; + ret = -ENOMEM; + goto out; } entry->msi_attrib.is_msix = 1; @@ -710,11 +720,14 @@ static int msix_setup_entries(struct pci_dev *dev, void __iomem *base, entry->msi_attrib.entry_nr = i; entry->msi_attrib.default_irq = dev->irq; entry->mask_base = base; - entry->affinity = mask; list_add_tail(&entry->list, dev_to_msi_list(&dev->dev)); + if (masks) + curmsk++; } - + ret = 0; +out: + kfree(masks); return 0; } @@ -743,8 +756,8 @@ static void msix_program_entries(struct pci_dev *dev, * single MSI-X irq. A return of zero indicates the successful setup of * requested MSI-X entries with allocated irqs or non-zero for otherwise. **/ -static int msix_capability_init(struct pci_dev *dev, - struct msix_entry *entries, int nvec) +static int msix_capability_init(struct pci_dev *dev, struct msix_entry *entries, + int nvec, bool affinity) { int ret; u16 control; @@ -759,7 +772,7 @@ static int msix_capability_init(struct pci_dev *dev, if (!base) return -ENOMEM; - ret = msix_setup_entries(dev, base, entries, nvec); + ret = msix_setup_entries(dev, base, entries, nvec, affinity); if (ret) return ret; @@ -939,22 +952,8 @@ int pci_msix_vec_count(struct pci_dev *dev) } EXPORT_SYMBOL(pci_msix_vec_count); -/** - * pci_enable_msix - configure device's MSI-X capability structure - * @dev: pointer to the pci_dev data structure of MSI-X device function - * @entries: pointer to an array of MSI-X entries (optional) - * @nvec: number of MSI-X irqs requested for allocation by device driver - * - * Setup the MSI-X capability structure of device function with the number - * of requested irqs upon its software driver call to request for - * MSI-X mode enabled on its hardware device function. A return of zero - * indicates the successful configuration of MSI-X capability structure - * with new allocated MSI-X irqs. A return of < 0 indicates a failure. - * Or a return of > 0 indicates that driver request is exceeding the number - * of irqs or MSI-X vectors available. Driver should use the returned value to - * re-send its request. - **/ -int pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, int nvec) +static int __pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, + int nvec, bool affinity) { int nr_entries; int i, j; @@ -986,7 +985,27 @@ int pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, int nvec) dev_info(&dev->dev, "can't enable MSI-X (MSI IRQ already assigned)\n"); return -EINVAL; } - return msix_capability_init(dev, entries, nvec); + return msix_capability_init(dev, entries, nvec, affinity); +} + +/** + * pci_enable_msix - configure device's MSI-X capability structure + * @dev: pointer to the pci_dev data structure of MSI-X device function + * @entries: pointer to an array of MSI-X entries (optional) + * @nvec: number of MSI-X irqs requested for allocation by device driver + * + * Setup the MSI-X capability structure of device function with the number + * of requested irqs upon its software driver call to request for + * MSI-X mode enabled on its hardware device function. A return of zero + * indicates the successful configuration of MSI-X capability structure + * with new allocated MSI-X irqs. A return of < 0 indicates a failure. + * Or a return of > 0 indicates that driver request is exceeding the number + * of irqs or MSI-X vectors available. Driver should use the returned value to + * re-send its request. + **/ +int pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, int nvec) +{ + return __pci_enable_msix(dev, entries, nvec, false); } EXPORT_SYMBOL(pci_enable_msix); @@ -1039,6 +1058,7 @@ EXPORT_SYMBOL(pci_msi_enabled); static int __pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec, unsigned int flags) { + bool affinity = flags & PCI_IRQ_AFFINITY; int nvec; int rc; @@ -1067,19 +1087,17 @@ static int __pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec, nvec = maxvec; for (;;) { - if (flags & PCI_IRQ_AFFINITY) { - dev->irq_affinity = irq_create_affinity_mask(&nvec); + if (affinity) { + nvec = irq_calc_affinity_vectors(dev->irq_affinity, + nvec); if (nvec < minvec) return -ENOSPC; } - rc = msi_capability_init(dev, nvec); + rc = msi_capability_init(dev, nvec, affinity); if (rc == 0) return nvec; - kfree(dev->irq_affinity); - dev->irq_affinity = NULL; - if (rc < 0) return rc; if (rc < minvec) @@ -1111,26 +1129,24 @@ static int __pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries, int minvec, int maxvec, unsigned int flags) { - int nvec = maxvec; - int rc; + bool affinity = flags & PCI_IRQ_AFFINITY; + int rc, nvec = maxvec; if (maxvec < minvec) return -ERANGE; for (;;) { - if (flags & PCI_IRQ_AFFINITY) { - dev->irq_affinity = irq_create_affinity_mask(&nvec); + if (affinity) { + nvec = irq_calc_affinity_vectors(dev->irq_affinity, + nvec); if (nvec < minvec) return -ENOSPC; } - rc = pci_enable_msix(dev, entries, nvec); + rc = __pci_enable_msix(dev, entries, nvec, affinity); if (rc == 0) return nvec; - kfree(dev->irq_affinity); - dev->irq_affinity = NULL; - if (rc < 0) return rc; if (rc < minvec) diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index a623b44f2d4bec..5a5a685aba334a 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -236,25 +236,24 @@ static int alloc_descs(unsigned int start, unsigned int cnt, int node, const struct cpumask *mask = NULL; struct irq_desc *desc; unsigned int flags; - int i, cpu = -1; + int i; - if (affinity && cpumask_empty(affinity)) - return -EINVAL; + /* Validate affinity mask(s) */ + if (affinity) { + for (i = 0, mask = affinity; i < cnt; i++, mask++) { + if (cpumask_empty(mask)) + return -EINVAL; + } + } flags = affinity ? IRQD_AFFINITY_MANAGED : 0; + mask = NULL; for (i = 0; i < cnt; i++) { if (affinity) { - cpu = cpumask_next(cpu, affinity); - if (cpu >= nr_cpu_ids) - cpu = cpumask_first(affinity); - node = cpu_to_node(cpu); - - /* - * For single allocations we use the caller provided - * mask otherwise we use the mask of the target cpu - */ - mask = cnt == 1 ? affinity : cpumask_of(cpu); + node = cpu_to_node(cpumask_first(affinity)); + mask = affinity; + affinity++; } desc = alloc_desc(start + i, node, flags, mask, owner); if (!desc) @@ -481,9 +480,9 @@ EXPORT_SYMBOL_GPL(irq_free_descs); * @cnt: Number of consecutive irqs to allocate. * @node: Preferred node on which the irq descriptor should be allocated * @owner: Owning module (can be NULL) - * @affinity: Optional pointer to an affinity mask which hints where the - * irq descriptors should be allocated and which default - * affinities to use + * @affinity: Optional pointer to an affinity mask array of size @cnt which + * hints where the irq descriptors should be allocated and which + * default affinities to use * * Returns the first irq number or error code */ From 44082fd6702fb12020967fd375f8bf6dd7c111bf Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 14 Sep 2016 16:18:50 +0200 Subject: [PATCH 34/42] genirq/affinity: Remove old irq spread infrastructure No more users. Signed-off-by: Thomas Gleixner Cc: Christoph Hellwig Cc: axboe@fb.com Cc: keith.busch@intel.com Cc: agordeev@redhat.com Cc: linux-block@vger.kernel.org Link: http://lkml.kernel.org/r/1473862739-15032-5-git-send-email-hch@lst.de Signed-off-by: Thomas Gleixner --- include/linux/interrupt.h | 7 ----- kernel/irq/affinity.c | 58 --------------------------------------- 2 files changed, 65 deletions(-) diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 4e59d122cad99c..72f0721f75e734 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -278,7 +278,6 @@ extern int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m); extern int irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify); -struct cpumask *irq_create_affinity_mask(unsigned int *nr_vecs); struct cpumask *irq_create_affinity_masks(const struct cpumask *affinity, int nvec); int irq_calc_affinity_vectors(const struct cpumask *affinity, int maxvec); @@ -313,12 +312,6 @@ irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify) return 0; } -static inline struct cpumask *irq_create_affinity_mask(unsigned int *nr_vecs) -{ - *nr_vecs = 1; - return NULL; -} - static inline struct cpumask * irq_create_affinity_masks(const struct cpumask *affinity, int nvec) { diff --git a/kernel/irq/affinity.c b/kernel/irq/affinity.c index 7812fecc6e2fb0..17f51d63da5631 100644 --- a/kernel/irq/affinity.c +++ b/kernel/irq/affinity.c @@ -152,61 +152,3 @@ int irq_calc_affinity_vectors(const struct cpumask *affinity, int maxvec) put_online_cpus(); return ret; } - -static int get_first_sibling(unsigned int cpu) -{ - unsigned int ret; - - ret = cpumask_first(topology_sibling_cpumask(cpu)); - if (ret < nr_cpu_ids) - return ret; - return cpu; -} - -/* - * Take a map of online CPUs and the number of available interrupt vectors - * and generate an output cpumask suitable for spreading MSI/MSI-X vectors - * so that they are distributed as good as possible around the CPUs. If - * more vectors than CPUs are available we'll map one to each CPU, - * otherwise we map one to the first sibling of each socket. - * - * If there are more vectors than CPUs we will still only have one bit - * set per CPU, but interrupt code will keep on assigning the vectors from - * the start of the bitmap until we run out of vectors. - */ -struct cpumask *irq_create_affinity_mask(unsigned int *nr_vecs) -{ - struct cpumask *affinity_mask; - unsigned int max_vecs = *nr_vecs; - - if (max_vecs == 1) - return NULL; - - affinity_mask = kzalloc(cpumask_size(), GFP_KERNEL); - if (!affinity_mask) { - *nr_vecs = 1; - return NULL; - } - - get_online_cpus(); - if (max_vecs >= num_online_cpus()) { - cpumask_copy(affinity_mask, cpu_online_mask); - *nr_vecs = num_online_cpus(); - } else { - unsigned int vecs = 0, cpu; - - for_each_online_cpu(cpu) { - if (cpu == get_first_sibling(cpu)) { - cpumask_set_cpu(cpu, affinity_mask); - vecs++; - } - - if (--max_vecs == 0) - break; - } - *nr_vecs = vecs; - } - put_online_cpus(); - - return affinity_mask; -} From ee8d41e53efe14bfc5ea5866e1178b06d78a7c95 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 14 Sep 2016 16:18:51 +0200 Subject: [PATCH 35/42] pci/msi: Retrieve affinity for a vector Add a helper to get the affinity mask for a given PCI irq vector. For MSI or MSI-X vectors these are stored by the IRQ core, while for legacy interrupts we will always return cpu_possible_map. [hch: updated to follow the style of pci_irq_vector()] Signed-off-by: Thomas Gleixner Signed-off-by: Christoph Hellwig Cc: axboe@fb.com Cc: keith.busch@intel.com Cc: agordeev@redhat.com Cc: linux-block@vger.kernel.org Link: http://lkml.kernel.org/r/1473862739-15032-6-git-send-email-hch@lst.de Signed-off-by: Thomas Gleixner --- drivers/pci/msi.c | 31 +++++++++++++++++++++++++++++++ include/linux/pci.h | 6 ++++++ 2 files changed, 37 insertions(+) diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 06100dde0e86cf..9da5ecb41f0b79 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -1270,6 +1270,37 @@ int pci_irq_vector(struct pci_dev *dev, unsigned int nr) } EXPORT_SYMBOL(pci_irq_vector); +/** + * pci_irq_get_affinity - return the affinity of a particular msi vector + * @dev: PCI device to operate on + * @nr: device-relative interrupt vector index (0-based). + */ +const struct cpumask *pci_irq_get_affinity(struct pci_dev *dev, int nr) +{ + if (dev->msix_enabled) { + struct msi_desc *entry; + int i = 0; + + for_each_pci_msi_entry(entry, dev) { + if (i == nr) + return entry->affinity; + i++; + } + WARN_ON_ONCE(1); + return NULL; + } else if (dev->msi_enabled) { + struct msi_desc *entry = first_pci_msi_entry(dev); + + if (WARN_ON_ONCE(!entry || nr >= entry->nvec_used)) + return NULL; + + return &entry->affinity[nr]; + } else { + return cpu_possible_mask; + } +} +EXPORT_SYMBOL(pci_irq_get_affinity); + struct pci_dev *msi_desc_to_pci_dev(struct msi_desc *desc) { return to_pci_dev(desc->dev); diff --git a/include/linux/pci.h b/include/linux/pci.h index 0ab8359656696d..3b0a8004f31389 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1300,6 +1300,7 @@ int pci_alloc_irq_vectors(struct pci_dev *dev, unsigned int min_vecs, unsigned int max_vecs, unsigned int flags); void pci_free_irq_vectors(struct pci_dev *dev); int pci_irq_vector(struct pci_dev *dev, unsigned int nr); +const struct cpumask *pci_irq_get_affinity(struct pci_dev *pdev, int vec); #else static inline int pci_msi_vec_count(struct pci_dev *dev) { return -ENOSYS; } @@ -1342,6 +1343,11 @@ static inline int pci_irq_vector(struct pci_dev *dev, unsigned int nr) return -EINVAL; return dev->irq; } +static inline const struct cpumask *pci_irq_get_affinity(struct pci_dev *pdev, + int vec) +{ + return cpu_possible_mask; +} #endif #ifdef CONFIG_PCIEPORTBUS From 0f4ed1580ce6c9499eba2a1ba013759700a5ed14 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Tue, 13 Sep 2016 17:54:27 +0100 Subject: [PATCH 36/42] irqchip/mips-gic: Use for_each_set_bit to iterate over local IRQs The MIPS GIC driver has previously iterated over bits set in a bitmap representing pending local IRQs by calling find_first_bit, clearing that bit then calling find_first_bit again until all bits are clear. If multiple interrupts are pending then this is wasteful, as find_first_bit will have to loop over the whole bitmap from the start. Use the for_each_set_bit macro which performs exactly what we need here instead. It will use find_next_bit and thus only scan over the relevant part of the bitmap, and it makes the intent of the code clearer. This makes the same change for local interrupts that commit cae750bae4e4 ("irqchip: mips-gic: Use for_each_set_bit to iterate over IRQs") made for shared interrupts. Signed-off-by: Paul Burton Cc: Marc Zyngier Cc: linux-mips@linux-mips.org Cc: Jason Cooper Link: http://lkml.kernel.org/r/20160913165427.31686-1-paul.burton@imgtec.com Signed-off-by: Thomas Gleixner --- drivers/irqchip/irq-mips-gic.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c index 6185696405d5b6..8f7d38ba24c6e9 100644 --- a/drivers/irqchip/irq-mips-gic.c +++ b/drivers/irqchip/irq-mips-gic.c @@ -518,18 +518,13 @@ static void gic_handle_local_int(bool chained) bitmap_and(&pending, &pending, &masked, GIC_NUM_LOCAL_INTRS); - intr = find_first_bit(&pending, GIC_NUM_LOCAL_INTRS); - while (intr != GIC_NUM_LOCAL_INTRS) { + for_each_set_bit(intr, &pending, GIC_NUM_LOCAL_INTRS) { virq = irq_linear_revmap(gic_irq_domain, GIC_LOCAL_TO_HWIRQ(intr)); if (chained) generic_handle_irq(virq); else do_IRQ(virq); - - /* go to next pending bit */ - bitmap_clear(&pending, intr, 1); - intr = find_first_bit(&pending, GIC_NUM_LOCAL_INTRS); } } From 3027f78bb7243bef28c103507fc857e1471d769d Mon Sep 17 00:00:00 2001 From: Alexandre TORGUE Date: Tue, 20 Sep 2016 18:00:56 +0200 Subject: [PATCH 37/42] Documentation/dt-bindings: Document STM32 EXTI controller bindings Originally-from: Maxime Coquelin Signed-off-by: Alexandre TORGUE Cc: Mark Rutland Cc: devicetree@vger.kernel.org Cc: Daniel Thompson Cc: Jason Cooper Cc: arnd@arndb.de Cc: Marc Zyngier Cc: bruherrera@gmail.com Cc: Linus Walleij Cc: linux-gpio@vger.kernel.org Cc: Rob Herring Cc: lee.jones@linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/1474387259-18926-2-git-send-email-alexandre.torgue@st.com Signed-off-by: Thomas Gleixner --- .../interrupt-controller/st,stm32-exti.txt | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 Documentation/devicetree/bindings/interrupt-controller/st,stm32-exti.txt diff --git a/Documentation/devicetree/bindings/interrupt-controller/st,stm32-exti.txt b/Documentation/devicetree/bindings/interrupt-controller/st,stm32-exti.txt new file mode 100644 index 00000000000000..6e7703d4ff5b67 --- /dev/null +++ b/Documentation/devicetree/bindings/interrupt-controller/st,stm32-exti.txt @@ -0,0 +1,20 @@ +STM32 External Interrupt Controller + +Required properties: + +- compatible: Should be "st,stm32-exti" +- reg: Specifies base physical address and size of the registers +- interrupt-controller: Indentifies the node as an interrupt controller +- #interrupt-cells: Specifies the number of cells to encode an interrupt + specifier, shall be 2 +- interrupts: interrupts references to primary interrupt controller + +Example: + +exti: interrupt-controller@40013c00 { + compatible = "st,stm32-exti"; + interrupt-controller; + #interrupt-cells = <2>; + reg = <0x40013C00 0x400>; + interrupts = <1>, <2>, <3>, <6>, <7>, <8>, <9>, <10>, <23>, <40>, <41>, <42>, <62>, <76>; +}; From e072041688ca73f125719815fa4b0fd23a45152c Mon Sep 17 00:00:00 2001 From: Alexandre TORGUE Date: Tue, 20 Sep 2016 18:00:57 +0200 Subject: [PATCH 38/42] drivers/irqchip: Add STM32 external interrupts support The STM32 external interrupt controller consists of edge detectors that generate interrupts requests or wake-up events. Each line can be independently configured as interrupt or wake-up source, and triggers either on rising, falling or both edges. Each line can also be masked independently. Originally-from: Maxime Coquelin Signed-off-by: Alexandre TORGUE Cc: Mark Rutland Cc: devicetree@vger.kernel.org Cc: Daniel Thompson Cc: Jason Cooper Cc: arnd@arndb.de Cc: Marc Zyngier Cc: bruherrera@gmail.com Cc: Linus Walleij Cc: linux-gpio@vger.kernel.org Cc: Rob Herring Cc: lee.jones@linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/1474387259-18926-3-git-send-email-alexandre.torgue@st.com Signed-off-by: Thomas Gleixner --- drivers/irqchip/Kconfig | 4 + drivers/irqchip/Makefile | 1 + drivers/irqchip/irq-stm32-exti.c | 201 +++++++++++++++++++++++++++++++ 3 files changed, 206 insertions(+) create mode 100644 drivers/irqchip/irq-stm32-exti.c diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig index 9aeea1d8a57966..329c941e46b510 100644 --- a/drivers/irqchip/Kconfig +++ b/drivers/irqchip/Kconfig @@ -265,3 +265,7 @@ config EZNPS_GIC select IRQ_DOMAIN help Support the EZchip NPS400 global interrupt controller + +config STM32_EXTI + bool + select IRQ_DOMAIN diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile index 4c203b6b816357..96383b22cffe5c 100644 --- a/drivers/irqchip/Makefile +++ b/drivers/irqchip/Makefile @@ -71,3 +71,4 @@ obj-$(CONFIG_MVEBU_ODMI) += irq-mvebu-odmi.o obj-$(CONFIG_LS_SCFG_MSI) += irq-ls-scfg-msi.o obj-$(CONFIG_EZNPS_GIC) += irq-eznps.o obj-$(CONFIG_ARCH_ASPEED) += irq-aspeed-vic.o +obj-$(CONFIG_STM32_EXTI) += irq-stm32-exti.o diff --git a/drivers/irqchip/irq-stm32-exti.c b/drivers/irqchip/irq-stm32-exti.c new file mode 100644 index 00000000000000..491568c95aa54f --- /dev/null +++ b/drivers/irqchip/irq-stm32-exti.c @@ -0,0 +1,201 @@ +/* + * Copyright (C) Maxime Coquelin 2015 + * Author: Maxime Coquelin + * License terms: GNU General Public License (GPL), version 2 + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define EXTI_IMR 0x0 +#define EXTI_EMR 0x4 +#define EXTI_RTSR 0x8 +#define EXTI_FTSR 0xc +#define EXTI_SWIER 0x10 +#define EXTI_PR 0x14 + +static void stm32_irq_handler(struct irq_desc *desc) +{ + struct irq_domain *domain = irq_desc_get_handler_data(desc); + struct irq_chip_generic *gc = domain->gc->gc[0]; + struct irq_chip *chip = irq_desc_get_chip(desc); + unsigned long pending; + int n; + + chained_irq_enter(chip, desc); + + while ((pending = irq_reg_readl(gc, EXTI_PR))) { + for_each_set_bit(n, &pending, BITS_PER_LONG) { + generic_handle_irq(irq_find_mapping(domain, n)); + irq_reg_writel(gc, BIT(n), EXTI_PR); + } + } + + chained_irq_exit(chip, desc); +} + +static int stm32_irq_set_type(struct irq_data *data, unsigned int type) +{ + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(data); + int pin = data->hwirq; + u32 rtsr, ftsr; + + irq_gc_lock(gc); + + rtsr = irq_reg_readl(gc, EXTI_RTSR); + ftsr = irq_reg_readl(gc, EXTI_FTSR); + + switch (type) { + case IRQ_TYPE_EDGE_RISING: + rtsr |= BIT(pin); + ftsr &= ~BIT(pin); + break; + case IRQ_TYPE_EDGE_FALLING: + rtsr &= ~BIT(pin); + ftsr |= BIT(pin); + break; + case IRQ_TYPE_EDGE_BOTH: + rtsr |= BIT(pin); + ftsr |= BIT(pin); + break; + default: + irq_gc_unlock(gc); + return -EINVAL; + } + + irq_reg_writel(gc, rtsr, EXTI_RTSR); + irq_reg_writel(gc, ftsr, EXTI_FTSR); + + irq_gc_unlock(gc); + + return 0; +} + +static int stm32_irq_set_wake(struct irq_data *data, unsigned int on) +{ + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(data); + int pin = data->hwirq; + u32 emr; + + irq_gc_lock(gc); + + emr = irq_reg_readl(gc, EXTI_EMR); + if (on) + emr |= BIT(pin); + else + emr &= ~BIT(pin); + irq_reg_writel(gc, emr, EXTI_EMR); + + irq_gc_unlock(gc); + + return 0; +} + +static int stm32_exti_alloc(struct irq_domain *d, unsigned int virq, + unsigned int nr_irqs, void *data) +{ + struct irq_chip_generic *gc = d->gc->gc[0]; + struct irq_fwspec *fwspec = data; + irq_hw_number_t hwirq; + + hwirq = fwspec->param[0]; + + irq_map_generic_chip(d, virq, hwirq); + irq_domain_set_info(d, virq, hwirq, &gc->chip_types->chip, gc, + handle_simple_irq, NULL, NULL); + + return 0; +} + +static void stm32_exti_free(struct irq_domain *d, unsigned int virq, + unsigned int nr_irqs) +{ + struct irq_data *data = irq_domain_get_irq_data(d, virq); + + irq_domain_reset_irq_data(data); +} + +struct irq_domain_ops irq_exti_domain_ops = { + .map = irq_map_generic_chip, + .xlate = irq_domain_xlate_onetwocell, + .alloc = stm32_exti_alloc, + .free = stm32_exti_free, +}; + +static int __init stm32_exti_init(struct device_node *node, + struct device_node *parent) +{ + unsigned int clr = IRQ_NOREQUEST | IRQ_NOPROBE | IRQ_NOAUTOEN; + int nr_irqs, nr_exti, ret, i; + struct irq_chip_generic *gc; + struct irq_domain *domain; + void *base; + + base = of_iomap(node, 0); + if (!base) { + pr_err("%s: Unable to map registers\n", node->full_name); + return -ENOMEM; + } + + /* Determine number of irqs supported */ + writel_relaxed(~0UL, base + EXTI_RTSR); + nr_exti = fls(readl_relaxed(base + EXTI_RTSR)); + writel_relaxed(0, base + EXTI_RTSR); + + pr_info("%s: %d External IRQs detected\n", node->full_name, nr_exti); + + domain = irq_domain_add_linear(node, nr_exti, + &irq_exti_domain_ops, NULL); + if (!domain) { + pr_err("%s: Could not register interrupt domain.\n", + node->name); + ret = -ENOMEM; + goto out_unmap; + } + + ret = irq_alloc_domain_generic_chips(domain, nr_exti, 1, "exti", + handle_edge_irq, clr, 0, 0); + if (ret) { + pr_err("%s: Could not allocate generic interrupt chip.\n", + node->full_name); + goto out_free_domain; + } + + gc = domain->gc->gc[0]; + gc->reg_base = base; + gc->chip_types->type = IRQ_TYPE_EDGE_BOTH; + gc->chip_types->chip.name = gc->chip_types[0].chip.name; + gc->chip_types->chip.irq_ack = irq_gc_ack_set_bit; + gc->chip_types->chip.irq_mask = irq_gc_mask_clr_bit; + gc->chip_types->chip.irq_unmask = irq_gc_mask_set_bit; + gc->chip_types->chip.irq_set_type = stm32_irq_set_type; + gc->chip_types->chip.irq_set_wake = stm32_irq_set_wake; + gc->chip_types->regs.ack = EXTI_PR; + gc->chip_types->regs.mask = EXTI_IMR; + gc->chip_types->handler = handle_edge_irq; + + nr_irqs = of_irq_count(node); + for (i = 0; i < nr_irqs; i++) { + unsigned int irq = irq_of_parse_and_map(node, i); + + irq_set_handler_data(irq, domain); + irq_set_chained_handler(irq, stm32_irq_handler); + } + + return 0; + +out_free_domain: + irq_domain_remove(domain); +out_unmap: + iounmap(base); + return ret; +} + +IRQCHIP_DECLARE(stm32_exti, "st,stm32-exti", stm32_exti_init); From 47f91519546ce39cceee2c51b0f5045eadc688a9 Mon Sep 17 00:00:00 2001 From: Alexandre TORGUE Date: Tue, 20 Sep 2016 18:00:58 +0200 Subject: [PATCH 39/42] ARM/STM32: Select external interrupts controller Originally-from: Maxime Coquelin Signed-off-by: Alexandre TORGUE Cc: Mark Rutland Cc: devicetree@vger.kernel.org Cc: Daniel Thompson Cc: Jason Cooper Cc: arnd@arndb.de Cc: Marc Zyngier Cc: bruherrera@gmail.com Cc: Linus Walleij Cc: linux-gpio@vger.kernel.org Cc: Rob Herring Cc: lee.jones@linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/1474387259-18926-4-git-send-email-alexandre.torgue@st.com Signed-off-by: Thomas Gleixner --- arch/arm/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index a9c4e48bb7ec99..bc9d6df8bab4ee 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -878,6 +878,7 @@ config ARCH_STM32 select CLKSRC_STM32 select PINCTRL select RESET_CONTROLLER + select STM32_EXTI help Support for STMicroelectronics STM32 processors. From 5a79d596378b65e773d93d00edcb57a33f87ea94 Mon Sep 17 00:00:00 2001 From: Alexandre TORGUE Date: Tue, 20 Sep 2016 18:00:59 +0200 Subject: [PATCH 40/42] ARM/dts: Add EXTI controller node to stm32f429 Originally-from: Maxime Coquelin Signed-off-by: Alexandre TORGUE Cc: Mark Rutland Cc: devicetree@vger.kernel.org Cc: Daniel Thompson Cc: Jason Cooper Cc: arnd@arndb.de Cc: Marc Zyngier Cc: bruherrera@gmail.com Cc: Linus Walleij Cc: linux-gpio@vger.kernel.org Cc: Rob Herring Cc: lee.jones@linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/1474387259-18926-5-git-send-email-alexandre.torgue@st.com Signed-off-by: Thomas Gleixner --- arch/arm/boot/dts/stm32f429.dtsi | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/arm/boot/dts/stm32f429.dtsi b/arch/arm/boot/dts/stm32f429.dtsi index 35df462559cad3..1a189d44ad3818 100644 --- a/arch/arm/boot/dts/stm32f429.dtsi +++ b/arch/arm/boot/dts/stm32f429.dtsi @@ -176,6 +176,14 @@ reg = <0x40013800 0x400>; }; + exti: interrupt-controller@40013c00 { + compatible = "st,stm32-exti"; + interrupt-controller; + #interrupt-cells = <2>; + reg = <0x40013C00 0x400>; + interrupts = <1>, <2>, <3>, <6>, <7>, <8>, <9>, <10>, <23>, <40>, <41>, <42>, <62>, <76>; + }; + pin-controller { #address-cells = <1>; #size-cells = <1>; From b8129a1f6aaaca02d92186acf19ceb545b4b489a Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sun, 25 Sep 2016 15:36:39 +0000 Subject: [PATCH 41/42] genirq: Make function __irq_do_set_handler() static Fixes the following sparse warning: kernel/irq/chip.c:786:1: warning: symbol '__irq_do_set_handler' was not declared. Should it be static? Signed-off-by: Wei Yongjun Link: http://lkml.kernel.org/r/1474817799-18676-1-git-send-email-weiyj.lk@gmail.com Signed-off-by: Thomas Gleixner --- kernel/irq/chip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index d8dfdc630b7ecf..be3c34e4f2ac1b 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -782,7 +782,7 @@ void handle_percpu_devid_irq(struct irq_desc *desc) chip->irq_eoi(&desc->irq_data); } -void +static void __irq_do_set_handler(struct irq_desc *desc, irq_flow_handler_t handle, int is_chained, const char *name) { From 4cd13c21b207e80ddb1144c576500098f2d5f882 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 31 Aug 2016 10:42:29 -0700 Subject: [PATCH 42/42] softirq: Let ksoftirqd do its job A while back, Paolo and Hannes sent an RFC patch adding threaded-able napi poll loop support : (https://patchwork.ozlabs.org/patch/620657/) The problem seems to be that softirqs are very aggressive and are often handled by the current process, even if we are under stress and that ksoftirqd was scheduled, so that innocent threads would have more chance to make progress. This patch makes sure that if ksoftirq is running, we let it perform the softirq work. Jonathan Corbet summarized the issue in https://lwn.net/Articles/687617/ Tested: - NIC receiving traffic handled by CPU 0 - UDP receiver running on CPU 0, using a single UDP socket. - Incoming flood of UDP packets targeting the UDP socket. Before the patch, the UDP receiver could almost never get CPU cycles and could only receive ~2,000 packets per second. After the patch, CPU cycles are split 50/50 between user application and ksoftirqd/0, and we can effectively read ~900,000 packets per second, a huge improvement in DOS situation. (Note that more packets are now dropped by the NIC itself, since the BH handlers get less CPU cycles to drain RX ring buffer) Since the load runs in well identified threads context, an admin can more easily tune process scheduling parameters if needed. Reported-by: Paolo Abeni Reported-by: Hannes Frederic Sowa Signed-off-by: Eric Dumazet Signed-off-by: Peter Zijlstra (Intel) Cc: David Miller Cc: Hannes Frederic Sowa Cc: Jesper Dangaard Brouer Cc: Jonathan Corbet Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1472665349.14381.356.camel@edumazet-glaptop3.roam.corp.google.com Signed-off-by: Ingo Molnar --- kernel/softirq.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/kernel/softirq.c b/kernel/softirq.c index 17caf4b63342d7..8ed90e3a88d620 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -77,6 +77,17 @@ static void wakeup_softirqd(void) wake_up_process(tsk); } +/* + * If ksoftirqd is scheduled, we do not want to process pending softirqs + * right now. Let ksoftirqd handle this at its own rate, to get fairness. + */ +static bool ksoftirqd_running(void) +{ + struct task_struct *tsk = __this_cpu_read(ksoftirqd); + + return tsk && (tsk->state == TASK_RUNNING); +} + /* * preempt_count and SOFTIRQ_OFFSET usage: * - preempt_count is changed by SOFTIRQ_OFFSET on entering or leaving @@ -313,7 +324,7 @@ asmlinkage __visible void do_softirq(void) pending = local_softirq_pending(); - if (pending) + if (pending && !ksoftirqd_running()) do_softirq_own_stack(); local_irq_restore(flags); @@ -340,6 +351,9 @@ void irq_enter(void) static inline void invoke_softirq(void) { + if (ksoftirqd_running()) + return; + if (!force_irqthreads) { #ifdef CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK /*