diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index d10944e619d3d2..cb8db4f9d09794 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -4391,6 +4391,22 @@ all such vmexits. Do not enable KVM_FEATURE_PV_UNHALT if you disable HLT exits. +7.14 KVM_CAP_S390_HPAGE_1M + +Architectures: s390 +Parameters: none +Returns: 0 on success, -EINVAL if hpage module parameter was not set + or cmma is enabled + +With this capability the KVM support for memory backing with 1m pages +through hugetlbfs can be enabled for a VM. After the capability is +enabled, cmma can't be enabled anymore and pfmfi and the storage key +interpretation are disabled. If cmma has already been enabled or the +hpage module parameter is not set to 1, -EINVAL is returned. + +While it is generally possible to create a huge page backed VM without +this capability, the VM will not be able to run. + 8. Other capabilities. ---------------------- diff --git a/MAINTAINERS b/MAINTAINERS index 629e08703c82b1..0a2342770dee2e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12408,7 +12408,6 @@ F: drivers/pci/hotplug/s390_pci_hpc.c S390 VFIO-CCW DRIVER M: Cornelia Huck -M: Dong Jia Shi M: Halil Pasic L: linux-s390@vger.kernel.org L: kvm@vger.kernel.org diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 4fe5b2affa2330..51524057693073 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -145,6 +145,7 @@ config S390 select HAVE_KERNEL_LZ4 select HAVE_KERNEL_LZMA select HAVE_KERNEL_LZO + select HAVE_KERNEL_UNCOMPRESSED select HAVE_KERNEL_XZ select HAVE_KPROBES select HAVE_KRETPROBES diff --git a/arch/s390/Makefile b/arch/s390/Makefile index 68a690442be052..eee6703093c32e 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -14,8 +14,18 @@ LD_BFD := elf64-s390 LDFLAGS := -m elf64_s390 KBUILD_AFLAGS_MODULE += -fPIC KBUILD_CFLAGS_MODULE += -fPIC -KBUILD_CFLAGS += -m64 KBUILD_AFLAGS += -m64 +KBUILD_CFLAGS += -m64 +aflags_dwarf := -Wa,-gdwarf-2 +KBUILD_AFLAGS_DECOMPRESSOR := -m64 -D__ASSEMBLY__ +KBUILD_AFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO),$(aflags_dwarf)) +KBUILD_CFLAGS_DECOMPRESSOR := -m64 -O2 +KBUILD_CFLAGS_DECOMPRESSOR += -DDISABLE_BRANCH_PROFILING -D__NO_FORTIFY +KBUILD_CFLAGS_DECOMPRESSOR += -fno-delete-null-pointer-checks -msoft-float +KBUILD_CFLAGS_DECOMPRESSOR += -fno-asynchronous-unwind-tables +KBUILD_CFLAGS_DECOMPRESSOR += $(call cc-option,-ffreestanding) +KBUILD_CFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO),-g) +KBUILD_CFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO_DWARF4), $(call cc-option, -gdwarf-4,)) UTS_MACHINE := s390x STACK_SIZE := 16384 CHECKFLAGS += -D__s390__ -D__s390x__ @@ -52,18 +62,14 @@ cflags-y += -Wa,-I$(srctree)/arch/$(ARCH)/include # cflags-$(CONFIG_FRAME_POINTER) += -fno-optimize-sibling-calls -# old style option for packed stacks -ifeq ($(call cc-option-yn,-mkernel-backchain),y) -cflags-$(CONFIG_PACK_STACK) += -mkernel-backchain -D__PACK_STACK -aflags-$(CONFIG_PACK_STACK) += -D__PACK_STACK -endif - -# new style option for packed stacks ifeq ($(call cc-option-yn,-mpacked-stack),y) cflags-$(CONFIG_PACK_STACK) += -mpacked-stack -D__PACK_STACK aflags-$(CONFIG_PACK_STACK) += -D__PACK_STACK endif +KBUILD_AFLAGS_DECOMPRESSOR += $(aflags-y) +KBUILD_CFLAGS_DECOMPRESSOR += $(cflags-y) + ifeq ($(call cc-option-yn,-mstack-size=8192 -mstack-guard=128),y) cflags-$(CONFIG_CHECK_STACK) += -mstack-size=$(STACK_SIZE) ifneq ($(call cc-option-yn,-mstack-size=8192),y) @@ -71,8 +77,11 @@ cflags-$(CONFIG_CHECK_STACK) += -mstack-guard=$(CONFIG_STACK_GUARD) endif endif -ifeq ($(call cc-option-yn,-mwarn-dynamicstack),y) -cflags-$(CONFIG_WARN_DYNAMIC_STACK) += -mwarn-dynamicstack +ifdef CONFIG_WARN_DYNAMIC_STACK + ifeq ($(call cc-option-yn,-mwarn-dynamicstack),y) + KBUILD_CFLAGS += -mwarn-dynamicstack + KBUILD_CFLAGS_DECOMPRESSOR += -mwarn-dynamicstack + endif endif ifdef CONFIG_EXPOLINE @@ -82,6 +91,7 @@ ifdef CONFIG_EXPOLINE CC_FLAGS_EXPOLINE += -mindirect-branch-table export CC_FLAGS_EXPOLINE cflags-y += $(CC_FLAGS_EXPOLINE) -DCC_USING_EXPOLINE + aflags-y += -DCC_USING_EXPOLINE endif endif @@ -102,11 +112,12 @@ KBUILD_CFLAGS += -mbackchain -msoft-float $(cflags-y) KBUILD_CFLAGS += -pipe -fno-strength-reduce -Wno-sign-compare KBUILD_CFLAGS += -fno-asynchronous-unwind-tables $(cfi) KBUILD_AFLAGS += $(aflags-y) $(cfi) +export KBUILD_AFLAGS_DECOMPRESSOR +export KBUILD_CFLAGS_DECOMPRESSOR OBJCOPYFLAGS := -O binary -head-y := arch/s390/kernel/head.o -head-y += arch/s390/kernel/head64.o +head-y := arch/s390/kernel/head64.o # See arch/s390/Kbuild for content of core part of the kernel core-y += arch/s390/ @@ -121,7 +132,7 @@ boot := arch/s390/boot syscalls := arch/s390/kernel/syscalls tools := arch/s390/tools -all: image bzImage +all: bzImage #KBUILD_IMAGE is necessary for packaging targets like rpm-pkg, deb-pkg... KBUILD_IMAGE := $(boot)/bzImage @@ -129,7 +140,7 @@ KBUILD_IMAGE := $(boot)/bzImage install: vmlinux $(Q)$(MAKE) $(build)=$(boot) $@ -image bzImage: vmlinux +bzImage: vmlinux $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ zfcpdump: @@ -152,8 +163,7 @@ archprepare: # Don't use tabs in echo arguments define archhelp - echo '* image - Kernel image for IPL ($(boot)/image)' - echo '* bzImage - Compressed kernel image for IPL ($(boot)/bzImage)' + echo '* bzImage - Kernel image for IPL ($(boot)/bzImage)' echo ' install - Install kernel using' echo ' (your) ~/bin/$(INSTALLKERNEL) or' echo ' (distribution) /sbin/$(INSTALLKERNEL) or' diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c index ee6a9c387c8796..9bf8489df6e62d 100644 --- a/arch/s390/appldata/appldata_base.c +++ b/arch/s390/appldata/appldata_base.c @@ -206,35 +206,28 @@ static int appldata_timer_handler(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - unsigned int len; - char buf[2]; + int timer_active = appldata_timer_active; + int zero = 0; + int one = 1; + int rc; + struct ctl_table ctl_entry = { + .procname = ctl->procname, + .data = &timer_active, + .maxlen = sizeof(int), + .extra1 = &zero, + .extra2 = &one, + }; + + rc = proc_douintvec_minmax(&ctl_entry, write, buffer, lenp, ppos); + if (rc < 0 || !write) + return rc; - if (!*lenp || *ppos) { - *lenp = 0; - return 0; - } - if (!write) { - strncpy(buf, appldata_timer_active ? "1\n" : "0\n", - ARRAY_SIZE(buf)); - len = strnlen(buf, ARRAY_SIZE(buf)); - if (len > *lenp) - len = *lenp; - if (copy_to_user(buffer, buf, len)) - return -EFAULT; - goto out; - } - len = *lenp; - if (copy_from_user(buf, buffer, len > sizeof(buf) ? sizeof(buf) : len)) - return -EFAULT; spin_lock(&appldata_timer_lock); - if (buf[0] == '1') + if (timer_active) __appldata_vtimer_setup(APPLDATA_ADD_TIMER); - else if (buf[0] == '0') + else __appldata_vtimer_setup(APPLDATA_DEL_TIMER); spin_unlock(&appldata_timer_lock); -out: - *lenp = len; - *ppos += len; return 0; } @@ -248,37 +241,24 @@ static int appldata_interval_handler(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - unsigned int len; - int interval; - char buf[16]; + int interval = appldata_interval; + int one = 1; + int rc; + struct ctl_table ctl_entry = { + .procname = ctl->procname, + .data = &interval, + .maxlen = sizeof(int), + .extra1 = &one, + }; - if (!*lenp || *ppos) { - *lenp = 0; - return 0; - } - if (!write) { - len = sprintf(buf, "%i\n", appldata_interval); - if (len > *lenp) - len = *lenp; - if (copy_to_user(buffer, buf, len)) - return -EFAULT; - goto out; - } - len = *lenp; - if (copy_from_user(buf, buffer, len > sizeof(buf) ? sizeof(buf) : len)) - return -EFAULT; - interval = 0; - sscanf(buf, "%i", &interval); - if (interval <= 0) - return -EINVAL; + rc = proc_dointvec_minmax(&ctl_entry, write, buffer, lenp, ppos); + if (rc < 0 || !write) + return rc; spin_lock(&appldata_timer_lock); appldata_interval = interval; __appldata_vtimer_setup(APPLDATA_MOD_TIMER); spin_unlock(&appldata_timer_lock); -out: - *lenp = len; - *ppos += len; return 0; } @@ -293,10 +273,17 @@ appldata_generic_handler(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { struct appldata_ops *ops = NULL, *tmp_ops; - unsigned int len; - int rc, found; - char buf[2]; struct list_head *lh; + int rc, found; + int active; + int zero = 0; + int one = 1; + struct ctl_table ctl_entry = { + .data = &active, + .maxlen = sizeof(int), + .extra1 = &zero, + .extra2 = &one, + }; found = 0; mutex_lock(&appldata_ops_mutex); @@ -317,31 +304,15 @@ appldata_generic_handler(struct ctl_table *ctl, int write, } mutex_unlock(&appldata_ops_mutex); - if (!*lenp || *ppos) { - *lenp = 0; + active = ops->active; + rc = proc_douintvec_minmax(&ctl_entry, write, buffer, lenp, ppos); + if (rc < 0 || !write) { module_put(ops->owner); - return 0; - } - if (!write) { - strncpy(buf, ops->active ? "1\n" : "0\n", ARRAY_SIZE(buf)); - len = strnlen(buf, ARRAY_SIZE(buf)); - if (len > *lenp) - len = *lenp; - if (copy_to_user(buffer, buf, len)) { - module_put(ops->owner); - return -EFAULT; - } - goto out; - } - len = *lenp; - if (copy_from_user(buf, buffer, - len > sizeof(buf) ? sizeof(buf) : len)) { - module_put(ops->owner); - return -EFAULT; + return rc; } mutex_lock(&appldata_ops_mutex); - if ((buf[0] == '1') && (ops->active == 0)) { + if (active && (ops->active == 0)) { // protect work queue callback if (!try_module_get(ops->owner)) { mutex_unlock(&appldata_ops_mutex); @@ -359,7 +330,7 @@ appldata_generic_handler(struct ctl_table *ctl, int write, module_put(ops->owner); } else ops->active = 1; - } else if ((buf[0] == '0') && (ops->active == 1)) { + } else if (!active && (ops->active == 1)) { ops->active = 0; rc = appldata_diag(ops->record_nr, APPLDATA_STOP_REC, (unsigned long) ops->data, ops->size, @@ -370,9 +341,6 @@ appldata_generic_handler(struct ctl_table *ctl, int write, module_put(ops->owner); } mutex_unlock(&appldata_ops_mutex); -out: - *lenp = len; - *ppos += len; module_put(ops->owner); return 0; } diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile index d1fa37fcce8338..9e6668ee93de83 100644 --- a/arch/s390/boot/Makefile +++ b/arch/s390/boot/Makefile @@ -3,19 +3,52 @@ # Makefile for the linux s390-specific parts of the memory manager. # -targets := image -targets += bzImage -subdir- := compressed +KCOV_INSTRUMENT := n +GCOV_PROFILE := n +UBSAN_SANITIZE := n -$(obj)/image: vmlinux FORCE - $(call if_changed,objcopy) +KBUILD_AFLAGS := $(KBUILD_AFLAGS_DECOMPRESSOR) +KBUILD_CFLAGS := $(KBUILD_CFLAGS_DECOMPRESSOR) + +# +# Use -march=z900 for als.c to be able to print an error +# message if the kernel is started on a machine which is too old +# +ifneq ($(CC_FLAGS_MARCH),-march=z900) +AFLAGS_REMOVE_head.o += $(CC_FLAGS_MARCH) +AFLAGS_head.o += -march=z900 +AFLAGS_REMOVE_mem.o += $(CC_FLAGS_MARCH) +AFLAGS_mem.o += -march=z900 +CFLAGS_REMOVE_als.o += $(CC_FLAGS_MARCH) +CFLAGS_als.o += -march=z900 +CFLAGS_REMOVE_sclp_early_core.o += $(CC_FLAGS_MARCH) +CFLAGS_sclp_early_core.o += -march=z900 +endif + +CFLAGS_sclp_early_core.o += -I$(srctree)/drivers/s390/char + +obj-y := head.o als.o ebcdic.o sclp_early_core.o mem.o +targets := bzImage startup.a $(obj-y) +subdir- := compressed + +OBJECTS := $(addprefix $(obj)/,$(obj-y)) $(obj)/bzImage: $(obj)/compressed/vmlinux FORCE $(call if_changed,objcopy) -$(obj)/compressed/vmlinux: FORCE +$(obj)/compressed/vmlinux: $(obj)/startup.a FORCE $(Q)$(MAKE) $(build)=$(obj)/compressed $@ +quiet_cmd_ar = AR $@ + cmd_ar = rm -f $@; $(AR) rcsTP$(KBUILD_ARFLAGS) $@ $(filter $(OBJECTS), $^) + +$(obj)/startup.a: $(OBJECTS) FORCE + $(call if_changed,ar) + install: $(CONFIGURE) $(obj)/bzImage sh -x $(srctree)/$(obj)/install.sh $(KERNELRELEASE) $(obj)/bzImage \ System.map "$(INSTALL_PATH)" + +chkbss := $(OBJECTS) +chkbss-target := $(obj)/startup.a +include $(srctree)/arch/s390/scripts/Makefile.chkbss diff --git a/arch/s390/kernel/als.c b/arch/s390/boot/als.c similarity index 83% rename from arch/s390/kernel/als.c rename to arch/s390/boot/als.c index d1892bf36cabfa..d592e0d90d9fbb 100644 --- a/arch/s390/kernel/als.c +++ b/arch/s390/boot/als.c @@ -3,12 +3,10 @@ * Copyright IBM Corp. 2016 */ #include -#include #include #include #include #include -#include "entry.h" /* * The code within this file will be called very early. It may _not_ @@ -18,9 +16,9 @@ * For temporary objects the stack (16k) should be used. */ -static unsigned long als[] __initdata = { FACILITIES_ALS }; +static unsigned long als[] = { FACILITIES_ALS }; -static void __init u16_to_hex(char *str, u16 val) +static void u16_to_hex(char *str, u16 val) { int i, num; @@ -33,9 +31,9 @@ static void __init u16_to_hex(char *str, u16 val) *str = '\0'; } -static void __init print_machine_type(void) +static void print_machine_type(void) { - static char mach_str[80] __initdata = "Detected machine-type number: "; + static char mach_str[80] = "Detected machine-type number: "; char type_str[5]; struct cpuid id; @@ -46,7 +44,7 @@ static void __init print_machine_type(void) sclp_early_printk(mach_str); } -static void __init u16_to_decimal(char *str, u16 val) +static void u16_to_decimal(char *str, u16 val) { int div = 1; @@ -60,9 +58,9 @@ static void __init u16_to_decimal(char *str, u16 val) *str = '\0'; } -static void __init print_missing_facilities(void) +static void print_missing_facilities(void) { - static char als_str[80] __initdata = "Missing facilities: "; + static char als_str[80] = "Missing facilities: "; unsigned long val; char val_str[6]; int i, j, first; @@ -95,7 +93,7 @@ static void __init print_missing_facilities(void) sclp_early_printk("See Principles of Operations for facility bits\n"); } -static void __init facility_mismatch(void) +static void facility_mismatch(void) { sclp_early_printk("The Linux kernel requires more recent processor hardware\n"); print_machine_type(); @@ -103,7 +101,7 @@ static void __init facility_mismatch(void) disabled_wait(0x8badcccc); } -void __init verify_facilities(void) +void verify_facilities(void) { int i; diff --git a/arch/s390/boot/compressed/.gitignore b/arch/s390/boot/compressed/.gitignore index 2088cc14062911..45aeb4f087520e 100644 --- a/arch/s390/boot/compressed/.gitignore +++ b/arch/s390/boot/compressed/.gitignore @@ -1,4 +1,5 @@ sizes.h vmlinux vmlinux.lds +vmlinux.scr.lds vmlinux.bin.full diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile index 5766f7b9b2710e..04609478d18b99 100644 --- a/arch/s390/boot/compressed/Makefile +++ b/arch/s390/boot/compressed/Makefile @@ -6,39 +6,29 @@ # KCOV_INSTRUMENT := n +GCOV_PROFILE := n +UBSAN_SANITIZE := n +obj-y := $(if $(CONFIG_KERNEL_UNCOMPRESSED),,head.o misc.o) piggy.o targets := vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 targets += vmlinux.bin.xz vmlinux.bin.lzma vmlinux.bin.lzo vmlinux.bin.lz4 -targets += misc.o piggy.o sizes.h head.o - -KBUILD_CFLAGS := -m64 -D__KERNEL__ -O2 -KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING -D__NO_FORTIFY -KBUILD_CFLAGS += $(cflags-y) -fno-delete-null-pointer-checks -msoft-float -KBUILD_CFLAGS += -fno-asynchronous-unwind-tables -KBUILD_CFLAGS += $(call cc-option,-mpacked-stack) -KBUILD_CFLAGS += $(call cc-option,-ffreestanding) +targets += vmlinux.scr.lds $(obj-y) $(if $(CONFIG_KERNEL_UNCOMPRESSED),,sizes.h) -GCOV_PROFILE := n -UBSAN_SANITIZE := n +KBUILD_AFLAGS := $(KBUILD_AFLAGS_DECOMPRESSOR) +KBUILD_CFLAGS := $(KBUILD_CFLAGS_DECOMPRESSOR) -OBJECTS := $(addprefix $(objtree)/arch/s390/kernel/, head.o ebcdic.o als.o) -OBJECTS += $(objtree)/drivers/s390/char/sclp_early_core.o -OBJECTS += $(obj)/head.o $(obj)/misc.o $(obj)/piggy.o +OBJECTS := $(addprefix $(obj)/,$(obj-y)) LDFLAGS_vmlinux := --oformat $(LD_BFD) -e startup -T -$(obj)/vmlinux: $(obj)/vmlinux.lds $(OBJECTS) +$(obj)/vmlinux: $(obj)/vmlinux.lds $(objtree)/arch/s390/boot/startup.a $(OBJECTS) $(call if_changed,ld) -TRIM_HEAD_SIZE := 0x11000 - -sed-sizes := -e 's/^\([0-9a-fA-F]*\) . \(__bss_start\|_end\)$$/\#define SZ\2 (0x\1 - $(TRIM_HEAD_SIZE))/p' +# extract required uncompressed vmlinux symbols and adjust them to reflect offsets inside vmlinux.bin +sed-sizes := -e 's/^\([0-9a-fA-F]*\) . \(__bss_start\|_end\)$$/\#define SZ\2 (0x\1 - 0x100000)/p' quiet_cmd_sizes = GEN $@ cmd_sizes = $(NM) $< | sed -n $(sed-sizes) > $@ -quiet_cmd_trim_head = TRIM $@ - cmd_trim_head = tail -c +$$(($(TRIM_HEAD_SIZE) + 1)) $< > $@ - $(obj)/sizes.h: vmlinux $(call if_changed,sizes) @@ -48,21 +38,18 @@ $(obj)/head.o: $(obj)/sizes.h CFLAGS_misc.o += -I$(objtree)/$(obj) $(obj)/misc.o: $(obj)/sizes.h -OBJCOPYFLAGS_vmlinux.bin.full := -R .comment -S -$(obj)/vmlinux.bin.full: vmlinux +OBJCOPYFLAGS_vmlinux.bin := -R .comment -S +$(obj)/vmlinux.bin: vmlinux $(call if_changed,objcopy) -$(obj)/vmlinux.bin: $(obj)/vmlinux.bin.full - $(call if_changed,trim_head) - vmlinux.bin.all-y := $(obj)/vmlinux.bin -suffix-$(CONFIG_KERNEL_GZIP) := gz -suffix-$(CONFIG_KERNEL_BZIP2) := bz2 -suffix-$(CONFIG_KERNEL_LZ4) := lz4 -suffix-$(CONFIG_KERNEL_LZMA) := lzma -suffix-$(CONFIG_KERNEL_LZO) := lzo -suffix-$(CONFIG_KERNEL_XZ) := xz +suffix-$(CONFIG_KERNEL_GZIP) := .gz +suffix-$(CONFIG_KERNEL_BZIP2) := .bz2 +suffix-$(CONFIG_KERNEL_LZ4) := .lz4 +suffix-$(CONFIG_KERNEL_LZMA) := .lzma +suffix-$(CONFIG_KERNEL_LZO) := .lzo +suffix-$(CONFIG_KERNEL_XZ) := .xz $(obj)/vmlinux.bin.gz: $(vmlinux.bin.all-y) $(call if_changed,gzip) @@ -78,5 +65,9 @@ $(obj)/vmlinux.bin.xz: $(vmlinux.bin.all-y) $(call if_changed,xzkern) LDFLAGS_piggy.o := -r --format binary --oformat $(LD_BFD) -T -$(obj)/piggy.o: $(obj)/vmlinux.scr $(obj)/vmlinux.bin.$(suffix-y) +$(obj)/piggy.o: $(obj)/vmlinux.scr.lds $(obj)/vmlinux.bin$(suffix-y) $(call if_changed,ld) + +chkbss := $(filter-out $(obj)/misc.o $(obj)/piggy.o,$(OBJECTS)) +chkbss-target := $(obj)/vmlinux.bin +include $(srctree)/arch/s390/scripts/Makefile.chkbss diff --git a/arch/s390/boot/compressed/head.S b/arch/s390/boot/compressed/head.S index 9f94eca0f467c8..df8dbbc17bccdd 100644 --- a/arch/s390/boot/compressed/head.S +++ b/arch/s390/boot/compressed/head.S @@ -15,7 +15,7 @@ #include "sizes.h" __HEAD -ENTRY(startup_continue) +ENTRY(startup_decompressor) basr %r13,0 # get base .LPG1: # setup stack @@ -23,7 +23,7 @@ ENTRY(startup_continue) aghi %r15,-160 brasl %r14,decompress_kernel # Set up registers for memory mover. We move the decompressed image to - # 0x11000, where startup_continue of the decompressed image is supposed + # 0x100000, where startup_continue of the decompressed image is supposed # to be. lgr %r4,%r2 lg %r2,.Loffset-.LPG1(%r13) @@ -33,7 +33,7 @@ ENTRY(startup_continue) la %r1,0x200 mvc 0(mover_end-mover,%r1),mover-.LPG1(%r13) # When the memory mover is done we pass control to - # arch/s390/kernel/head64.S:startup_continue which lives at 0x11000 in + # arch/s390/kernel/head64.S:startup_continue which lives at 0x100000 in # the decompressed image. lgr %r6,%r2 br %r1 @@ -47,6 +47,6 @@ mover_end: .Lstack: .quad 0x8000 + (1<<(PAGE_SHIFT+THREAD_SIZE_ORDER)) .Loffset: - .quad 0x11000 + .quad 0x100000 .Lmvsize: .quad SZ__bss_start diff --git a/arch/s390/boot/compressed/misc.c b/arch/s390/boot/compressed/misc.c index 511b2cc9b91ad4..f66ad73c205b79 100644 --- a/arch/s390/boot/compressed/misc.c +++ b/arch/s390/boot/compressed/misc.c @@ -71,43 +71,6 @@ static int puts(const char *s) return 0; } -void *memset(void *s, int c, size_t n) -{ - char *xs; - - xs = s; - while (n--) - *xs++ = c; - return s; -} - -void *memcpy(void *dest, const void *src, size_t n) -{ - const char *s = src; - char *d = dest; - - while (n--) - *d++ = *s++; - return dest; -} - -void *memmove(void *dest, const void *src, size_t n) -{ - const char *s = src; - char *d = dest; - - if (d <= s) { - while (n--) - *d++ = *s++; - } else { - d += n; - s += n; - while (n--) - *--d = *--s; - } - return dest; -} - static void error(char *x) { unsigned long long psw = 0x000a0000deadbeefULL; diff --git a/arch/s390/boot/compressed/vmlinux.lds.S b/arch/s390/boot/compressed/vmlinux.lds.S index d43c2db12d306b..b16ac8b3c43939 100644 --- a/arch/s390/boot/compressed/vmlinux.lds.S +++ b/arch/s390/boot/compressed/vmlinux.lds.S @@ -23,13 +23,10 @@ SECTIONS *(.text.*) _etext = . ; } - .rodata.compressed : { - *(.rodata.compressed) - } .rodata : { _rodata = . ; *(.rodata) /* read-only data */ - *(.rodata.*) + *(EXCLUDE_FILE (*piggy.o) .rodata.compressed) _erodata = . ; } .data : { @@ -38,6 +35,15 @@ SECTIONS *(.data.*) _edata = . ; } + startup_continue = 0x100000; +#ifdef CONFIG_KERNEL_UNCOMPRESSED + . = 0x100000; +#else + . = ALIGN(8); +#endif + .rodata.compressed : { + *(.rodata.compressed) + } . = ALIGN(256); .bss : { _bss = . ; @@ -54,5 +60,6 @@ SECTIONS *(.eh_frame) *(__ex_table) *(*__ksymtab*) + *(___kcrctab*) } } diff --git a/arch/s390/boot/compressed/vmlinux.scr b/arch/s390/boot/compressed/vmlinux.scr.lds.S similarity index 70% rename from arch/s390/boot/compressed/vmlinux.scr rename to arch/s390/boot/compressed/vmlinux.scr.lds.S index 42a242597f3463..ff01d18c922205 100644 --- a/arch/s390/boot/compressed/vmlinux.scr +++ b/arch/s390/boot/compressed/vmlinux.scr.lds.S @@ -2,10 +2,14 @@ SECTIONS { .rodata.compressed : { +#ifndef CONFIG_KERNEL_UNCOMPRESSED input_len = .; LONG(input_data_end - input_data) input_data = .; +#endif *(.data) +#ifndef CONFIG_KERNEL_UNCOMPRESSED output_len = . - 4; input_data_end = .; +#endif } } diff --git a/arch/s390/boot/ebcdic.c b/arch/s390/boot/ebcdic.c new file mode 100644 index 00000000000000..7391e7d3608656 --- /dev/null +++ b/arch/s390/boot/ebcdic.c @@ -0,0 +1,2 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "../kernel/ebcdic.c" diff --git a/arch/s390/kernel/head.S b/arch/s390/boot/head.S similarity index 98% rename from arch/s390/kernel/head.S rename to arch/s390/boot/head.S index 5c42f16a54c4e1..f721913b73f10c 100644 --- a/arch/s390/kernel/head.S +++ b/arch/s390/boot/head.S @@ -272,14 +272,14 @@ iplstart: .org 0x10000 ENTRY(startup) j .Lep_startup_normal - .org 0x10008 + .org EP_OFFSET # # This is a list of s390 kernel entry points. At address 0x1000f the number of # valid entry points is stored. # # IMPORTANT: Do not change this table, it is s390 kernel ABI! # - .ascii "S390EP" + .ascii EP_STRING .byte 0x00,0x01 # # kdump startup-code at 0x10010, running in 64 bit absolute addressing mode @@ -310,10 +310,11 @@ ENTRY(startup_kdump) l %r15,.Lstack-.LPG0(%r13) ahi %r15,-STACK_FRAME_OVERHEAD brasl %r14,verify_facilities -# For uncompressed images, continue in -# arch/s390/kernel/head64.S. For compressed images, continue in -# arch/s390/boot/compressed/head.S. +#ifdef CONFIG_KERNEL_UNCOMPRESSED jg startup_continue +#else + jg startup_decompressor +#endif .Lstack: .long 0x8000 + (1<<(PAGE_SHIFT+THREAD_SIZE_ORDER)) diff --git a/arch/s390/kernel/head_kdump.S b/arch/s390/boot/head_kdump.S similarity index 100% rename from arch/s390/kernel/head_kdump.S rename to arch/s390/boot/head_kdump.S diff --git a/arch/s390/boot/mem.S b/arch/s390/boot/mem.S new file mode 100644 index 00000000000000..b33463633f03e4 --- /dev/null +++ b/arch/s390/boot/mem.S @@ -0,0 +1,2 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include "../lib/mem.S" diff --git a/arch/s390/boot/sclp_early_core.c b/arch/s390/boot/sclp_early_core.c new file mode 100644 index 00000000000000..5a19fd7020b5e8 --- /dev/null +++ b/arch/s390/boot/sclp_early_core.c @@ -0,0 +1,2 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "../../../drivers/s390/char/sclp_early_core.c" diff --git a/arch/s390/hypfs/hypfs_diag.c b/arch/s390/hypfs/hypfs_diag.c index a2945b289a2928..3452e18bb1ca8a 100644 --- a/arch/s390/hypfs/hypfs_diag.c +++ b/arch/s390/hypfs/hypfs_diag.c @@ -497,7 +497,7 @@ static int hypfs_create_cpu_files(struct dentry *cpus_dir, void *cpu_info) } diag224_idx2name(cpu_info__ctidx(diag204_info_type, cpu_info), buffer); rc = hypfs_create_str(cpu_dir, "type", buffer); - return PTR_RET(rc); + return PTR_ERR_OR_ZERO(rc); } static void *hypfs_create_lpar_files(struct dentry *systems_dir, void *part_hdr) @@ -544,7 +544,7 @@ static int hypfs_create_phys_cpu_files(struct dentry *cpus_dir, void *cpu_info) return PTR_ERR(rc); diag224_idx2name(phys_cpu__ctidx(diag204_info_type, cpu_info), buffer); rc = hypfs_create_str(cpu_dir, "type", buffer); - return PTR_RET(rc); + return PTR_ERR_OR_ZERO(rc); } static void *hypfs_create_phys_files(struct dentry *parent_dir, void *phys_hdr) diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index 06b513d192b9bc..c681329fdeec6b 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -36,7 +36,7 @@ struct hypfs_sb_info { kuid_t uid; /* uid used for files and dirs */ kgid_t gid; /* gid used for files and dirs */ struct dentry *update_file; /* file to trigger update */ - time_t last_update; /* last update time in secs since 1970 */ + time64_t last_update; /* last update, CLOCK_MONOTONIC time */ struct mutex lock; /* lock to protect update process */ }; @@ -52,7 +52,7 @@ static void hypfs_update_update(struct super_block *sb) struct hypfs_sb_info *sb_info = sb->s_fs_info; struct inode *inode = d_inode(sb_info->update_file); - sb_info->last_update = get_seconds(); + sb_info->last_update = ktime_get_seconds(); inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode); } @@ -179,7 +179,7 @@ static ssize_t hypfs_write_iter(struct kiocb *iocb, struct iov_iter *from) * to restart data collection in this case. */ mutex_lock(&fs_info->lock); - if (fs_info->last_update == get_seconds()) { + if (fs_info->last_update == ktime_get_seconds()) { rc = -EBUSY; goto out; } diff --git a/arch/s390/include/asm/ap.h b/arch/s390/include/asm/ap.h index c1bedb4c8de083..046e044a48d096 100644 --- a/arch/s390/include/asm/ap.h +++ b/arch/s390/include/asm/ap.h @@ -46,6 +46,50 @@ struct ap_queue_status { unsigned int _pad2 : 16; }; +/** + * ap_intructions_available() - Test if AP instructions are available. + * + * Returns 0 if the AP instructions are installed. + */ +static inline int ap_instructions_available(void) +{ + register unsigned long reg0 asm ("0") = AP_MKQID(0, 0); + register unsigned long reg1 asm ("1") = -ENODEV; + register unsigned long reg2 asm ("2"); + + asm volatile( + " .long 0xb2af0000\n" /* PQAP(TAPQ) */ + "0: la %0,0\n" + "1:\n" + EX_TABLE(0b, 1b) + : "+d" (reg1), "=d" (reg2) + : "d" (reg0) + : "cc"); + return reg1; +} + +/** + * ap_tapq(): Test adjunct processor queue. + * @qid: The AP queue number + * @info: Pointer to queue descriptor + * + * Returns AP queue status structure. + */ +static inline struct ap_queue_status ap_tapq(ap_qid_t qid, unsigned long *info) +{ + register unsigned long reg0 asm ("0") = qid; + register struct ap_queue_status reg1 asm ("1"); + register unsigned long reg2 asm ("2"); + + asm volatile(".long 0xb2af0000" /* PQAP(TAPQ) */ + : "=d" (reg1), "=d" (reg2) + : "d" (reg0) + : "cc"); + if (info) + *info = reg2; + return reg1; +} + /** * ap_test_queue(): Test adjunct processor queue. * @qid: The AP queue number @@ -54,10 +98,57 @@ struct ap_queue_status { * * Returns AP queue status structure. */ -struct ap_queue_status ap_test_queue(ap_qid_t qid, - int tbit, - unsigned long *info); +static inline struct ap_queue_status ap_test_queue(ap_qid_t qid, + int tbit, + unsigned long *info) +{ + if (tbit) + qid |= 1UL << 23; /* set T bit*/ + return ap_tapq(qid, info); +} +/** + * ap_pqap_rapq(): Reset adjunct processor queue. + * @qid: The AP queue number + * + * Returns AP queue status structure. + */ +static inline struct ap_queue_status ap_rapq(ap_qid_t qid) +{ + register unsigned long reg0 asm ("0") = qid | (1UL << 24); + register struct ap_queue_status reg1 asm ("1"); + + asm volatile( + ".long 0xb2af0000" /* PQAP(RAPQ) */ + : "=d" (reg1) + : "d" (reg0) + : "cc"); + return reg1; +} + +/** + * ap_pqap_zapq(): Reset and zeroize adjunct processor queue. + * @qid: The AP queue number + * + * Returns AP queue status structure. + */ +static inline struct ap_queue_status ap_zapq(ap_qid_t qid) +{ + register unsigned long reg0 asm ("0") = qid | (2UL << 24); + register struct ap_queue_status reg1 asm ("1"); + + asm volatile( + ".long 0xb2af0000" /* PQAP(ZAPQ) */ + : "=d" (reg1) + : "d" (reg0) + : "cc"); + return reg1; +} + +/** + * struct ap_config_info - convenience struct for AP crypto + * config info as returned by the ap_qci() function. + */ struct ap_config_info { unsigned int apsc : 1; /* S bit */ unsigned int apxa : 1; /* N bit */ @@ -74,50 +165,189 @@ struct ap_config_info { unsigned char _reserved4[16]; } __aligned(8); -/* - * ap_query_configuration(): Fetch cryptographic config info +/** + * ap_qci(): Get AP configuration data * - * Returns the ap configuration info fetched via PQAP(QCI). - * On success 0 is returned, on failure a negative errno - * is returned, e.g. if the PQAP(QCI) instruction is not - * available, the return value will be -EOPNOTSUPP. + * Returns 0 on success, or -EOPNOTSUPP. */ -int ap_query_configuration(struct ap_config_info *info); +static inline int ap_qci(struct ap_config_info *config) +{ + register unsigned long reg0 asm ("0") = 4UL << 24; + register unsigned long reg1 asm ("1") = -EOPNOTSUPP; + register struct ap_config_info *reg2 asm ("2") = config; + + asm volatile( + ".long 0xb2af0000\n" /* PQAP(QCI) */ + "0: la %0,0\n" + "1:\n" + EX_TABLE(0b, 1b) + : "+d" (reg1) + : "d" (reg0), "d" (reg2) + : "cc", "memory"); + + return reg1; +} /* * struct ap_qirq_ctrl - convenient struct for easy invocation - * of the ap_queue_irq_ctrl() function. This struct is passed - * as GR1 parameter to the PQAP(AQIC) instruction. For details - * please see the AR documentation. + * of the ap_aqic() function. This struct is passed as GR1 + * parameter to the PQAP(AQIC) instruction. For details please + * see the AR documentation. */ struct ap_qirq_ctrl { unsigned int _res1 : 8; - unsigned int zone : 8; /* zone info */ - unsigned int ir : 1; /* ir flag: enable (1) or disable (0) irq */ + unsigned int zone : 8; /* zone info */ + unsigned int ir : 1; /* ir flag: enable (1) or disable (0) irq */ unsigned int _res2 : 4; - unsigned int gisc : 3; /* guest isc field */ + unsigned int gisc : 3; /* guest isc field */ unsigned int _res3 : 6; - unsigned int gf : 2; /* gisa format */ + unsigned int gf : 2; /* gisa format */ unsigned int _res4 : 1; - unsigned int gisa : 27; /* gisa origin */ + unsigned int gisa : 27; /* gisa origin */ unsigned int _res5 : 1; - unsigned int isc : 3; /* irq sub class */ + unsigned int isc : 3; /* irq sub class */ }; /** - * ap_queue_irq_ctrl(): Control interruption on a AP queue. + * ap_aqic(): Control interruption for a specific AP. * @qid: The AP queue number - * @qirqctrl: struct ap_qirq_ctrl, see above + * @qirqctrl: struct ap_qirq_ctrl (64 bit value) * @ind: The notification indicator byte * * Returns AP queue status. + */ +static inline struct ap_queue_status ap_aqic(ap_qid_t qid, + struct ap_qirq_ctrl qirqctrl, + void *ind) +{ + register unsigned long reg0 asm ("0") = qid | (3UL << 24); + register struct ap_qirq_ctrl reg1_in asm ("1") = qirqctrl; + register struct ap_queue_status reg1_out asm ("1"); + register void *reg2 asm ("2") = ind; + + asm volatile( + ".long 0xb2af0000" /* PQAP(AQIC) */ + : "=d" (reg1_out) + : "d" (reg0), "d" (reg1_in), "d" (reg2) + : "cc"); + return reg1_out; +} + +/* + * union ap_qact_ap_info - used together with the + * ap_aqic() function to provide a convenient way + * to handle the ap info needed by the qact function. + */ +union ap_qact_ap_info { + unsigned long val; + struct { + unsigned int : 3; + unsigned int mode : 3; + unsigned int : 26; + unsigned int cat : 8; + unsigned int : 8; + unsigned char ver[2]; + }; +}; + +/** + * ap_qact(): Query AP combatibility type. + * @qid: The AP queue number + * @apinfo: On input the info about the AP queue. On output the + * alternate AP queue info provided by the qact function + * in GR2 is stored in. * - * Control interruption on the given AP queue. - * Just a simple wrapper function for the low level PQAP(AQIC) - * instruction available for other kernel modules. + * Returns AP queue status. Check response_code field for failures. */ -struct ap_queue_status ap_queue_irq_ctrl(ap_qid_t qid, - struct ap_qirq_ctrl qirqctrl, - void *ind); +static inline struct ap_queue_status ap_qact(ap_qid_t qid, int ifbit, + union ap_qact_ap_info *apinfo) +{ + register unsigned long reg0 asm ("0") = qid | (5UL << 24) + | ((ifbit & 0x01) << 22); + register unsigned long reg1_in asm ("1") = apinfo->val; + register struct ap_queue_status reg1_out asm ("1"); + register unsigned long reg2 asm ("2"); + + asm volatile( + ".long 0xb2af0000" /* PQAP(QACT) */ + : "+d" (reg1_in), "=d" (reg1_out), "=d" (reg2) + : "d" (reg0) + : "cc"); + apinfo->val = reg2; + return reg1_out; +} + +/** + * ap_nqap(): Send message to adjunct processor queue. + * @qid: The AP queue number + * @psmid: The program supplied message identifier + * @msg: The message text + * @length: The message length + * + * Returns AP queue status structure. + * Condition code 1 on NQAP can't happen because the L bit is 1. + * Condition code 2 on NQAP also means the send is incomplete, + * because a segment boundary was reached. The NQAP is repeated. + */ +static inline struct ap_queue_status ap_nqap(ap_qid_t qid, + unsigned long long psmid, + void *msg, size_t length) +{ + register unsigned long reg0 asm ("0") = qid | 0x40000000UL; + register struct ap_queue_status reg1 asm ("1"); + register unsigned long reg2 asm ("2") = (unsigned long) msg; + register unsigned long reg3 asm ("3") = (unsigned long) length; + register unsigned long reg4 asm ("4") = (unsigned int) (psmid >> 32); + register unsigned long reg5 asm ("5") = psmid & 0xffffffff; + + asm volatile ( + "0: .long 0xb2ad0042\n" /* NQAP */ + " brc 2,0b" + : "+d" (reg0), "=d" (reg1), "+d" (reg2), "+d" (reg3) + : "d" (reg4), "d" (reg5) + : "cc", "memory"); + return reg1; +} + +/** + * ap_dqap(): Receive message from adjunct processor queue. + * @qid: The AP queue number + * @psmid: Pointer to program supplied message identifier + * @msg: The message text + * @length: The message length + * + * Returns AP queue status structure. + * Condition code 1 on DQAP means the receive has taken place + * but only partially. The response is incomplete, hence the + * DQAP is repeated. + * Condition code 2 on DQAP also means the receive is incomplete, + * this time because a segment boundary was reached. Again, the + * DQAP is repeated. + * Note that gpr2 is used by the DQAP instruction to keep track of + * any 'residual' length, in case the instruction gets interrupted. + * Hence it gets zeroed before the instruction. + */ +static inline struct ap_queue_status ap_dqap(ap_qid_t qid, + unsigned long long *psmid, + void *msg, size_t length) +{ + register unsigned long reg0 asm("0") = qid | 0x80000000UL; + register struct ap_queue_status reg1 asm ("1"); + register unsigned long reg2 asm("2") = 0UL; + register unsigned long reg4 asm("4") = (unsigned long) msg; + register unsigned long reg5 asm("5") = (unsigned long) length; + register unsigned long reg6 asm("6") = 0UL; + register unsigned long reg7 asm("7") = 0UL; + + + asm volatile( + "0: .long 0xb2ae0064\n" /* DQAP */ + " brc 6,0b\n" + : "+d" (reg0), "=d" (reg1), "+d" (reg2), + "+d" (reg4), "+d" (reg5), "+d" (reg6), "+d" (reg7) + : : "cc", "memory"); + *psmid = (((unsigned long long) reg6) << 32) + reg7; + return reg1; +} #endif /* _ASM_S390_AP_H_ */ diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h index de023a9a88ca8e..bf2cbff926ef8c 100644 --- a/arch/s390/include/asm/cpu_mf.h +++ b/arch/s390/include/asm/cpu_mf.h @@ -2,7 +2,7 @@ /* * CPU-measurement facilities * - * Copyright IBM Corp. 2012 + * Copyright IBM Corp. 2012, 2018 * Author(s): Hendrik Brueckner * Jan Glauber */ @@ -139,8 +139,14 @@ struct hws_trailer_entry { unsigned char timestamp[16]; /* 16 - 31 timestamp */ unsigned long long reserved1; /* 32 -Reserved */ unsigned long long reserved2; /* */ - unsigned long long progusage1; /* 48 - reserved for programming use */ - unsigned long long progusage2; /* */ + union { /* 48 - reserved for programming use */ + struct { + unsigned int clock_base:1; /* in progusage2 */ + unsigned long long progusage1:63; + unsigned long long progusage2; + }; + unsigned long long progusage[2]; + }; } __packed; /* Load program parameter */ diff --git a/arch/s390/include/asm/gmap.h b/arch/s390/include/asm/gmap.h index e07cce88dfb0d9..fcbd638fb9f4c3 100644 --- a/arch/s390/include/asm/gmap.h +++ b/arch/s390/include/asm/gmap.h @@ -9,6 +9,14 @@ #ifndef _ASM_S390_GMAP_H #define _ASM_S390_GMAP_H +/* Generic bits for GMAP notification on DAT table entry changes. */ +#define GMAP_NOTIFY_SHADOW 0x2 +#define GMAP_NOTIFY_MPROT 0x1 + +/* Status bits only for huge segment entries */ +#define _SEGMENT_ENTRY_GMAP_IN 0x8000 /* invalidation notify bit */ +#define _SEGMENT_ENTRY_GMAP_UC 0x4000 /* dirty (migration) */ + /** * struct gmap_struct - guest address space * @list: list head for the mm->context gmap list @@ -132,4 +140,6 @@ void gmap_pte_notify(struct mm_struct *, unsigned long addr, pte_t *, int gmap_mprotect_notify(struct gmap *, unsigned long start, unsigned long len, int prot); +void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long dirty_bitmap[4], + unsigned long gaddr, unsigned long vmaddr); #endif /* _ASM_S390_GMAP_H */ diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h index 9c5fc50204dd63..2d1afa58a4b6bf 100644 --- a/arch/s390/include/asm/hugetlb.h +++ b/arch/s390/include/asm/hugetlb.h @@ -37,7 +37,10 @@ static inline int prepare_hugepage_range(struct file *file, return 0; } -#define arch_clear_hugepage_flags(page) do { } while (0) +static inline void arch_clear_hugepage_flags(struct page *page) +{ + clear_bit(PG_arch_1, &page->flags); +} static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep, unsigned long sz) diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index 5bc888841eafe8..406d940173ab7a 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -185,7 +185,7 @@ struct lowcore { /* Transaction abort diagnostic block */ __u8 pgm_tdb[256]; /* 0x1800 */ __u8 pad_0x1900[0x2000-0x1900]; /* 0x1900 */ -} __packed; +} __packed __aligned(8192); #define S390_lowcore (*((struct lowcore *) 0)) diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h index f5ff9dbad8ac95..f31a15044c24a5 100644 --- a/arch/s390/include/asm/mmu.h +++ b/arch/s390/include/asm/mmu.h @@ -24,6 +24,8 @@ typedef struct { unsigned int uses_skeys:1; /* The mmu context uses CMM. */ unsigned int uses_cmm:1; + /* The gmaps associated with this context are allowed to use huge pages. */ + unsigned int allow_gmap_hpage_1m:1; } mm_context_t; #define INIT_MM_CONTEXT(name) \ diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index d16bc79c30bbfe..0717ee76885d63 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -32,6 +32,7 @@ static inline int init_new_context(struct task_struct *tsk, mm->context.has_pgste = 0; mm->context.uses_skeys = 0; mm->context.uses_cmm = 0; + mm->context.allow_gmap_hpage_1m = 0; #endif switch (mm->context.asce_limit) { case _REGION2_SIZE: diff --git a/arch/s390/include/asm/nospec-insn.h b/arch/s390/include/asm/nospec-insn.h index a01f81186e86ac..123dac3717b33f 100644 --- a/arch/s390/include/asm/nospec-insn.h +++ b/arch/s390/include/asm/nospec-insn.h @@ -8,7 +8,7 @@ #ifdef __ASSEMBLY__ -#ifdef CONFIG_EXPOLINE +#ifdef CC_USING_EXPOLINE _LC_BR_R1 = __LC_BR_R1 @@ -189,7 +189,7 @@ _LC_BR_R1 = __LC_BR_R1 .macro BASR_EX rsave,rtarget,ruse=%r1 basr \rsave,\rtarget .endm -#endif +#endif /* CC_USING_EXPOLINE */ #endif /* __ASSEMBLY__ */ diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index 94f8db468c9b46..10fe982f2b4bf9 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -51,6 +51,10 @@ struct zpci_fmb_fmt2 { u64 max_work_units; }; +struct zpci_fmb_fmt3 { + u64 tx_bytes; +}; + struct zpci_fmb { u32 format : 8; u32 fmt_ind : 24; @@ -66,6 +70,7 @@ struct zpci_fmb { struct zpci_fmb_fmt0 fmt0; struct zpci_fmb_fmt1 fmt1; struct zpci_fmb_fmt2 fmt2; + struct zpci_fmb_fmt3 fmt3; }; } __packed __aligned(128); diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 5ab636089c6052..0e7cb0dc9c33b7 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -268,8 +268,10 @@ static inline int is_module_addr(void *addr) #define _REGION_ENTRY_BITS_LARGE 0xffffffff8000fe2fUL /* Bits in the segment table entry */ -#define _SEGMENT_ENTRY_BITS 0xfffffffffffffe33UL -#define _SEGMENT_ENTRY_BITS_LARGE 0xfffffffffff0ff33UL +#define _SEGMENT_ENTRY_BITS 0xfffffffffffffe33UL +#define _SEGMENT_ENTRY_BITS_LARGE 0xfffffffffff0ff33UL +#define _SEGMENT_ENTRY_HARDWARE_BITS 0xfffffffffffffe30UL +#define _SEGMENT_ENTRY_HARDWARE_BITS_LARGE 0xfffffffffff00730UL #define _SEGMENT_ENTRY_ORIGIN_LARGE ~0xfffffUL /* large page address */ #define _SEGMENT_ENTRY_ORIGIN ~0x7ffUL/* page table origin */ #define _SEGMENT_ENTRY_PROTECT 0x200 /* segment protection bit */ @@ -1101,7 +1103,8 @@ int ptep_shadow_pte(struct mm_struct *mm, unsigned long saddr, pte_t *sptep, pte_t *tptep, pte_t pte); void ptep_unshadow_pte(struct mm_struct *mm, unsigned long saddr, pte_t *ptep); -bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long address); +bool ptep_test_and_clear_uc(struct mm_struct *mm, unsigned long address, + pte_t *ptep); int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, unsigned char key, bool nq); int cond_set_guest_storage_key(struct mm_struct *mm, unsigned long addr, @@ -1116,6 +1119,10 @@ int set_pgste_bits(struct mm_struct *mm, unsigned long addr, int get_pgste(struct mm_struct *mm, unsigned long hva, unsigned long *pgstep); int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc, unsigned long *oldpte, unsigned long *oldpgste); +void gmap_pmdp_csp(struct mm_struct *mm, unsigned long vmaddr); +void gmap_pmdp_invalidate(struct mm_struct *mm, unsigned long vmaddr); +void gmap_pmdp_idte_local(struct mm_struct *mm, unsigned long vmaddr); +void gmap_pmdp_idte_global(struct mm_struct *mm, unsigned long vmaddr); /* * Certain architectures need to do special things when PTEs diff --git a/arch/s390/include/asm/purgatory.h b/arch/s390/include/asm/purgatory.h index 6090670df51fce..e297bcfc476f65 100644 --- a/arch/s390/include/asm/purgatory.h +++ b/arch/s390/include/asm/purgatory.h @@ -13,11 +13,5 @@ int verify_sha256_digest(void); -extern u64 kernel_entry; -extern u64 kernel_type; - -extern u64 crash_start; -extern u64 crash_size; - #endif /* __ASSEMBLY__ */ #endif /* _S390_PURGATORY_H_ */ diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h index de11ecc99c7c46..9c9970a5dfb107 100644 --- a/arch/s390/include/asm/qdio.h +++ b/arch/s390/include/asm/qdio.h @@ -262,7 +262,6 @@ struct qdio_outbuf_state { void *user; }; -#define QDIO_OUTBUF_STATE_FLAG_NONE 0x00 #define QDIO_OUTBUF_STATE_FLAG_PENDING 0x01 #define CHSC_AC1_INITIATE_INPUTQ 0x80 diff --git a/arch/s390/include/asm/sections.h b/arch/s390/include/asm/sections.h index 54f81f8ed6622d..724faede8ac52d 100644 --- a/arch/s390/include/asm/sections.h +++ b/arch/s390/include/asm/sections.h @@ -4,6 +4,4 @@ #include -extern char _ehead[]; - #endif diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index 9c30ebe046f3ec..1d66016f417020 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -9,8 +9,10 @@ #include #include - +#define EP_OFFSET 0x10008 +#define EP_STRING "S390EP" #define PARMAREA 0x10400 +#define PARMAREA_END 0x11000 /* * Machine features detected in early.c diff --git a/arch/s390/include/uapi/asm/chsc.h b/arch/s390/include/uapi/asm/chsc.h index dc329aa03f7600..83a574e95b3a87 100644 --- a/arch/s390/include/uapi/asm/chsc.h +++ b/arch/s390/include/uapi/asm/chsc.h @@ -23,29 +23,29 @@ struct chsc_async_header { __u32 key : 4; __u32 : 28; struct subchannel_id sid; -} __attribute__ ((packed)); +}; struct chsc_async_area { struct chsc_async_header header; __u8 data[CHSC_SIZE - sizeof(struct chsc_async_header)]; -} __attribute__ ((packed)); +}; struct chsc_header { __u16 length; __u16 code; -} __attribute__ ((packed)); +}; struct chsc_sync_area { struct chsc_header header; __u8 data[CHSC_SIZE - sizeof(struct chsc_header)]; -} __attribute__ ((packed)); +}; struct chsc_response_struct { __u16 length; __u16 code; __u32 parms; __u8 data[CHSC_SIZE - 2 * sizeof(__u16) - sizeof(__u32)]; -} __attribute__ ((packed)); +}; struct chsc_chp_cd { struct chp_id chpid; diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 2fed39b26b42e5..dbfd1730e631ac 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -9,38 +9,20 @@ ifdef CONFIG_FUNCTION_TRACER CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE) # Do not trace early setup code -CFLAGS_REMOVE_als.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_early.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_early_nobss.o = $(CC_FLAGS_FTRACE) endif -GCOV_PROFILE_als.o := n GCOV_PROFILE_early.o := n GCOV_PROFILE_early_nobss.o := n -KCOV_INSTRUMENT_als.o := n KCOV_INSTRUMENT_early.o := n KCOV_INSTRUMENT_early_nobss.o := n -UBSAN_SANITIZE_als.o := n UBSAN_SANITIZE_early.o := n UBSAN_SANITIZE_early_nobss.o := n -# -# Use -march=z900 for als.c to be able to print an error -# message if the kernel is started on a machine which is too old -# -ifneq ($(CC_FLAGS_MARCH),-march=z900) -CFLAGS_REMOVE_als.o += $(CC_FLAGS_MARCH) -CFLAGS_REMOVE_als.o += $(CC_FLAGS_EXPOLINE) -CFLAGS_als.o += -march=z900 -AFLAGS_REMOVE_head.o += $(CC_FLAGS_MARCH) -AFLAGS_head.o += -march=z900 -endif - -CFLAGS_als.o += -D__NO_FORTIFY - # # Passing null pointers is ok for smp code, since we access the lowcore here. # @@ -61,13 +43,13 @@ CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"' obj-y := traps.o time.o process.o base.o early.o setup.o idle.o vtime.o obj-y += processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o -obj-y += debug.o irq.o ipl.o dis.o diag.o vdso.o als.o early_nobss.o +obj-y += debug.o irq.o ipl.o dis.o diag.o vdso.o early_nobss.o obj-y += sysinfo.o jump_label.o lgr.o os_info.o machine_kexec.o pgm_check.o obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o obj-y += entry.o reipl.o relocate_kernel.o kdebugfs.o alternative.o obj-y += nospec-branch.o -extra-y += head.o head64.o vmlinux.lds +extra-y += head64.o vmlinux.lds obj-$(CONFIG_SYSFS) += nospec-sysfs.o CFLAGS_REMOVE_nospec-branch.o += $(CC_FLAGS_EXPOLINE) @@ -99,5 +81,5 @@ obj-$(CONFIG_TRACEPOINTS) += trace.o obj-y += vdso64/ obj-$(CONFIG_COMPAT) += vdso32/ -chkbss := head.o head64.o als.o early_nobss.o +chkbss := head64.o early_nobss.o include $(srctree)/arch/s390/scripts/Makefile.chkbss diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index 9f5ea9d870690a..c3620bafc374d3 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -306,6 +306,15 @@ static void *kzalloc_panic(int len) return rc; } +static const char *nt_name(Elf64_Word type) +{ + const char *name = "LINUX"; + + if (type == NT_PRPSINFO || type == NT_PRSTATUS || type == NT_PRFPREG) + name = KEXEC_CORE_NOTE_NAME; + return name; +} + /* * Initialize ELF note */ @@ -332,11 +341,26 @@ static void *nt_init_name(void *buf, Elf64_Word type, void *desc, int d_len, static inline void *nt_init(void *buf, Elf64_Word type, void *desc, int d_len) { - const char *note_name = "LINUX"; + return nt_init_name(buf, type, desc, d_len, nt_name(type)); +} + +/* + * Calculate the size of ELF note + */ +static size_t nt_size_name(int d_len, const char *name) +{ + size_t size; - if (type == NT_PRPSINFO || type == NT_PRSTATUS || type == NT_PRFPREG) - note_name = KEXEC_CORE_NOTE_NAME; - return nt_init_name(buf, type, desc, d_len, note_name); + size = sizeof(Elf64_Nhdr); + size += roundup(strlen(name) + 1, 4); + size += roundup(d_len, 4); + + return size; +} + +static inline size_t nt_size(Elf64_Word type, int d_len) +{ + return nt_size_name(d_len, nt_name(type)); } /* @@ -374,6 +398,29 @@ static void *fill_cpu_elf_notes(void *ptr, int cpu, struct save_area *sa) return ptr; } +/* + * Calculate size of ELF notes per cpu + */ +static size_t get_cpu_elf_notes_size(void) +{ + struct save_area *sa = NULL; + size_t size; + + size = nt_size(NT_PRSTATUS, sizeof(struct elf_prstatus)); + size += nt_size(NT_PRFPREG, sizeof(elf_fpregset_t)); + size += nt_size(NT_S390_TIMER, sizeof(sa->timer)); + size += nt_size(NT_S390_TODCMP, sizeof(sa->todcmp)); + size += nt_size(NT_S390_TODPREG, sizeof(sa->todpreg)); + size += nt_size(NT_S390_CTRS, sizeof(sa->ctrs)); + size += nt_size(NT_S390_PREFIX, sizeof(sa->prefix)); + if (MACHINE_HAS_VX) { + size += nt_size(NT_S390_VXRS_HIGH, sizeof(sa->vxrs_high)); + size += nt_size(NT_S390_VXRS_LOW, sizeof(sa->vxrs_low)); + } + + return size; +} + /* * Initialize prpsinfo note (new kernel) */ @@ -429,6 +476,30 @@ static void *nt_vmcoreinfo(void *ptr) return nt_init_name(ptr, 0, vmcoreinfo, size, "VMCOREINFO"); } +static size_t nt_vmcoreinfo_size(void) +{ + const char *name = "VMCOREINFO"; + char nt_name[11]; + Elf64_Nhdr note; + void *addr; + + if (copy_oldmem_kernel(&addr, &S390_lowcore.vmcore_info, sizeof(addr))) + return 0; + + if (copy_oldmem_kernel(¬e, addr, sizeof(note))) + return 0; + + memset(nt_name, 0, sizeof(nt_name)); + if (copy_oldmem_kernel(nt_name, addr + sizeof(note), + sizeof(nt_name) - 1)) + return 0; + + if (strcmp(nt_name, name) != 0) + return 0; + + return nt_size_name(note.n_descsz, name); +} + /* * Initialize final note (needed for /proc/vmcore code) */ @@ -539,6 +610,27 @@ static void *notes_init(Elf64_Phdr *phdr, void *ptr, u64 notes_offset) return ptr; } +static size_t get_elfcorehdr_size(int mem_chunk_cnt) +{ + size_t size; + + size = sizeof(Elf64_Ehdr); + /* PT_NOTES */ + size += sizeof(Elf64_Phdr); + /* nt_prpsinfo */ + size += nt_size(NT_PRPSINFO, sizeof(struct elf_prpsinfo)); + /* regsets */ + size += get_cpu_cnt() * get_cpu_elf_notes_size(); + /* nt_vmcoreinfo */ + size += nt_vmcoreinfo_size(); + /* nt_final */ + size += sizeof(Elf64_Nhdr); + /* PT_LOADS */ + size += mem_chunk_cnt * sizeof(Elf64_Phdr); + + return size; +} + /* * Create ELF core header (new kernel) */ @@ -566,8 +658,8 @@ int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size) mem_chunk_cnt = get_mem_chunk_cnt(); - alloc_size = 0x1000 + get_cpu_cnt() * 0x4a0 + - mem_chunk_cnt * sizeof(Elf64_Phdr); + alloc_size = get_elfcorehdr_size(mem_chunk_cnt); + hdr = kzalloc_panic(alloc_size); /* Init elf header */ ptr = ehdr_init(hdr, mem_chunk_cnt); diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 827699eb48fa98..5b28b434f8a153 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -331,8 +331,20 @@ static void __init setup_boot_command_line(void) append_to_cmdline(append_ipl_scpdata); } +static void __init check_image_bootable(void) +{ + if (!memcmp(EP_STRING, (void *)EP_OFFSET, strlen(EP_STRING))) + return; + + sclp_early_printk("Linux kernel boot failure: An attempt to boot a vmlinux ELF image failed.\n"); + sclp_early_printk("This image does not contain all parts necessary for starting up. Use\n"); + sclp_early_printk("bzImage or arch/s390/boot/compressed/vmlinux instead.\n"); + disabled_wait(0xbadb007); +} + void __init startup_init(void) { + check_image_bootable(); time_early_init(); init_kernel_storage_key(); lockdep_off(); diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index 961abfac2c5fb0..472fa2f1a4a593 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -83,7 +83,6 @@ long sys_s390_sthyi(unsigned long function_code, void __user *buffer, u64 __user DECLARE_PER_CPU(u64, mt_cycles[8]); -void verify_facilities(void); void gs_load_bc_cb(struct pt_regs *regs); void set_fs_fixup(void); diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index 791cb9000e8658..6d14ad42ba883b 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -48,11 +48,23 @@ ENTRY(startup_continue) # Early machine initialization and detection functions. # brasl %r14,startup_init - lpswe .Lentry-.LPG1(13) # jump to _stext in primary-space, - # virtual and never return ... + +# check control registers + stctg %c0,%c15,0(%r15) + oi 6(%r15),0x60 # enable sigp emergency & external call + oi 4(%r15),0x10 # switch on low address proctection + lctlg %c0,%c15,0(%r15) + + lam 0,15,.Laregs-.LPG1(%r13) # load acrs needed by uaccess + brasl %r14,start_kernel # go to C code +# +# We returned from start_kernel ?!? PANIK +# + basr %r13,0 + lpswe .Ldw-.(%r13) # load disabled wait psw + .align 16 .LPG1: -.Lentry:.quad 0x0000000180000000,_stext .Lctl: .quad 0x04040000 # cr0: AFP registers & secondary space .quad 0 # cr1: primary space segment table .quad .Lduct # cr2: dispatchable unit control table @@ -85,30 +97,5 @@ ENTRY(startup_continue) .endr .Llinkage_stack: .long 0,0,0x89000000,0,0,0,0x8a000000,0 - -ENTRY(_ehead) - - .org 0x100000 - 0x11000 # head.o ends at 0x11000 -# -# startup-code, running in absolute addressing mode -# -ENTRY(_stext) - basr %r13,0 # get base -.LPG3: -# check control registers - stctg %c0,%c15,0(%r15) - oi 6(%r15),0x60 # enable sigp emergency & external call - oi 4(%r15),0x10 # switch on low address proctection - lctlg %c0,%c15,0(%r15) - - lam 0,15,.Laregs-.LPG3(%r13) # load acrs needed by uaccess - brasl %r14,start_kernel # go to C code -# -# We returned from start_kernel ?!? PANIK -# - basr %r13,0 - lpswe .Ldw-.(%r13) # load disabled wait psw - - .align 8 .Ldw: .quad 0x0002000180000000,0x0000000000000000 .Laregs:.long 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 diff --git a/arch/s390/kernel/nospec-branch.c b/arch/s390/kernel/nospec-branch.c index 18ae7b9c71d6e3..bdddaae9655984 100644 --- a/arch/s390/kernel/nospec-branch.c +++ b/arch/s390/kernel/nospec-branch.c @@ -35,6 +35,8 @@ early_param("nospec", nospec_setup_early); static int __init nospec_report(void) { + if (test_facility(156)) + pr_info("Spectre V2 mitigation: etokens\n"); if (IS_ENABLED(CC_USING_EXPOLINE) && !nospec_disable) pr_info("Spectre V2 mitigation: execute trampolines\n"); if (__test_facility(82, S390_lowcore.alt_stfle_fac_list)) @@ -56,7 +58,15 @@ early_param("nospectre_v2", nospectre_v2_setup_early); void __init nospec_auto_detect(void) { - if (IS_ENABLED(CC_USING_EXPOLINE)) { + if (test_facility(156)) { + /* + * The machine supports etokens. + * Disable expolines and disable nobp. + */ + if (IS_ENABLED(CC_USING_EXPOLINE)) + nospec_disable = 1; + __clear_facility(82, S390_lowcore.alt_stfle_fac_list); + } else if (IS_ENABLED(CC_USING_EXPOLINE)) { /* * The kernel has been compiled with expolines. * Keep expolines enabled and disable nobp. diff --git a/arch/s390/kernel/nospec-sysfs.c b/arch/s390/kernel/nospec-sysfs.c index 8affad5f18cb5d..e30e580ae36209 100644 --- a/arch/s390/kernel/nospec-sysfs.c +++ b/arch/s390/kernel/nospec-sysfs.c @@ -13,6 +13,8 @@ ssize_t cpu_show_spectre_v1(struct device *dev, ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, char *buf) { + if (test_facility(156)) + return sprintf(buf, "Mitigation: etokens\n"); if (IS_ENABLED(CC_USING_EXPOLINE) && !nospec_disable) return sprintf(buf, "Mitigation: execute trampolines\n"); if (__test_facility(82, S390_lowcore.alt_stfle_fac_list)) diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 0292d68e7dded7..cb198d4a6dca75 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -2,7 +2,7 @@ /* * Performance event support for the System z CPU-measurement Sampling Facility * - * Copyright IBM Corp. 2013 + * Copyright IBM Corp. 2013, 2018 * Author(s): Hendrik Brueckner */ #define KMSG_COMPONENT "cpum_sf" @@ -1587,6 +1587,17 @@ static void aux_buffer_free(void *data) "%lu SDBTs\n", num_sdbt); } +static void aux_sdb_init(unsigned long sdb) +{ + struct hws_trailer_entry *te; + + te = (struct hws_trailer_entry *)trailer_entry_ptr(sdb); + + /* Save clock base */ + te->clock_base = 1; + memcpy(&te->progusage2, &tod_clock_base[1], 8); +} + /* * aux_buffer_setup() - Setup AUX buffer for diagnostic mode sampling * @cpu: On which to allocate, -1 means current @@ -1666,6 +1677,7 @@ static void *aux_buffer_setup(int cpu, void **pages, int nr_pages, /* Tail is the entry in a SDBT */ *tail = (unsigned long)pages[i]; aux->sdb_index[i] = (unsigned long)pages[i]; + aux_sdb_init((unsigned long)pages[i]); } sfb->num_sdb = nr_pages; diff --git a/arch/s390/kernel/perf_regs.c b/arch/s390/kernel/perf_regs.c index 54e2d634b849e1..4352a504f2354f 100644 --- a/arch/s390/kernel/perf_regs.c +++ b/arch/s390/kernel/perf_regs.c @@ -12,9 +12,6 @@ u64 perf_reg_value(struct pt_regs *regs, int idx) { freg_t fp; - if (WARN_ON_ONCE((u32)idx >= PERF_REG_S390_MAX)) - return 0; - if (idx >= PERF_REG_S390_R0 && idx <= PERF_REG_S390_R15) return regs->gprs[idx]; @@ -33,7 +30,8 @@ u64 perf_reg_value(struct pt_regs *regs, int idx) if (idx == PERF_REG_S390_PC) return regs->psw.addr; - return regs->gprs[idx]; + WARN_ON_ONCE((u32)idx >= PERF_REG_S390_MAX); + return 0; } #define REG_RESERVED (~((1UL << PERF_REG_S390_MAX) - 1)) diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index d82a9ec64ea971..c637c12f9e37cc 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -674,12 +674,12 @@ static void __init reserve_kernel(void) #ifdef CONFIG_DMA_API_DEBUG /* * DMA_API_DEBUG code stumbles over addresses from the - * range [_ehead, _stext]. Mark the memory as reserved + * range [PARMAREA_END, _stext]. Mark the memory as reserved * so it is not used for CONFIG_DMA_API_DEBUG=y. */ memblock_reserve(0, PFN_PHYS(start_pfn)); #else - memblock_reserve(0, (unsigned long)_ehead); + memblock_reserve(0, PARMAREA_END); memblock_reserve((unsigned long)_stext, PFN_PHYS(start_pfn) - (unsigned long)_stext); #endif diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c index 54f5496913fa77..12f80d1f041513 100644 --- a/arch/s390/kernel/sysinfo.c +++ b/arch/s390/kernel/sysinfo.c @@ -59,6 +59,8 @@ int stsi(void *sysinfo, int fc, int sel1, int sel2) } EXPORT_SYMBOL(stsi); +#ifdef CONFIG_PROC_FS + static bool convert_ext_name(unsigned char encoding, char *name, size_t len) { switch (encoding) { @@ -301,6 +303,8 @@ static int __init sysinfo_create_proc(void) } device_initcall(sysinfo_create_proc); +#endif /* CONFIG_PROC_FS */ + /* * Service levels interface. */ diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 4b6e0397f66d63..e8184a15578a33 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -579,41 +579,33 @@ early_param("topology", topology_setup); static int topology_ctl_handler(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - unsigned int len; + int enabled = topology_is_enabled(); int new_mode; - char buf[2]; + int zero = 0; + int one = 1; + int rc; + struct ctl_table ctl_entry = { + .procname = ctl->procname, + .data = &enabled, + .maxlen = sizeof(int), + .extra1 = &zero, + .extra2 = &one, + }; + + rc = proc_douintvec_minmax(&ctl_entry, write, buffer, lenp, ppos); + if (rc < 0 || !write) + return rc; - if (!*lenp || *ppos) { - *lenp = 0; - return 0; - } - if (!write) { - strncpy(buf, topology_is_enabled() ? "1\n" : "0\n", - ARRAY_SIZE(buf)); - len = strnlen(buf, ARRAY_SIZE(buf)); - if (len > *lenp) - len = *lenp; - if (copy_to_user(buffer, buf, len)) - return -EFAULT; - goto out; - } - len = *lenp; - if (copy_from_user(buf, buffer, len > sizeof(buf) ? sizeof(buf) : len)) - return -EFAULT; - if (buf[0] != '0' && buf[0] != '1') - return -EINVAL; mutex_lock(&smp_cpu_state_mutex); - new_mode = topology_get_mode(buf[0] == '1'); + new_mode = topology_get_mode(enabled); if (topology_mode != new_mode) { topology_mode = new_mode; topology_schedule_update(); } mutex_unlock(&smp_cpu_state_mutex); topology_flush_work(); -out: - *lenp = len; - *ppos += len; - return 0; + + return rc; } static struct ctl_table topology_ctl_table[] = { diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index 09abae40f9178a..3031cc6dd0ab48 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -47,7 +47,7 @@ static struct page **vdso64_pagelist; */ unsigned int __read_mostly vdso_enabled = 1; -static int vdso_fault(const struct vm_special_mapping *sm, +static vm_fault_t vdso_fault(const struct vm_special_mapping *sm, struct vm_area_struct *vma, struct vm_fault *vmf) { struct page **vdso_pagelist; diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index f0414f52817b14..b43f8d33a3697d 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -19,7 +19,7 @@ OUTPUT_FORMAT("elf64-s390", "elf64-s390", "elf64-s390") OUTPUT_ARCH(s390:64-bit) -ENTRY(startup) +ENTRY(startup_continue) jiffies = jiffies_64; PHDRS { @@ -30,16 +30,12 @@ PHDRS { SECTIONS { - . = 0x00000000; + . = 0x100000; + _stext = .; /* Start of text section */ .text : { /* Text and read-only data */ + _text = .; HEAD_TEXT - /* - * E.g. perf doesn't like symbols starting at address zero, - * therefore skip the initial PSW and channel program located - * at address zero and let _text start at 0x200. - */ - _text = 0x200; TEXT_TEXT SCHED_TEXT CPUIDLE_TEXT @@ -47,6 +43,7 @@ SECTIONS KPROBES_TEXT IRQENTRY_TEXT SOFTIRQENTRY_TEXT + *(.text.*_indirect_*) *(.fixup) *(.gnu.warning) } :text = 0x0700 diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 3b7a5151b6a5ef..f9d90337e64a6c 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -172,6 +172,10 @@ static int nested; module_param(nested, int, S_IRUGO); MODULE_PARM_DESC(nested, "Nested virtualization support"); +/* allow 1m huge page guest backing, if !nested */ +static int hpage; +module_param(hpage, int, 0444); +MODULE_PARM_DESC(hpage, "1m huge page backing support"); /* * For now we handle at most 16 double words as this is what the s390 base @@ -475,6 +479,11 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_S390_AIS_MIGRATION: r = 1; break; + case KVM_CAP_S390_HPAGE_1M: + r = 0; + if (hpage) + r = 1; + break; case KVM_CAP_S390_MEM_OP: r = MEM_OP_MAX_SIZE; break; @@ -511,19 +520,30 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) } static void kvm_s390_sync_dirty_log(struct kvm *kvm, - struct kvm_memory_slot *memslot) + struct kvm_memory_slot *memslot) { + int i; gfn_t cur_gfn, last_gfn; - unsigned long address; + unsigned long gaddr, vmaddr; struct gmap *gmap = kvm->arch.gmap; + DECLARE_BITMAP(bitmap, _PAGE_ENTRIES); - /* Loop over all guest pages */ + /* Loop over all guest segments */ + cur_gfn = memslot->base_gfn; last_gfn = memslot->base_gfn + memslot->npages; - for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) { - address = gfn_to_hva_memslot(memslot, cur_gfn); + for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) { + gaddr = gfn_to_gpa(cur_gfn); + vmaddr = gfn_to_hva_memslot(memslot, cur_gfn); + if (kvm_is_error_hva(vmaddr)) + continue; + + bitmap_zero(bitmap, _PAGE_ENTRIES); + gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr); + for (i = 0; i < _PAGE_ENTRIES; i++) { + if (test_bit(i, bitmap)) + mark_page_dirty(kvm, cur_gfn + i); + } - if (test_and_clear_guest_dirty(gmap->mm, address)) - mark_page_dirty(kvm, cur_gfn); if (fatal_signal_pending(current)) return; cond_resched(); @@ -667,6 +687,27 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s", r ? "(not available)" : "(success)"); break; + case KVM_CAP_S390_HPAGE_1M: + mutex_lock(&kvm->lock); + if (kvm->created_vcpus) + r = -EBUSY; + else if (!hpage || kvm->arch.use_cmma) + r = -EINVAL; + else { + r = 0; + kvm->mm->context.allow_gmap_hpage_1m = 1; + /* + * We might have to create fake 4k page + * tables. To avoid that the hardware works on + * stale PGSTEs, we emulate these instructions. + */ + kvm->arch.use_skf = 0; + kvm->arch.use_pfmfi = 0; + } + mutex_unlock(&kvm->lock); + VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s", + r ? "(not available)" : "(success)"); + break; case KVM_CAP_S390_USER_STSI: VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI"); kvm->arch.user_stsi = 1; @@ -714,10 +755,13 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att if (!sclp.has_cmma) break; - ret = -EBUSY; VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support"); mutex_lock(&kvm->lock); - if (!kvm->created_vcpus) { + if (kvm->created_vcpus) + ret = -EBUSY; + else if (kvm->mm->context.allow_gmap_hpage_1m) + ret = -EINVAL; + else { kvm->arch.use_cmma = 1; /* Not compatible with cmma. */ kvm->arch.use_pfmfi = 0; @@ -1540,6 +1584,7 @@ static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) uint8_t *keys; uint64_t hva; int srcu_idx, i, r = 0; + bool unlocked; if (args->flags != 0) return -EINVAL; @@ -1564,9 +1609,11 @@ static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) if (r) goto out; + i = 0; down_read(¤t->mm->mmap_sem); srcu_idx = srcu_read_lock(&kvm->srcu); - for (i = 0; i < args->count; i++) { + while (i < args->count) { + unlocked = false; hva = gfn_to_hva(kvm, args->start_gfn + i); if (kvm_is_error_hva(hva)) { r = -EFAULT; @@ -1580,8 +1627,14 @@ static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) } r = set_guest_storage_key(current->mm, hva, keys[i], 0); - if (r) - break; + if (r) { + r = fixup_user_fault(current, current->mm, hva, + FAULT_FLAG_WRITE, &unlocked); + if (r) + break; + } + if (!r) + i++; } srcu_read_unlock(&kvm->srcu, srcu_idx); up_read(¤t->mm->mmap_sem); @@ -4082,6 +4135,11 @@ static int __init kvm_s390_init(void) return -ENODEV; } + if (nested && hpage) { + pr_info("nested (vSIE) and hpage (huge page backing) can currently not be activated concurrently"); + return -EINVAL; + } + for (i = 0; i < 16; i++) kvm_s390_fac_base[i] |= S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i); diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index eb0eb60c7be6a2..cfc5a62329f607 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -246,9 +246,10 @@ static int try_handle_skey(struct kvm_vcpu *vcpu) static int handle_iske(struct kvm_vcpu *vcpu) { - unsigned long addr; + unsigned long gaddr, vmaddr; unsigned char key; int reg1, reg2; + bool unlocked; int rc; vcpu->stat.instruction_iske++; @@ -262,18 +263,28 @@ static int handle_iske(struct kvm_vcpu *vcpu) kvm_s390_get_regs_rre(vcpu, ®1, ®2); - addr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK; - addr = kvm_s390_logical_to_effective(vcpu, addr); - addr = kvm_s390_real_to_abs(vcpu, addr); - addr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(addr)); - if (kvm_is_error_hva(addr)) + gaddr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK; + gaddr = kvm_s390_logical_to_effective(vcpu, gaddr); + gaddr = kvm_s390_real_to_abs(vcpu, gaddr); + vmaddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(gaddr)); + if (kvm_is_error_hva(vmaddr)) return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - +retry: + unlocked = false; down_read(¤t->mm->mmap_sem); - rc = get_guest_storage_key(current->mm, addr, &key); - up_read(¤t->mm->mmap_sem); + rc = get_guest_storage_key(current->mm, vmaddr, &key); + + if (rc) { + rc = fixup_user_fault(current, current->mm, vmaddr, + FAULT_FLAG_WRITE, &unlocked); + if (!rc) { + up_read(¤t->mm->mmap_sem); + goto retry; + } + } if (rc) return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + up_read(¤t->mm->mmap_sem); vcpu->run->s.regs.gprs[reg1] &= ~0xff; vcpu->run->s.regs.gprs[reg1] |= key; return 0; @@ -281,8 +292,9 @@ static int handle_iske(struct kvm_vcpu *vcpu) static int handle_rrbe(struct kvm_vcpu *vcpu) { - unsigned long addr; + unsigned long vmaddr, gaddr; int reg1, reg2; + bool unlocked; int rc; vcpu->stat.instruction_rrbe++; @@ -296,19 +308,27 @@ static int handle_rrbe(struct kvm_vcpu *vcpu) kvm_s390_get_regs_rre(vcpu, ®1, ®2); - addr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK; - addr = kvm_s390_logical_to_effective(vcpu, addr); - addr = kvm_s390_real_to_abs(vcpu, addr); - addr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(addr)); - if (kvm_is_error_hva(addr)) + gaddr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK; + gaddr = kvm_s390_logical_to_effective(vcpu, gaddr); + gaddr = kvm_s390_real_to_abs(vcpu, gaddr); + vmaddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(gaddr)); + if (kvm_is_error_hva(vmaddr)) return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - +retry: + unlocked = false; down_read(¤t->mm->mmap_sem); - rc = reset_guest_reference_bit(current->mm, addr); - up_read(¤t->mm->mmap_sem); + rc = reset_guest_reference_bit(current->mm, vmaddr); + if (rc < 0) { + rc = fixup_user_fault(current, current->mm, vmaddr, + FAULT_FLAG_WRITE, &unlocked); + if (!rc) { + up_read(¤t->mm->mmap_sem); + goto retry; + } + } if (rc < 0) return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - + up_read(¤t->mm->mmap_sem); kvm_s390_set_psw_cc(vcpu, rc); return 0; } @@ -323,6 +343,7 @@ static int handle_sske(struct kvm_vcpu *vcpu) unsigned long start, end; unsigned char key, oldkey; int reg1, reg2; + bool unlocked; int rc; vcpu->stat.instruction_sske++; @@ -355,19 +376,28 @@ static int handle_sske(struct kvm_vcpu *vcpu) } while (start != end) { - unsigned long addr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(start)); + unsigned long vmaddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(start)); + unlocked = false; - if (kvm_is_error_hva(addr)) + if (kvm_is_error_hva(vmaddr)) return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); down_read(¤t->mm->mmap_sem); - rc = cond_set_guest_storage_key(current->mm, addr, key, &oldkey, + rc = cond_set_guest_storage_key(current->mm, vmaddr, key, &oldkey, m3 & SSKE_NQ, m3 & SSKE_MR, m3 & SSKE_MC); - up_read(¤t->mm->mmap_sem); - if (rc < 0) + + if (rc < 0) { + rc = fixup_user_fault(current, current->mm, vmaddr, + FAULT_FLAG_WRITE, &unlocked); + rc = !rc ? -EAGAIN : rc; + } + if (rc == -EFAULT) return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - start += PAGE_SIZE; + + up_read(¤t->mm->mmap_sem); + if (rc >= 0) + start += PAGE_SIZE; } if (m3 & (SSKE_MC | SSKE_MR)) { @@ -948,15 +978,16 @@ static int handle_pfmf(struct kvm_vcpu *vcpu) } while (start != end) { - unsigned long useraddr; + unsigned long vmaddr; + bool unlocked = false; /* Translate guest address to host address */ - useraddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(start)); - if (kvm_is_error_hva(useraddr)) + vmaddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(start)); + if (kvm_is_error_hva(vmaddr)) return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); if (vcpu->run->s.regs.gprs[reg1] & PFMF_CF) { - if (clear_user((void __user *)useraddr, PAGE_SIZE)) + if (clear_user((void __user *)vmaddr, PAGE_SIZE)) return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); } @@ -966,14 +997,20 @@ static int handle_pfmf(struct kvm_vcpu *vcpu) if (rc) return rc; down_read(¤t->mm->mmap_sem); - rc = cond_set_guest_storage_key(current->mm, useraddr, + rc = cond_set_guest_storage_key(current->mm, vmaddr, key, NULL, nq, mr, mc); - up_read(¤t->mm->mmap_sem); - if (rc < 0) + if (rc < 0) { + rc = fixup_user_fault(current, current->mm, vmaddr, + FAULT_FLAG_WRITE, &unlocked); + rc = !rc ? -EAGAIN : rc; + } + if (rc == -EFAULT) return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - } - start += PAGE_SIZE; + up_read(¤t->mm->mmap_sem); + if (rc >= 0) + start += PAGE_SIZE; + } } if (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) { if (psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_BITS_AMODE_64BIT) { diff --git a/arch/s390/lib/mem.S b/arch/s390/lib/mem.S index 2311f15be9cf04..40c4d59c926e52 100644 --- a/arch/s390/lib/mem.S +++ b/arch/s390/lib/mem.S @@ -17,7 +17,7 @@ ENTRY(memmove) ltgr %r4,%r4 lgr %r1,%r2 - bzr %r14 + jz .Lmemmove_exit aghi %r4,-1 clgr %r2,%r3 jnh .Lmemmove_forward @@ -36,6 +36,7 @@ ENTRY(memmove) .Lmemmove_forward_remainder: larl %r5,.Lmemmove_mvc ex %r4,0(%r5) +.Lmemmove_exit: BR_EX %r14 .Lmemmove_reverse: ic %r0,0(%r4,%r3) @@ -65,7 +66,7 @@ EXPORT_SYMBOL(memmove) */ ENTRY(memset) ltgr %r4,%r4 - bzr %r14 + jz .Lmemset_exit ltgr %r3,%r3 jnz .Lmemset_fill aghi %r4,-1 @@ -80,6 +81,7 @@ ENTRY(memset) .Lmemset_clear_remainder: larl %r3,.Lmemset_xc ex %r4,0(%r3) +.Lmemset_exit: BR_EX %r14 .Lmemset_fill: cghi %r4,1 @@ -115,7 +117,7 @@ EXPORT_SYMBOL(memset) */ ENTRY(memcpy) ltgr %r4,%r4 - bzr %r14 + jz .Lmemcpy_exit aghi %r4,-1 srlg %r5,%r4,8 ltgr %r5,%r5 @@ -124,6 +126,7 @@ ENTRY(memcpy) .Lmemcpy_remainder: larl %r5,.Lmemcpy_mvc ex %r4,0(%r5) +.Lmemcpy_exit: BR_EX %r14 .Lmemcpy_loop: mvc 0(256,%r1),0(%r3) @@ -145,9 +148,9 @@ EXPORT_SYMBOL(memcpy) .macro __MEMSET bits,bytes,insn ENTRY(__memset\bits) ltgr %r4,%r4 - bzr %r14 + jz .L__memset_exit\bits cghi %r4,\bytes - je .L__memset_exit\bits + je .L__memset_store\bits aghi %r4,-(\bytes+1) srlg %r5,%r4,8 ltgr %r5,%r5 @@ -163,8 +166,9 @@ ENTRY(__memset\bits) larl %r5,.L__memset_mvc\bits ex %r4,0(%r5) BR_EX %r14 -.L__memset_exit\bits: +.L__memset_store\bits: \insn %r3,0(%r2) +.L__memset_exit\bits: BR_EX %r14 .L__memset_mvc\bits: mvc \bytes(1,%r1),0(%r1) diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c index 6cf024eb2085d8..510a18299196f3 100644 --- a/arch/s390/mm/cmm.c +++ b/arch/s390/mm/cmm.c @@ -191,12 +191,7 @@ static void cmm_set_timer(void) del_timer(&cmm_timer); return; } - if (timer_pending(&cmm_timer)) { - if (mod_timer(&cmm_timer, jiffies + cmm_timeout_seconds*HZ)) - return; - } - cmm_timer.expires = jiffies + cmm_timeout_seconds*HZ; - add_timer(&cmm_timer); + mod_timer(&cmm_timer, jiffies + cmm_timeout_seconds * HZ); } static void cmm_timer_fn(struct timer_list *unused) @@ -251,45 +246,42 @@ static int cmm_skip_blanks(char *cp, char **endp) return str != cp; } -static struct ctl_table cmm_table[]; - static int cmm_pages_handler(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - char buf[16], *p; - unsigned int len; - long nr; + long nr = cmm_get_pages(); + struct ctl_table ctl_entry = { + .procname = ctl->procname, + .data = &nr, + .maxlen = sizeof(long), + }; + int rc; - if (!*lenp || (*ppos && !write)) { - *lenp = 0; - return 0; - } + rc = proc_doulongvec_minmax(&ctl_entry, write, buffer, lenp, ppos); + if (rc < 0 || !write) + return rc; - if (write) { - len = *lenp; - if (copy_from_user(buf, buffer, - len > sizeof(buf) ? sizeof(buf) : len)) - return -EFAULT; - buf[sizeof(buf) - 1] = '\0'; - cmm_skip_blanks(buf, &p); - nr = simple_strtoul(p, &p, 0); - if (ctl == &cmm_table[0]) - cmm_set_pages(nr); - else - cmm_add_timed_pages(nr); - } else { - if (ctl == &cmm_table[0]) - nr = cmm_get_pages(); - else - nr = cmm_get_timed_pages(); - len = sprintf(buf, "%ld\n", nr); - if (len > *lenp) - len = *lenp; - if (copy_to_user(buffer, buf, len)) - return -EFAULT; - } - *lenp = len; - *ppos += len; + cmm_set_pages(nr); + return 0; +} + +static int cmm_timed_pages_handler(struct ctl_table *ctl, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + long nr = cmm_get_timed_pages(); + struct ctl_table ctl_entry = { + .procname = ctl->procname, + .data = &nr, + .maxlen = sizeof(long), + }; + int rc; + + rc = proc_doulongvec_minmax(&ctl_entry, write, buffer, lenp, ppos); + if (rc < 0 || !write) + return rc; + + cmm_add_timed_pages(nr); return 0; } @@ -338,7 +330,7 @@ static struct ctl_table cmm_table[] = { { .procname = "cmm_timed_pages", .mode = 0644, - .proc_handler = cmm_pages_handler, + .proc_handler = cmm_timed_pages_handler, }, { .procname = "cmm_timeout", diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c index 6ad15d3fab819d..84111a43ea2932 100644 --- a/arch/s390/mm/extmem.c +++ b/arch/s390/mm/extmem.c @@ -80,7 +80,7 @@ struct qin64 { struct dcss_segment { struct list_head list; char dcss_name[8]; - char res_name[15]; + char res_name[16]; unsigned long start_addr; unsigned long end; atomic_t ref_count; @@ -433,7 +433,7 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long memcpy(&seg->res_name, seg->dcss_name, 8); EBCASC(seg->res_name, 8); seg->res_name[8] = '\0'; - strncat(seg->res_name, " (DCSS)", 7); + strlcat(seg->res_name, " (DCSS)", sizeof(seg->res_name)); seg->res->name = seg->res_name; rc = seg->vm_segtype; if (rc == SEG_TYPE_SC || diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index e074480d3598c0..4cc3f06b0ab335 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -502,6 +502,8 @@ static inline int do_exception(struct pt_regs *regs, int access) /* No reason to continue if interrupted by SIGKILL. */ if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) { fault = VM_FAULT_SIGNAL; + if (flags & FAULT_FLAG_RETRY_NOWAIT) + goto out_up; goto out; } if (unlikely(fault & VM_FAULT_ERROR)) diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index bc56ec8abcf7f7..bb44990c821208 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -2,8 +2,10 @@ /* * KVM guest address space mapping code * - * Copyright IBM Corp. 2007, 2016 + * Copyright IBM Corp. 2007, 2016, 2018 * Author(s): Martin Schwidefsky + * David Hildenbrand + * Janosch Frank */ #include @@ -521,6 +523,9 @@ void gmap_unlink(struct mm_struct *mm, unsigned long *table, rcu_read_unlock(); } +static void gmap_pmdp_xchg(struct gmap *gmap, pmd_t *old, pmd_t new, + unsigned long gaddr); + /** * gmap_link - set up shadow page tables to connect a host to a guest address * @gmap: pointer to guest mapping meta data structure @@ -541,6 +546,7 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr) p4d_t *p4d; pud_t *pud; pmd_t *pmd; + u64 unprot; int rc; BUG_ON(gmap_is_shadow(gmap)); @@ -584,8 +590,8 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr) return -EFAULT; pmd = pmd_offset(pud, vmaddr); VM_BUG_ON(pmd_none(*pmd)); - /* large pmds cannot yet be handled */ - if (pmd_large(*pmd)) + /* Are we allowed to use huge pages? */ + if (pmd_large(*pmd) && !gmap->mm->context.allow_gmap_hpage_1m) return -EFAULT; /* Link gmap segment table entry location to page table. */ rc = radix_tree_preload(GFP_KERNEL); @@ -596,10 +602,22 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr) if (*table == _SEGMENT_ENTRY_EMPTY) { rc = radix_tree_insert(&gmap->host_to_guest, vmaddr >> PMD_SHIFT, table); - if (!rc) - *table = pmd_val(*pmd); - } else - rc = 0; + if (!rc) { + if (pmd_large(*pmd)) { + *table = (pmd_val(*pmd) & + _SEGMENT_ENTRY_HARDWARE_BITS_LARGE) + | _SEGMENT_ENTRY_GMAP_UC; + } else + *table = pmd_val(*pmd) & + _SEGMENT_ENTRY_HARDWARE_BITS; + } + } else if (*table & _SEGMENT_ENTRY_PROTECT && + !(pmd_val(*pmd) & _SEGMENT_ENTRY_PROTECT)) { + unprot = (u64)*table; + unprot &= ~_SEGMENT_ENTRY_PROTECT; + unprot |= _SEGMENT_ENTRY_GMAP_UC; + gmap_pmdp_xchg(gmap, (pmd_t *)table, __pmd(unprot), gaddr); + } spin_unlock(&gmap->guest_table_lock); spin_unlock(ptl); radix_tree_preload_end(); @@ -690,6 +708,12 @@ void gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to) vmaddr |= gaddr & ~PMD_MASK; /* Find vma in the parent mm */ vma = find_vma(gmap->mm, vmaddr); + /* + * We do not discard pages that are backed by + * hugetlbfs, so we don't have to refault them. + */ + if (vma && is_vm_hugetlb_page(vma)) + continue; size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK)); zap_page_range(vma, vmaddr, size); } @@ -864,7 +888,128 @@ static int gmap_pte_op_fixup(struct gmap *gmap, unsigned long gaddr, */ static void gmap_pte_op_end(spinlock_t *ptl) { - spin_unlock(ptl); + if (ptl) + spin_unlock(ptl); +} + +/** + * gmap_pmd_op_walk - walk the gmap tables, get the guest table lock + * and return the pmd pointer + * @gmap: pointer to guest mapping meta data structure + * @gaddr: virtual address in the guest address space + * + * Returns a pointer to the pmd for a guest address, or NULL + */ +static inline pmd_t *gmap_pmd_op_walk(struct gmap *gmap, unsigned long gaddr) +{ + pmd_t *pmdp; + + BUG_ON(gmap_is_shadow(gmap)); + spin_lock(&gmap->guest_table_lock); + pmdp = (pmd_t *) gmap_table_walk(gmap, gaddr, 1); + + if (!pmdp || pmd_none(*pmdp)) { + spin_unlock(&gmap->guest_table_lock); + return NULL; + } + + /* 4k page table entries are locked via the pte (pte_alloc_map_lock). */ + if (!pmd_large(*pmdp)) + spin_unlock(&gmap->guest_table_lock); + return pmdp; +} + +/** + * gmap_pmd_op_end - release the guest_table_lock if needed + * @gmap: pointer to the guest mapping meta data structure + * @pmdp: pointer to the pmd + */ +static inline void gmap_pmd_op_end(struct gmap *gmap, pmd_t *pmdp) +{ + if (pmd_large(*pmdp)) + spin_unlock(&gmap->guest_table_lock); +} + +/* + * gmap_protect_pmd - remove access rights to memory and set pmd notification bits + * @pmdp: pointer to the pmd to be protected + * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE + * @bits: notification bits to set + * + * Returns: + * 0 if successfully protected + * -EAGAIN if a fixup is needed + * -EINVAL if unsupported notifier bits have been specified + * + * Expected to be called with sg->mm->mmap_sem in read and + * guest_table_lock held. + */ +static int gmap_protect_pmd(struct gmap *gmap, unsigned long gaddr, + pmd_t *pmdp, int prot, unsigned long bits) +{ + int pmd_i = pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID; + int pmd_p = pmd_val(*pmdp) & _SEGMENT_ENTRY_PROTECT; + pmd_t new = *pmdp; + + /* Fixup needed */ + if ((pmd_i && (prot != PROT_NONE)) || (pmd_p && (prot == PROT_WRITE))) + return -EAGAIN; + + if (prot == PROT_NONE && !pmd_i) { + pmd_val(new) |= _SEGMENT_ENTRY_INVALID; + gmap_pmdp_xchg(gmap, pmdp, new, gaddr); + } + + if (prot == PROT_READ && !pmd_p) { + pmd_val(new) &= ~_SEGMENT_ENTRY_INVALID; + pmd_val(new) |= _SEGMENT_ENTRY_PROTECT; + gmap_pmdp_xchg(gmap, pmdp, new, gaddr); + } + + if (bits & GMAP_NOTIFY_MPROT) + pmd_val(*pmdp) |= _SEGMENT_ENTRY_GMAP_IN; + + /* Shadow GMAP protection needs split PMDs */ + if (bits & GMAP_NOTIFY_SHADOW) + return -EINVAL; + + return 0; +} + +/* + * gmap_protect_pte - remove access rights to memory and set pgste bits + * @gmap: pointer to guest mapping meta data structure + * @gaddr: virtual address in the guest address space + * @pmdp: pointer to the pmd associated with the pte + * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE + * @bits: notification bits to set + * + * Returns 0 if successfully protected, -ENOMEM if out of memory and + * -EAGAIN if a fixup is needed. + * + * Expected to be called with sg->mm->mmap_sem in read + */ +static int gmap_protect_pte(struct gmap *gmap, unsigned long gaddr, + pmd_t *pmdp, int prot, unsigned long bits) +{ + int rc; + pte_t *ptep; + spinlock_t *ptl = NULL; + unsigned long pbits = 0; + + if (pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID) + return -EAGAIN; + + ptep = pte_alloc_map_lock(gmap->mm, pmdp, gaddr, &ptl); + if (!ptep) + return -ENOMEM; + + pbits |= (bits & GMAP_NOTIFY_MPROT) ? PGSTE_IN_BIT : 0; + pbits |= (bits & GMAP_NOTIFY_SHADOW) ? PGSTE_VSIE_BIT : 0; + /* Protect and unlock. */ + rc = ptep_force_prot(gmap->mm, gaddr, ptep, prot, pbits); + gmap_pte_op_end(ptl); + return rc; } /* @@ -883,30 +1028,45 @@ static void gmap_pte_op_end(spinlock_t *ptl) static int gmap_protect_range(struct gmap *gmap, unsigned long gaddr, unsigned long len, int prot, unsigned long bits) { - unsigned long vmaddr; - spinlock_t *ptl; - pte_t *ptep; + unsigned long vmaddr, dist; + pmd_t *pmdp; int rc; BUG_ON(gmap_is_shadow(gmap)); while (len) { rc = -EAGAIN; - ptep = gmap_pte_op_walk(gmap, gaddr, &ptl); - if (ptep) { - rc = ptep_force_prot(gmap->mm, gaddr, ptep, prot, bits); - gmap_pte_op_end(ptl); + pmdp = gmap_pmd_op_walk(gmap, gaddr); + if (pmdp) { + if (!pmd_large(*pmdp)) { + rc = gmap_protect_pte(gmap, gaddr, pmdp, prot, + bits); + if (!rc) { + len -= PAGE_SIZE; + gaddr += PAGE_SIZE; + } + } else { + rc = gmap_protect_pmd(gmap, gaddr, pmdp, prot, + bits); + if (!rc) { + dist = HPAGE_SIZE - (gaddr & ~HPAGE_MASK); + len = len < dist ? 0 : len - dist; + gaddr = (gaddr & HPAGE_MASK) + HPAGE_SIZE; + } + } + gmap_pmd_op_end(gmap, pmdp); } if (rc) { + if (rc == -EINVAL) + return rc; + + /* -EAGAIN, fixup of userspace mm and gmap */ vmaddr = __gmap_translate(gmap, gaddr); if (IS_ERR_VALUE(vmaddr)) return vmaddr; rc = gmap_pte_op_fixup(gmap, gaddr, vmaddr, prot); if (rc) return rc; - continue; } - gaddr += PAGE_SIZE; - len -= PAGE_SIZE; } return 0; } @@ -935,7 +1095,7 @@ int gmap_mprotect_notify(struct gmap *gmap, unsigned long gaddr, if (!MACHINE_HAS_ESOP && prot == PROT_READ) return -EINVAL; down_read(&gmap->mm->mmap_sem); - rc = gmap_protect_range(gmap, gaddr, len, prot, PGSTE_IN_BIT); + rc = gmap_protect_range(gmap, gaddr, len, prot, GMAP_NOTIFY_MPROT); up_read(&gmap->mm->mmap_sem); return rc; } @@ -1474,6 +1634,7 @@ struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce, unsigned long limit; int rc; + BUG_ON(parent->mm->context.allow_gmap_hpage_1m); BUG_ON(gmap_is_shadow(parent)); spin_lock(&parent->shadow_lock); sg = gmap_find_shadow(parent, asce, edat_level); @@ -1526,7 +1687,7 @@ struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce, down_read(&parent->mm->mmap_sem); rc = gmap_protect_range(parent, asce & _ASCE_ORIGIN, ((asce & _ASCE_TABLE_LENGTH) + 1) * PAGE_SIZE, - PROT_READ, PGSTE_VSIE_BIT); + PROT_READ, GMAP_NOTIFY_SHADOW); up_read(&parent->mm->mmap_sem); spin_lock(&parent->shadow_lock); new->initialized = true; @@ -2092,6 +2253,225 @@ void ptep_notify(struct mm_struct *mm, unsigned long vmaddr, } EXPORT_SYMBOL_GPL(ptep_notify); +static void pmdp_notify_gmap(struct gmap *gmap, pmd_t *pmdp, + unsigned long gaddr) +{ + pmd_val(*pmdp) &= ~_SEGMENT_ENTRY_GMAP_IN; + gmap_call_notifier(gmap, gaddr, gaddr + HPAGE_SIZE - 1); +} + +/** + * gmap_pmdp_xchg - exchange a gmap pmd with another + * @gmap: pointer to the guest address space structure + * @pmdp: pointer to the pmd entry + * @new: replacement entry + * @gaddr: the affected guest address + * + * This function is assumed to be called with the guest_table_lock + * held. + */ +static void gmap_pmdp_xchg(struct gmap *gmap, pmd_t *pmdp, pmd_t new, + unsigned long gaddr) +{ + gaddr &= HPAGE_MASK; + pmdp_notify_gmap(gmap, pmdp, gaddr); + pmd_val(new) &= ~_SEGMENT_ENTRY_GMAP_IN; + if (MACHINE_HAS_TLB_GUEST) + __pmdp_idte(gaddr, (pmd_t *)pmdp, IDTE_GUEST_ASCE, gmap->asce, + IDTE_GLOBAL); + else if (MACHINE_HAS_IDTE) + __pmdp_idte(gaddr, (pmd_t *)pmdp, 0, 0, IDTE_GLOBAL); + else + __pmdp_csp(pmdp); + *pmdp = new; +} + +static void gmap_pmdp_clear(struct mm_struct *mm, unsigned long vmaddr, + int purge) +{ + pmd_t *pmdp; + struct gmap *gmap; + unsigned long gaddr; + + rcu_read_lock(); + list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) { + spin_lock(&gmap->guest_table_lock); + pmdp = (pmd_t *)radix_tree_delete(&gmap->host_to_guest, + vmaddr >> PMD_SHIFT); + if (pmdp) { + gaddr = __gmap_segment_gaddr((unsigned long *)pmdp); + pmdp_notify_gmap(gmap, pmdp, gaddr); + WARN_ON(pmd_val(*pmdp) & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE | + _SEGMENT_ENTRY_GMAP_UC)); + if (purge) + __pmdp_csp(pmdp); + pmd_val(*pmdp) = _SEGMENT_ENTRY_EMPTY; + } + spin_unlock(&gmap->guest_table_lock); + } + rcu_read_unlock(); +} + +/** + * gmap_pmdp_invalidate - invalidate all affected guest pmd entries without + * flushing + * @mm: pointer to the process mm_struct + * @vmaddr: virtual address in the process address space + */ +void gmap_pmdp_invalidate(struct mm_struct *mm, unsigned long vmaddr) +{ + gmap_pmdp_clear(mm, vmaddr, 0); +} +EXPORT_SYMBOL_GPL(gmap_pmdp_invalidate); + +/** + * gmap_pmdp_csp - csp all affected guest pmd entries + * @mm: pointer to the process mm_struct + * @vmaddr: virtual address in the process address space + */ +void gmap_pmdp_csp(struct mm_struct *mm, unsigned long vmaddr) +{ + gmap_pmdp_clear(mm, vmaddr, 1); +} +EXPORT_SYMBOL_GPL(gmap_pmdp_csp); + +/** + * gmap_pmdp_idte_local - invalidate and clear a guest pmd entry + * @mm: pointer to the process mm_struct + * @vmaddr: virtual address in the process address space + */ +void gmap_pmdp_idte_local(struct mm_struct *mm, unsigned long vmaddr) +{ + unsigned long *entry, gaddr; + struct gmap *gmap; + pmd_t *pmdp; + + rcu_read_lock(); + list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) { + spin_lock(&gmap->guest_table_lock); + entry = radix_tree_delete(&gmap->host_to_guest, + vmaddr >> PMD_SHIFT); + if (entry) { + pmdp = (pmd_t *)entry; + gaddr = __gmap_segment_gaddr(entry); + pmdp_notify_gmap(gmap, pmdp, gaddr); + WARN_ON(*entry & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE | + _SEGMENT_ENTRY_GMAP_UC)); + if (MACHINE_HAS_TLB_GUEST) + __pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE, + gmap->asce, IDTE_LOCAL); + else if (MACHINE_HAS_IDTE) + __pmdp_idte(gaddr, pmdp, 0, 0, IDTE_LOCAL); + *entry = _SEGMENT_ENTRY_EMPTY; + } + spin_unlock(&gmap->guest_table_lock); + } + rcu_read_unlock(); +} +EXPORT_SYMBOL_GPL(gmap_pmdp_idte_local); + +/** + * gmap_pmdp_idte_global - invalidate and clear a guest pmd entry + * @mm: pointer to the process mm_struct + * @vmaddr: virtual address in the process address space + */ +void gmap_pmdp_idte_global(struct mm_struct *mm, unsigned long vmaddr) +{ + unsigned long *entry, gaddr; + struct gmap *gmap; + pmd_t *pmdp; + + rcu_read_lock(); + list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) { + spin_lock(&gmap->guest_table_lock); + entry = radix_tree_delete(&gmap->host_to_guest, + vmaddr >> PMD_SHIFT); + if (entry) { + pmdp = (pmd_t *)entry; + gaddr = __gmap_segment_gaddr(entry); + pmdp_notify_gmap(gmap, pmdp, gaddr); + WARN_ON(*entry & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE | + _SEGMENT_ENTRY_GMAP_UC)); + if (MACHINE_HAS_TLB_GUEST) + __pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE, + gmap->asce, IDTE_GLOBAL); + else if (MACHINE_HAS_IDTE) + __pmdp_idte(gaddr, pmdp, 0, 0, IDTE_GLOBAL); + else + __pmdp_csp(pmdp); + *entry = _SEGMENT_ENTRY_EMPTY; + } + spin_unlock(&gmap->guest_table_lock); + } + rcu_read_unlock(); +} +EXPORT_SYMBOL_GPL(gmap_pmdp_idte_global); + +/** + * gmap_test_and_clear_dirty_pmd - test and reset segment dirty status + * @gmap: pointer to guest address space + * @pmdp: pointer to the pmd to be tested + * @gaddr: virtual address in the guest address space + * + * This function is assumed to be called with the guest_table_lock + * held. + */ +bool gmap_test_and_clear_dirty_pmd(struct gmap *gmap, pmd_t *pmdp, + unsigned long gaddr) +{ + if (pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID) + return false; + + /* Already protected memory, which did not change is clean */ + if (pmd_val(*pmdp) & _SEGMENT_ENTRY_PROTECT && + !(pmd_val(*pmdp) & _SEGMENT_ENTRY_GMAP_UC)) + return false; + + /* Clear UC indication and reset protection */ + pmd_val(*pmdp) &= ~_SEGMENT_ENTRY_GMAP_UC; + gmap_protect_pmd(gmap, gaddr, pmdp, PROT_READ, 0); + return true; +} + +/** + * gmap_sync_dirty_log_pmd - set bitmap based on dirty status of segment + * @gmap: pointer to guest address space + * @bitmap: dirty bitmap for this pmd + * @gaddr: virtual address in the guest address space + * @vmaddr: virtual address in the host address space + * + * This function is assumed to be called with the guest_table_lock + * held. + */ +void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long bitmap[4], + unsigned long gaddr, unsigned long vmaddr) +{ + int i; + pmd_t *pmdp; + pte_t *ptep; + spinlock_t *ptl; + + pmdp = gmap_pmd_op_walk(gmap, gaddr); + if (!pmdp) + return; + + if (pmd_large(*pmdp)) { + if (gmap_test_and_clear_dirty_pmd(gmap, pmdp, gaddr)) + bitmap_fill(bitmap, _PAGE_ENTRIES); + } else { + for (i = 0; i < _PAGE_ENTRIES; i++, vmaddr += PAGE_SIZE) { + ptep = pte_alloc_map_lock(gmap->mm, pmdp, vmaddr, &ptl); + if (!ptep) + continue; + if (ptep_test_and_clear_uc(gmap->mm, vmaddr, ptep)) + set_bit(i, bitmap); + spin_unlock(ptl); + } + } + gmap_pmd_op_end(gmap, pmdp); +} +EXPORT_SYMBOL_GPL(gmap_sync_dirty_log_pmd); + static inline void thp_split_mm(struct mm_struct *mm) { #ifdef CONFIG_TRANSPARENT_HUGEPAGE @@ -2168,17 +2548,45 @@ EXPORT_SYMBOL_GPL(s390_enable_sie); * Enable storage key handling from now on and initialize the storage * keys with the default key. */ -static int __s390_enable_skey(pte_t *pte, unsigned long addr, - unsigned long next, struct mm_walk *walk) +static int __s390_enable_skey_pte(pte_t *pte, unsigned long addr, + unsigned long next, struct mm_walk *walk) { /* Clear storage key */ ptep_zap_key(walk->mm, addr, pte); return 0; } +static int __s390_enable_skey_hugetlb(pte_t *pte, unsigned long addr, + unsigned long hmask, unsigned long next, + struct mm_walk *walk) +{ + pmd_t *pmd = (pmd_t *)pte; + unsigned long start, end; + struct page *page = pmd_page(*pmd); + + /* + * The write check makes sure we do not set a key on shared + * memory. This is needed as the walker does not differentiate + * between actual guest memory and the process executable or + * shared libraries. + */ + if (pmd_val(*pmd) & _SEGMENT_ENTRY_INVALID || + !(pmd_val(*pmd) & _SEGMENT_ENTRY_WRITE)) + return 0; + + start = pmd_val(*pmd) & HPAGE_MASK; + end = start + HPAGE_SIZE - 1; + __storage_key_init_range(start, end); + set_bit(PG_arch_1, &page->flags); + return 0; +} + int s390_enable_skey(void) { - struct mm_walk walk = { .pte_entry = __s390_enable_skey }; + struct mm_walk walk = { + .hugetlb_entry = __s390_enable_skey_hugetlb, + .pte_entry = __s390_enable_skey_pte, + }; struct mm_struct *mm = current->mm; struct vm_area_struct *vma; int rc = 0; diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index e804090f4470fc..b0246c705a192a 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -123,6 +123,29 @@ static inline pte_t __rste_to_pte(unsigned long rste) return pte; } +static void clear_huge_pte_skeys(struct mm_struct *mm, unsigned long rste) +{ + struct page *page; + unsigned long size, paddr; + + if (!mm_uses_skeys(mm) || + rste & _SEGMENT_ENTRY_INVALID) + return; + + if ((rste & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) { + page = pud_page(__pud(rste)); + size = PUD_SIZE; + paddr = rste & PUD_MASK; + } else { + page = pmd_page(__pmd(rste)); + size = PMD_SIZE; + paddr = rste & PMD_MASK; + } + + if (!test_and_set_bit(PG_arch_1, &page->flags)) + __storage_key_init_range(paddr, paddr + size - 1); +} + void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) { @@ -137,6 +160,7 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, rste |= _REGION_ENTRY_TYPE_R3 | _REGION3_ENTRY_LARGE; else rste |= _SEGMENT_ENTRY_LARGE; + clear_huge_pte_skeys(mm, rste); pte_val(*ptep) = rste; } diff --git a/arch/s390/mm/page-states.c b/arch/s390/mm/page-states.c index 382153ff17e30e..dc3cede7f2ec9d 100644 --- a/arch/s390/mm/page-states.c +++ b/arch/s390/mm/page-states.c @@ -271,7 +271,7 @@ void arch_set_page_states(int make_stable) list_for_each(l, &zone->free_area[order].free_list[t]) { page = list_entry(l, struct page, lru); if (make_stable) - set_page_stable_dat(page, 0); + set_page_stable_dat(page, order); else set_page_unused(page, order); } diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c index c44171588d0814..f8c6faab41f4b3 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c @@ -14,7 +14,7 @@ static inline unsigned long sske_frame(unsigned long addr, unsigned char skey) { - asm volatile(".insn rrf,0xb22b0000,%[skey],%[addr],9,0" + asm volatile(".insn rrf,0xb22b0000,%[skey],%[addr],1,0" : [addr] "+a" (addr) : [skey] "d" (skey)); return addr; } @@ -23,8 +23,6 @@ void __storage_key_init_range(unsigned long start, unsigned long end) { unsigned long boundary, size; - if (!PAGE_DEFAULT_KEY) - return; while (start < end) { if (MACHINE_HAS_EDAT1) { /* set storage keys for a 1MB frame */ @@ -37,7 +35,7 @@ void __storage_key_init_range(unsigned long start, unsigned long end) continue; } } - page_set_storage_key(start, PAGE_DEFAULT_KEY, 0); + page_set_storage_key(start, PAGE_DEFAULT_KEY, 1); start += PAGE_SIZE; } } diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c index e3bd5627afef34..76d89ee8b42883 100644 --- a/arch/s390/mm/pgalloc.c +++ b/arch/s390/mm/pgalloc.c @@ -28,7 +28,7 @@ static struct ctl_table page_table_sysctl[] = { .data = &page_table_allocate_pgste, .maxlen = sizeof(int), .mode = S_IRUGO | S_IWUSR, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, .extra1 = &page_table_allocate_pgste_min, .extra2 = &page_table_allocate_pgste_max, }, diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 301e466e4263d8..f2cc7da473e4ed 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -347,18 +347,27 @@ static inline void pmdp_idte_local(struct mm_struct *mm, mm->context.asce, IDTE_LOCAL); else __pmdp_idte(addr, pmdp, 0, 0, IDTE_LOCAL); + if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m) + gmap_pmdp_idte_local(mm, addr); } static inline void pmdp_idte_global(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp) { - if (MACHINE_HAS_TLB_GUEST) + if (MACHINE_HAS_TLB_GUEST) { __pmdp_idte(addr, pmdp, IDTE_NODAT | IDTE_GUEST_ASCE, mm->context.asce, IDTE_GLOBAL); - else if (MACHINE_HAS_IDTE) + if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m) + gmap_pmdp_idte_global(mm, addr); + } else if (MACHINE_HAS_IDTE) { __pmdp_idte(addr, pmdp, 0, 0, IDTE_GLOBAL); - else + if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m) + gmap_pmdp_idte_global(mm, addr); + } else { __pmdp_csp(pmdp); + if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m) + gmap_pmdp_csp(mm, addr); + } } static inline pmd_t pmdp_flush_direct(struct mm_struct *mm, @@ -392,6 +401,8 @@ static inline pmd_t pmdp_flush_lazy(struct mm_struct *mm, cpumask_of(smp_processor_id()))) { pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID; mm->context.flush_mm = 1; + if (mm_has_pgste(mm)) + gmap_pmdp_invalidate(mm, addr); } else { pmdp_idte_global(mm, addr, pmdp); } @@ -399,6 +410,24 @@ static inline pmd_t pmdp_flush_lazy(struct mm_struct *mm, return old; } +static pmd_t *pmd_alloc_map(struct mm_struct *mm, unsigned long addr) +{ + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + + pgd = pgd_offset(mm, addr); + p4d = p4d_alloc(mm, pgd, addr); + if (!p4d) + return NULL; + pud = pud_alloc(mm, p4d, addr); + if (!pud) + return NULL; + pmd = pmd_alloc(mm, pud, addr); + return pmd; +} + pmd_t pmdp_xchg_direct(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pmd_t new) { @@ -693,40 +722,14 @@ void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep) /* * Test and reset if a guest page is dirty */ -bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr) +bool ptep_test_and_clear_uc(struct mm_struct *mm, unsigned long addr, + pte_t *ptep) { - spinlock_t *ptl; - pgd_t *pgd; - p4d_t *p4d; - pud_t *pud; - pmd_t *pmd; pgste_t pgste; - pte_t *ptep; pte_t pte; bool dirty; int nodat; - pgd = pgd_offset(mm, addr); - p4d = p4d_alloc(mm, pgd, addr); - if (!p4d) - return false; - pud = pud_alloc(mm, p4d, addr); - if (!pud) - return false; - pmd = pmd_alloc(mm, pud, addr); - if (!pmd) - return false; - /* We can't run guests backed by huge pages, but userspace can - * still set them up and then try to migrate them without any - * migration support. - */ - if (pmd_large(*pmd)) - return true; - - ptep = pte_alloc_map_lock(mm, pmd, addr, &ptl); - if (unlikely(!ptep)) - return false; - pgste = pgste_get_lock(ptep); dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT); pgste_val(pgste) &= ~PGSTE_UC_BIT; @@ -742,21 +745,43 @@ bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr) *ptep = pte; } pgste_set_unlock(ptep, pgste); - - spin_unlock(ptl); return dirty; } -EXPORT_SYMBOL_GPL(test_and_clear_guest_dirty); +EXPORT_SYMBOL_GPL(ptep_test_and_clear_uc); int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, unsigned char key, bool nq) { - unsigned long keyul; + unsigned long keyul, paddr; spinlock_t *ptl; pgste_t old, new; + pmd_t *pmdp; pte_t *ptep; - ptep = get_locked_pte(mm, addr, &ptl); + pmdp = pmd_alloc_map(mm, addr); + if (unlikely(!pmdp)) + return -EFAULT; + + ptl = pmd_lock(mm, pmdp); + if (!pmd_present(*pmdp)) { + spin_unlock(ptl); + return -EFAULT; + } + + if (pmd_large(*pmdp)) { + paddr = pmd_val(*pmdp) & HPAGE_MASK; + paddr |= addr & ~HPAGE_MASK; + /* + * Huge pmds need quiescing operations, they are + * always mapped. + */ + page_set_storage_key(paddr, key, 1); + spin_unlock(ptl); + return 0; + } + spin_unlock(ptl); + + ptep = pte_alloc_map_lock(mm, pmdp, addr, &ptl); if (unlikely(!ptep)) return -EFAULT; @@ -767,14 +792,14 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, pgste_val(new) |= (keyul & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48; pgste_val(new) |= (keyul & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56; if (!(pte_val(*ptep) & _PAGE_INVALID)) { - unsigned long address, bits, skey; + unsigned long bits, skey; - address = pte_val(*ptep) & PAGE_MASK; - skey = (unsigned long) page_get_storage_key(address); + paddr = pte_val(*ptep) & PAGE_MASK; + skey = (unsigned long) page_get_storage_key(paddr); bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED); skey = key & (_PAGE_ACC_BITS | _PAGE_FP_BIT); /* Set storage key ACC and FP */ - page_set_storage_key(address, skey, !nq); + page_set_storage_key(paddr, skey, !nq); /* Merge host changed & referenced into pgste */ pgste_val(new) |= bits << 52; } @@ -830,11 +855,32 @@ EXPORT_SYMBOL(cond_set_guest_storage_key); int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr) { spinlock_t *ptl; + unsigned long paddr; pgste_t old, new; + pmd_t *pmdp; pte_t *ptep; int cc = 0; - ptep = get_locked_pte(mm, addr, &ptl); + pmdp = pmd_alloc_map(mm, addr); + if (unlikely(!pmdp)) + return -EFAULT; + + ptl = pmd_lock(mm, pmdp); + if (!pmd_present(*pmdp)) { + spin_unlock(ptl); + return -EFAULT; + } + + if (pmd_large(*pmdp)) { + paddr = pmd_val(*pmdp) & HPAGE_MASK; + paddr |= addr & ~HPAGE_MASK; + cc = page_reset_referenced(paddr); + spin_unlock(ptl); + return cc; + } + spin_unlock(ptl); + + ptep = pte_alloc_map_lock(mm, pmdp, addr, &ptl); if (unlikely(!ptep)) return -EFAULT; @@ -843,7 +889,8 @@ int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr) pgste_val(new) &= ~PGSTE_GR_BIT; if (!(pte_val(*ptep) & _PAGE_INVALID)) { - cc = page_reset_referenced(pte_val(*ptep) & PAGE_MASK); + paddr = pte_val(*ptep) & PAGE_MASK; + cc = page_reset_referenced(paddr); /* Merge real referenced bit into host-set */ pgste_val(new) |= ((unsigned long) cc << 53) & PGSTE_HR_BIT; } @@ -862,18 +909,42 @@ EXPORT_SYMBOL(reset_guest_reference_bit); int get_guest_storage_key(struct mm_struct *mm, unsigned long addr, unsigned char *key) { + unsigned long paddr; spinlock_t *ptl; pgste_t pgste; + pmd_t *pmdp; pte_t *ptep; - ptep = get_locked_pte(mm, addr, &ptl); + pmdp = pmd_alloc_map(mm, addr); + if (unlikely(!pmdp)) + return -EFAULT; + + ptl = pmd_lock(mm, pmdp); + if (!pmd_present(*pmdp)) { + /* Not yet mapped memory has a zero key */ + spin_unlock(ptl); + *key = 0; + return 0; + } + + if (pmd_large(*pmdp)) { + paddr = pmd_val(*pmdp) & HPAGE_MASK; + paddr |= addr & ~HPAGE_MASK; + *key = page_get_storage_key(paddr); + spin_unlock(ptl); + return 0; + } + spin_unlock(ptl); + + ptep = pte_alloc_map_lock(mm, pmdp, addr, &ptl); if (unlikely(!ptep)) return -EFAULT; pgste = pgste_get_lock(ptep); *key = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56; + paddr = pte_val(*ptep) & PAGE_MASK; if (!(pte_val(*ptep) & _PAGE_INVALID)) - *key = page_get_storage_key(pte_val(*ptep) & PAGE_MASK); + *key = page_get_storage_key(paddr); /* Reflect guest's logical view, not physical */ *key |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48; pgste_set_unlock(ptep, pgste); diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 5f0234ec8038eb..d7052cbe984f81 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -485,8 +485,6 @@ static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth) /* br %r1 */ _EMIT2(0x07f1); } else { - /* larl %r1,.+14 */ - EMIT6_PCREL_RILB(0xc0000000, REG_1, jit->prg + 14); /* ex 0,S390_lowcore.br_r1_tampoline */ EMIT4_DISP(0x44000000, REG_0, REG_0, offsetof(struct lowcore, br_r1_trampoline)); diff --git a/arch/s390/numa/numa.c b/arch/s390/numa/numa.c index 06a80434cfe63f..5bd374491f9461 100644 --- a/arch/s390/numa/numa.c +++ b/arch/s390/numa/numa.c @@ -134,26 +134,14 @@ void __init numa_setup(void) { pr_info("NUMA mode: %s\n", mode->name); nodes_clear(node_possible_map); + /* Initially attach all possible CPUs to node 0. */ + cpumask_copy(&node_to_cpumask_map[0], cpu_possible_mask); if (mode->setup) mode->setup(); numa_setup_memory(); memblock_dump_all(); } -/* - * numa_init_early() - Initialization initcall - * - * This runs when only one CPU is online and before the first - * topology update is called for by the scheduler. - */ -static int __init numa_init_early(void) -{ - /* Attach all possible CPUs to node 0 for now. */ - cpumask_copy(&node_to_cpumask_map[0], cpu_possible_mask); - return 0; -} -early_initcall(numa_init_early); - /* * numa_init_late() - Initialization initcall * diff --git a/arch/s390/pci/pci_debug.c b/arch/s390/pci/pci_debug.c index b482e95b6249e3..57f7cdac70a311 100644 --- a/arch/s390/pci/pci_debug.c +++ b/arch/s390/pci/pci_debug.c @@ -48,6 +48,10 @@ static char *pci_fmt2_names[] = { "Maximum work units", }; +static char *pci_fmt3_names[] = { + "Transmitted bytes", +}; + static char *pci_sw_names[] = { "Allocated pages", "Mapped pages", @@ -112,6 +116,10 @@ static int pci_perf_show(struct seq_file *m, void *v) pci_fmb_show(m, pci_fmt2_names, ARRAY_SIZE(pci_fmt2_names), &zdev->fmb->fmt2.consumed_work_units); break; + case 3: + pci_fmb_show(m, pci_fmt3_names, ARRAY_SIZE(pci_fmt3_names), + &zdev->fmb->fmt3.tx_bytes); + break; default: seq_puts(m, "Unknown format\n"); } diff --git a/arch/s390/purgatory/Makefile b/arch/s390/purgatory/Makefile index 1ace023cbdcec3..8d61218a71aa87 100644 --- a/arch/s390/purgatory/Makefile +++ b/arch/s390/purgatory/Makefile @@ -7,13 +7,13 @@ purgatory-y := head.o purgatory.o string.o sha256.o mem.o targets += $(purgatory-y) purgatory.ro kexec-purgatory.c PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y)) -$(obj)/sha256.o: $(srctree)/lib/sha256.c +$(obj)/sha256.o: $(srctree)/lib/sha256.c FORCE $(call if_changed_rule,cc_o_c) -$(obj)/mem.o: $(srctree)/arch/s390/lib/mem.S +$(obj)/mem.o: $(srctree)/arch/s390/lib/mem.S FORCE $(call if_changed_rule,as_o_S) -$(obj)/string.o: $(srctree)/arch/s390/lib/string.c +$(obj)/string.o: $(srctree)/arch/s390/lib/string.c FORCE $(call if_changed_rule,cc_o_c) LDFLAGS_purgatory.ro := -e purgatory_start -r --no-undefined -nostdlib @@ -21,8 +21,9 @@ LDFLAGS_purgatory.ro += -z nodefaultlib KBUILD_CFLAGS := -fno-strict-aliasing -Wall -Wstrict-prototypes KBUILD_CFLAGS += -Wno-pointer-sign -Wno-sign-compare KBUILD_CFLAGS += -fno-zero-initialized-in-bss -fno-builtin -ffreestanding -KBUILD_CFLAGS += -c -MD -Os -m64 -msoft-float +KBUILD_CFLAGS += -c -MD -Os -m64 -msoft-float -fno-common KBUILD_CFLAGS += $(call cc-option,-fno-PIE) +KBUILD_AFLAGS := $(filter-out -DCC_USING_EXPOLINE,$(KBUILD_AFLAGS)) $(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE $(call if_changed,ld) diff --git a/arch/s390/purgatory/head.S b/arch/s390/purgatory/head.S index 660c96a05a9b64..2e3707b12eddbb 100644 --- a/arch/s390/purgatory/head.S +++ b/arch/s390/purgatory/head.S @@ -243,33 +243,26 @@ gprregs: .quad 0 .endr -purgatory_sha256_digest: - .global purgatory_sha256_digest - .rept 32 /* SHA256_DIGEST_SIZE */ - .byte 0 - .endr - -purgatory_sha_regions: - .global purgatory_sha_regions - .rept 16 * __KEXEC_SHA_REGION_SIZE /* KEXEC_SEGMENTS_MAX */ - .byte 0 - .endr - -kernel_entry: - .global kernel_entry - .quad 0 - -kernel_type: - .global kernel_type - .quad 0 - -crash_start: - .global crash_start - .quad 0 +/* Macro to define a global variable with name and size (in bytes) to be + * shared with C code. + * + * Add the .size and .type attribute to satisfy checks on the Elf_Sym during + * purgatory load. + */ +.macro GLOBAL_VARIABLE name,size +\name: + .global \name + .size \name,\size + .type \name,object + .skip \size,0 +.endm -crash_size: - .global crash_size - .quad 0 +GLOBAL_VARIABLE purgatory_sha256_digest,32 +GLOBAL_VARIABLE purgatory_sha_regions,16*__KEXEC_SHA_REGION_SIZE +GLOBAL_VARIABLE kernel_entry,8 +GLOBAL_VARIABLE kernel_type,8 +GLOBAL_VARIABLE crash_start,8 +GLOBAL_VARIABLE crash_size,8 .align PAGE_SIZE stack: diff --git a/arch/s390/purgatory/purgatory.c b/arch/s390/purgatory/purgatory.c index 4e2beb3c29b797..3528e6da4e8792 100644 --- a/arch/s390/purgatory/purgatory.c +++ b/arch/s390/purgatory/purgatory.c @@ -12,15 +12,6 @@ #include #include -struct kexec_sha_region purgatory_sha_regions[KEXEC_SEGMENT_MAX]; -u8 purgatory_sha256_digest[SHA256_DIGEST_SIZE]; - -u64 kernel_entry; -u64 kernel_type; - -u64 crash_start; -u64 crash_size; - int verify_sha256_digest(void) { struct kexec_sha_region *ptr, *end; diff --git a/arch/s390/scripts/Makefile.chkbss b/arch/s390/scripts/Makefile.chkbss index d92f2d94a5d94f..9bba2c14e0cafb 100644 --- a/arch/s390/scripts/Makefile.chkbss +++ b/arch/s390/scripts/Makefile.chkbss @@ -2,13 +2,22 @@ quiet_cmd_chkbss = CHKBSS $< define cmd_chkbss + rm -f $@; \ if ! $(OBJDUMP) -j .bss -w -h $< | awk 'END { if ($$3) exit 1 }'; then \ echo "error: $< .bss section is not empty" >&2; exit 1; \ fi; \ touch $@; endef -$(obj)/built-in.a: $(patsubst %, $(obj)/%.chkbss, $(chkbss)) +chkbss-target ?= $(obj)/built-in.a +ifneq (,$(findstring /,$(chkbss))) +chkbss-files := $(patsubst %, %.chkbss, $(chkbss)) +else +chkbss-files := $(patsubst %, $(obj)/%.chkbss, $(chkbss)) +endif + +$(chkbss-target): $(chkbss-files) +targets += $(notdir $(chkbss-files)) %.o.chkbss: %.o $(call cmd,chkbss) diff --git a/arch/s390/tools/gen_opcode_table.c b/arch/s390/tools/gen_opcode_table.c index 259aa0680d1a74..a1bc02b29c8138 100644 --- a/arch/s390/tools/gen_opcode_table.c +++ b/arch/s390/tools/gen_opcode_table.c @@ -257,7 +257,7 @@ static void add_to_group(struct gen_opcode *desc, struct insn *insn, int offset) if (!desc->group) exit(EXIT_FAILURE); group = &desc->group[desc->nr_groups - 1]; - strncpy(group->opcode, insn->opcode, 2); + memcpy(group->opcode, insn->opcode, 2); group->type = insn->type; group->offset = offset; group->count = 1; @@ -283,7 +283,7 @@ static void print_opcode_table(struct gen_opcode *desc) continue; add_to_group(desc, insn, offset); if (strncmp(opcode, insn->opcode, 2)) { - strncpy(opcode, insn->opcode, 2); + memcpy(opcode, insn->opcode, 2); printf("\t/* %.2s */ \\\n", opcode); } print_opcode(insn, offset); diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index a9f60d0ee02ea2..a23e7d394a0ad1 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -73,8 +73,8 @@ static int dasd_alloc_queue(struct dasd_block *); static void dasd_setup_queue(struct dasd_block *); static void dasd_free_queue(struct dasd_block *); static int dasd_flush_block_queue(struct dasd_block *); -static void dasd_device_tasklet(struct dasd_device *); -static void dasd_block_tasklet(struct dasd_block *); +static void dasd_device_tasklet(unsigned long); +static void dasd_block_tasklet(unsigned long); static void do_kick_device(struct work_struct *); static void do_restore_device(struct work_struct *); static void do_reload_device(struct work_struct *); @@ -125,8 +125,7 @@ struct dasd_device *dasd_alloc_device(void) dasd_init_chunklist(&device->erp_chunks, device->erp_mem, PAGE_SIZE); spin_lock_init(&device->mem_lock); atomic_set(&device->tasklet_scheduled, 0); - tasklet_init(&device->tasklet, - (void (*)(unsigned long)) dasd_device_tasklet, + tasklet_init(&device->tasklet, dasd_device_tasklet, (unsigned long) device); INIT_LIST_HEAD(&device->ccw_queue); timer_setup(&device->timer, dasd_device_timeout, 0); @@ -166,8 +165,7 @@ struct dasd_block *dasd_alloc_block(void) atomic_set(&block->open_count, -1); atomic_set(&block->tasklet_scheduled, 0); - tasklet_init(&block->tasklet, - (void (*)(unsigned long)) dasd_block_tasklet, + tasklet_init(&block->tasklet, dasd_block_tasklet, (unsigned long) block); INIT_LIST_HEAD(&block->ccw_queue); spin_lock_init(&block->queue_lock); @@ -2064,8 +2062,9 @@ EXPORT_SYMBOL_GPL(dasd_flush_device_queue); /* * Acquire the device lock and process queues for the device. */ -static void dasd_device_tasklet(struct dasd_device *device) +static void dasd_device_tasklet(unsigned long data) { + struct dasd_device *device = (struct dasd_device *) data; struct list_head final_queue; atomic_set (&device->tasklet_scheduled, 0); @@ -2783,8 +2782,9 @@ static void __dasd_block_start_head(struct dasd_block *block) * block layer request queue, creates ccw requests, enqueues them on * a dasd_device and processes ccw requests that have been returned. */ -static void dasd_block_tasklet(struct dasd_block *block) +static void dasd_block_tasklet(unsigned long data) { + struct dasd_block *block = (struct dasd_block *) data; struct list_head final_queue; struct list_head *l, *n; struct dasd_ccw_req *cqr; @@ -3127,6 +3127,7 @@ static int dasd_alloc_queue(struct dasd_block *block) block->tag_set.nr_hw_queues = nr_hw_queues; block->tag_set.queue_depth = queue_depth; block->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; + block->tag_set.numa_node = NUMA_NO_NODE; rc = blk_mq_alloc_tag_set(&block->tag_set); if (rc) diff --git a/drivers/s390/block/dasd_alias.c b/drivers/s390/block/dasd_alias.c index e36a114354fc36..b9ce93e9df8929 100644 --- a/drivers/s390/block/dasd_alias.c +++ b/drivers/s390/block/dasd_alias.c @@ -708,7 +708,7 @@ static int reset_summary_unit_check(struct alias_lcu *lcu, struct ccw1 *ccw; cqr = lcu->rsu_cqr; - strncpy((char *) &cqr->magic, "ECKD", 4); + memcpy((char *) &cqr->magic, "ECKD", 4); ASCEBC((char *) &cqr->magic, 4); ccw = cqr->cpaddr; ccw->cmd_code = DASD_ECKD_CCW_RSCK; diff --git a/drivers/s390/block/dasd_devmap.c b/drivers/s390/block/dasd_devmap.c index b9ebb565ee2c70..fab35c6170cc81 100644 --- a/drivers/s390/block/dasd_devmap.c +++ b/drivers/s390/block/dasd_devmap.c @@ -426,7 +426,7 @@ dasd_add_busid(const char *bus_id, int features) if (!devmap) { /* This bus_id is new. */ new->devindex = dasd_max_devindex++; - strncpy(new->bus_id, bus_id, DASD_BUS_ID_SIZE); + strlcpy(new->bus_id, bus_id, DASD_BUS_ID_SIZE); new->features = features; new->device = NULL; list_add(&new->list, &dasd_hashlists[hash]); diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index bbf95b78ef5d9e..4e7b55a14b1a46 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -1780,6 +1780,9 @@ static void dasd_eckd_uncheck_device(struct dasd_device *device) struct dasd_eckd_private *private = device->private; int i; + if (!private) + return; + dasd_alias_disconnect_device_from_lcu(device); private->ned = NULL; private->sneq = NULL; @@ -2035,8 +2038,11 @@ static int dasd_eckd_basic_to_ready(struct dasd_device *device) static int dasd_eckd_online_to_ready(struct dasd_device *device) { - cancel_work_sync(&device->reload_device); - cancel_work_sync(&device->kick_validate); + if (cancel_work_sync(&device->reload_device)) + dasd_put_device(device); + if (cancel_work_sync(&device->kick_validate)) + dasd_put_device(device); + return 0; }; @@ -3535,7 +3541,7 @@ static int prepare_itcw(struct itcw *itcw, dcw = itcw_add_dcw(itcw, pfx_cmd, 0, &pfxdata, sizeof(pfxdata), total_data_size); - return PTR_RET(dcw); + return PTR_ERR_OR_ZERO(dcw); } static struct dasd_ccw_req *dasd_eckd_build_cp_tpm_track( diff --git a/drivers/s390/block/dasd_eer.c b/drivers/s390/block/dasd_eer.c index 6ef8714dc69350..93bb09da7fdc4e 100644 --- a/drivers/s390/block/dasd_eer.c +++ b/drivers/s390/block/dasd_eer.c @@ -313,7 +313,7 @@ static void dasd_eer_write_standard_trigger(struct dasd_device *device, ktime_get_real_ts64(&ts); header.tv_sec = ts.tv_sec; header.tv_usec = ts.tv_nsec / NSEC_PER_USEC; - strncpy(header.busid, dev_name(&device->cdev->dev), + strlcpy(header.busid, dev_name(&device->cdev->dev), DASD_EER_BUSID_SIZE); spin_lock_irqsave(&bufferlock, flags); @@ -356,7 +356,7 @@ static void dasd_eer_write_snss_trigger(struct dasd_device *device, ktime_get_real_ts64(&ts); header.tv_sec = ts.tv_sec; header.tv_usec = ts.tv_nsec / NSEC_PER_USEC; - strncpy(header.busid, dev_name(&device->cdev->dev), + strlcpy(header.busid, dev_name(&device->cdev->dev), DASD_EER_BUSID_SIZE); spin_lock_irqsave(&bufferlock, flags); diff --git a/drivers/s390/block/scm_blk.c b/drivers/s390/block/scm_blk.c index b1fcb76dd272e0..98f66b7b679451 100644 --- a/drivers/s390/block/scm_blk.c +++ b/drivers/s390/block/scm_blk.c @@ -455,6 +455,7 @@ int scm_blk_dev_setup(struct scm_blk_dev *bdev, struct scm_device *scmdev) bdev->tag_set.nr_hw_queues = nr_requests; bdev->tag_set.queue_depth = nr_requests_per_io * nr_requests; bdev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; + bdev->tag_set.numa_node = NUMA_NO_NODE; ret = blk_mq_alloc_tag_set(&bdev->tag_set); if (ret) diff --git a/drivers/s390/char/Makefile b/drivers/s390/char/Makefile index 0a4c13e1e76eae..c6ab34f94b1b54 100644 --- a/drivers/s390/char/Makefile +++ b/drivers/s390/char/Makefile @@ -12,11 +12,6 @@ GCOV_PROFILE_sclp_early_core.o := n KCOV_INSTRUMENT_sclp_early_core.o := n UBSAN_SANITIZE_sclp_early_core.o := n -ifneq ($(CC_FLAGS_MARCH),-march=z900) -CFLAGS_REMOVE_sclp_early_core.o += $(CC_FLAGS_MARCH) -CFLAGS_sclp_early_core.o += -march=z900 -endif - CFLAGS_sclp_early_core.o += -D__NO_FORTIFY CFLAGS_REMOVE_sclp_early_core.o += $(CC_FLAGS_EXPOLINE) diff --git a/drivers/s390/char/keyboard.c b/drivers/s390/char/keyboard.c index 79eb60958015a5..bbb3001b0961f3 100644 --- a/drivers/s390/char/keyboard.c +++ b/drivers/s390/char/keyboard.c @@ -334,37 +334,41 @@ do_kdsk_ioctl(struct kbd_data *kbd, struct kbentry __user *user_kbe, int cmd, int perm) { struct kbentry tmp; + unsigned long kb_index, kb_table; ushort *key_map, val, ov; if (copy_from_user(&tmp, user_kbe, sizeof(struct kbentry))) return -EFAULT; + kb_index = (unsigned long) tmp.kb_index; #if NR_KEYS < 256 - if (tmp.kb_index >= NR_KEYS) + if (kb_index >= NR_KEYS) return -EINVAL; #endif + kb_table = (unsigned long) tmp.kb_table; #if MAX_NR_KEYMAPS < 256 - if (tmp.kb_table >= MAX_NR_KEYMAPS) + if (kb_table >= MAX_NR_KEYMAPS) return -EINVAL; + kb_table = array_index_nospec(kb_table , MAX_NR_KEYMAPS); #endif switch (cmd) { case KDGKBENT: - key_map = kbd->key_maps[tmp.kb_table]; + key_map = kbd->key_maps[kb_table]; if (key_map) { - val = U(key_map[tmp.kb_index]); + val = U(key_map[kb_index]); if (KTYP(val) >= KBD_NR_TYPES) val = K_HOLE; } else - val = (tmp.kb_index ? K_HOLE : K_NOSUCHMAP); + val = (kb_index ? K_HOLE : K_NOSUCHMAP); return put_user(val, &user_kbe->kb_value); case KDSKBENT: if (!perm) return -EPERM; - if (!tmp.kb_index && tmp.kb_value == K_NOSUCHMAP) { + if (!kb_index && tmp.kb_value == K_NOSUCHMAP) { /* disallocate map */ - key_map = kbd->key_maps[tmp.kb_table]; + key_map = kbd->key_maps[kb_table]; if (key_map) { - kbd->key_maps[tmp.kb_table] = NULL; + kbd->key_maps[kb_table] = NULL; kfree(key_map); } break; @@ -375,18 +379,18 @@ do_kdsk_ioctl(struct kbd_data *kbd, struct kbentry __user *user_kbe, if (KVAL(tmp.kb_value) > kbd_max_vals[KTYP(tmp.kb_value)]) return -EINVAL; - if (!(key_map = kbd->key_maps[tmp.kb_table])) { + if (!(key_map = kbd->key_maps[kb_table])) { int j; key_map = kmalloc(sizeof(plain_map), GFP_KERNEL); if (!key_map) return -ENOMEM; - kbd->key_maps[tmp.kb_table] = key_map; + kbd->key_maps[kb_table] = key_map; for (j = 0; j < NR_KEYS; j++) key_map[j] = U(K_HOLE); } - ov = U(key_map[tmp.kb_index]); + ov = U(key_map[kb_index]); if (tmp.kb_value == ov) break; /* nothing to do */ /* @@ -395,7 +399,7 @@ do_kdsk_ioctl(struct kbd_data *kbd, struct kbentry __user *user_kbe, if (((ov == K_SAK) || (tmp.kb_value == K_SAK)) && !capable(CAP_SYS_ADMIN)) return -EPERM; - key_map[tmp.kb_index] = U(tmp.kb_value); + key_map[kb_index] = U(tmp.kb_value); break; } return 0; diff --git a/drivers/s390/char/monwriter.c b/drivers/s390/char/monwriter.c index 76c158c4151037..4f1a69c9d81d62 100644 --- a/drivers/s390/char/monwriter.c +++ b/drivers/s390/char/monwriter.c @@ -61,7 +61,7 @@ static int monwrite_diag(struct monwrite_hdr *myhdr, char *buffer, int fcn) struct appldata_product_id id; int rc; - strncpy(id.prod_nr, "LNXAPPL", 7); + memcpy(id.prod_nr, "LNXAPPL", 7); id.prod_fn = myhdr->applid; id.record_nr = myhdr->record_num; id.version_nr = myhdr->version; diff --git a/drivers/s390/char/sclp_async.c b/drivers/s390/char/sclp_async.c index ee6f3b56372831..e69b12a406362f 100644 --- a/drivers/s390/char/sclp_async.c +++ b/drivers/s390/char/sclp_async.c @@ -64,42 +64,18 @@ static struct notifier_block call_home_panic_nb = { .priority = INT_MAX, }; -static int proc_handler_callhome(struct ctl_table *ctl, int write, - void __user *buffer, size_t *count, - loff_t *ppos) -{ - unsigned long val; - int len, rc; - char buf[3]; - - if (!*count || (*ppos && !write)) { - *count = 0; - return 0; - } - if (!write) { - len = snprintf(buf, sizeof(buf), "%d\n", callhome_enabled); - rc = copy_to_user(buffer, buf, sizeof(buf)); - if (rc != 0) - return -EFAULT; - } else { - len = *count; - rc = kstrtoul_from_user(buffer, len, 0, &val); - if (rc) - return rc; - if (val != 0 && val != 1) - return -EINVAL; - callhome_enabled = val; - } - *count = len; - *ppos += len; - return 0; -} +static int zero; +static int one = 1; static struct ctl_table callhome_table[] = { { .procname = "callhome", + .data = &callhome_enabled, + .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_handler_callhome, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one, }, {} }; diff --git a/drivers/s390/char/tape_3590.c b/drivers/s390/char/tape_3590.c index 37e65a05517f50..cdcde18e72203e 100644 --- a/drivers/s390/char/tape_3590.c +++ b/drivers/s390/char/tape_3590.c @@ -113,16 +113,16 @@ static int crypt_enabled(struct tape_device *device) static void ext_to_int_kekl(struct tape390_kekl *in, struct tape3592_kekl *out) { - int i; + int len; memset(out, 0, sizeof(*out)); if (in->type == TAPE390_KEKL_TYPE_HASH) out->flags |= 0x40; if (in->type_on_tape == TAPE390_KEKL_TYPE_HASH) out->flags |= 0x80; - strncpy(out->label, in->label, 64); - for (i = strlen(in->label); i < sizeof(out->label); i++) - out->label[i] = ' '; + len = min(sizeof(out->label), strlen(in->label)); + memcpy(out->label, in->label, len); + memset(out->label + len, ' ', sizeof(out->label) - len); ASCEBC(out->label, sizeof(out->label)); } diff --git a/drivers/s390/char/tape_class.c b/drivers/s390/char/tape_class.c index a07102472ce97e..b58df0dd0039c0 100644 --- a/drivers/s390/char/tape_class.c +++ b/drivers/s390/char/tape_class.c @@ -54,10 +54,10 @@ struct tape_class_device *register_tape_dev( if (!tcd) return ERR_PTR(-ENOMEM); - strncpy(tcd->device_name, device_name, TAPECLASS_NAME_LEN); + strlcpy(tcd->device_name, device_name, TAPECLASS_NAME_LEN); for (s = strchr(tcd->device_name, '/'); s; s = strchr(s, '/')) *s = '!'; - strncpy(tcd->mode_name, mode_name, TAPECLASS_NAME_LEN); + strlcpy(tcd->mode_name, mode_name, TAPECLASS_NAME_LEN); for (s = strchr(tcd->mode_name, '/'); s; s = strchr(s, '/')) *s = '!'; @@ -77,7 +77,7 @@ struct tape_class_device *register_tape_dev( tcd->class_device = device_create(tape_class, device, tcd->char_device->dev, NULL, "%s", tcd->device_name); - rc = PTR_RET(tcd->class_device); + rc = PTR_ERR_OR_ZERO(tcd->class_device); if (rc) goto fail_with_cdev; rc = sysfs_create_link( diff --git a/drivers/s390/cio/chp.c b/drivers/s390/cio/chp.c index afbdee74147dd0..51038ec309c12e 100644 --- a/drivers/s390/cio/chp.c +++ b/drivers/s390/cio/chp.c @@ -471,14 +471,17 @@ int chp_new(struct chp_id chpid) { struct channel_subsystem *css = css_by_id(chpid.cssid); struct channel_path *chp; - int ret; + int ret = 0; + mutex_lock(&css->mutex); if (chp_is_registered(chpid)) - return 0; - chp = kzalloc(sizeof(struct channel_path), GFP_KERNEL); - if (!chp) - return -ENOMEM; + goto out; + chp = kzalloc(sizeof(struct channel_path), GFP_KERNEL); + if (!chp) { + ret = -ENOMEM; + goto out; + } /* fill in status, etc. */ chp->chpid = chpid; chp->state = 1; @@ -505,21 +508,20 @@ int chp_new(struct chp_id chpid) put_device(&chp->dev); goto out; } - mutex_lock(&css->mutex); + if (css->cm_enabled) { ret = chp_add_cmg_attr(chp); if (ret) { device_unregister(&chp->dev); - mutex_unlock(&css->mutex); goto out; } } css->chps[chpid.id] = chp; - mutex_unlock(&css->mutex); goto out; out_free: kfree(chp); out: + mutex_unlock(&css->mutex); return ret; } @@ -585,8 +587,7 @@ static void chp_process_crw(struct crw *crw0, struct crw *crw1, switch (crw0->erc) { case CRW_ERC_IPARM: /* Path has come. */ case CRW_ERC_INIT: - if (!chp_is_registered(chpid)) - chp_new(chpid); + chp_new(chpid); chsc_chp_online(chpid); break; case CRW_ERC_PERRI: /* Path has gone. */ diff --git a/drivers/s390/cio/chsc.c b/drivers/s390/cio/chsc.c index 9029804dcd225b..a0baee25134c0c 100644 --- a/drivers/s390/cio/chsc.c +++ b/drivers/s390/cio/chsc.c @@ -91,7 +91,7 @@ struct chsc_ssd_area { u16 sch; /* subchannel */ u8 chpid[8]; /* chpids 0-7 */ u16 fla[8]; /* full link addresses 0-7 */ -} __attribute__ ((packed)); +} __packed __aligned(PAGE_SIZE); int chsc_get_ssd_info(struct subchannel_id schid, struct chsc_ssd_info *ssd) { @@ -319,7 +319,7 @@ struct chsc_sei { struct chsc_sei_nt2_area nt2_area; u8 nt_area[PAGE_SIZE - 24]; } u; -} __packed; +} __packed __aligned(PAGE_SIZE); /* * Node Descriptor as defined in SA22-7204, "Common I/O-Device Commands" @@ -841,7 +841,7 @@ int __chsc_do_secm(struct channel_subsystem *css, int enable) u32 : 4; u32 fmt : 4; u32 : 16; - } __attribute__ ((packed)) *secm_area; + } *secm_area; unsigned long flags; int ret, ccode; @@ -1014,7 +1014,7 @@ int chsc_get_channel_measurement_chars(struct channel_path *chp) u32 cmg : 8; u32 zeroes3; u32 data[NR_MEASUREMENT_CHARS]; - } __attribute__ ((packed)) *scmc_area; + } *scmc_area; chp->shared = -1; chp->cmg = -1; @@ -1142,7 +1142,7 @@ int __init chsc_get_cssid(int idx) u8 cssid; u32 : 24; } list[0]; - } __packed *sdcal_area; + } *sdcal_area; int ret; spin_lock_irq(&chsc_page_lock); @@ -1192,7 +1192,7 @@ chsc_determine_css_characteristics(void) u32 reserved4; u32 general_char[510]; u32 chsc_char[508]; - } __attribute__ ((packed)) *scsc_area; + } *scsc_area; spin_lock_irqsave(&chsc_page_lock, flags); memset(chsc_page, 0, PAGE_SIZE); @@ -1236,7 +1236,7 @@ int chsc_sstpc(void *page, unsigned int op, u16 ctrl, u64 *clock_delta) unsigned int rsvd3[3]; u64 clock_delta; unsigned int rsvd4[2]; - } __attribute__ ((packed)) *rr; + } *rr; int rc; memset(page, 0, PAGE_SIZE); @@ -1261,7 +1261,7 @@ int chsc_sstpi(void *page, void *result, size_t size) unsigned int rsvd0[3]; struct chsc_header response; char data[]; - } __attribute__ ((packed)) *rr; + } *rr; int rc; memset(page, 0, PAGE_SIZE); @@ -1284,7 +1284,7 @@ int chsc_siosl(struct subchannel_id schid) u32 word3; struct chsc_header response; u32 word[11]; - } __attribute__ ((packed)) *siosl_area; + } *siosl_area; unsigned long flags; int ccode; int rc; diff --git a/drivers/s390/cio/chsc.h b/drivers/s390/cio/chsc.h index 5c9f0dd33f4ee3..78aba8d94eec33 100644 --- a/drivers/s390/cio/chsc.h +++ b/drivers/s390/cio/chsc.h @@ -15,12 +15,12 @@ #define NR_MEASUREMENT_CHARS 5 struct cmg_chars { u32 values[NR_MEASUREMENT_CHARS]; -} __attribute__ ((packed)); +}; #define NR_MEASUREMENT_ENTRIES 8 struct cmg_entry { u32 values[NR_MEASUREMENT_ENTRIES]; -} __attribute__ ((packed)); +}; struct channel_path_desc_fmt1 { u8 flags; @@ -38,7 +38,7 @@ struct channel_path_desc_fmt1 { u8 s:1; u8 f:1; u32 zeros[2]; -} __attribute__ ((packed)); +}; struct channel_path_desc_fmt3 { struct channel_path_desc_fmt1 fmt1_desc; @@ -59,7 +59,7 @@ struct css_chsc_char { u32:7; u32 pnso:1; /* bit 116 */ u32:11; -}__attribute__((packed)); +} __packed; extern struct css_chsc_char css_chsc_characteristics; @@ -82,7 +82,7 @@ struct chsc_ssqd_area { struct chsc_header response; u32:32; struct qdio_ssqd_desc qdio_ssqd; -} __packed; +} __packed __aligned(PAGE_SIZE); struct chsc_scssc_area { struct chsc_header request; @@ -102,7 +102,7 @@ struct chsc_scssc_area { u32 reserved[1004]; struct chsc_header response; u32:32; -} __packed; +} __packed __aligned(PAGE_SIZE); struct chsc_scpd { struct chsc_header request; @@ -120,7 +120,7 @@ struct chsc_scpd { struct chsc_header response; u32:32; u8 data[0]; -} __packed; +} __packed __aligned(PAGE_SIZE); struct chsc_sda_area { struct chsc_header request; @@ -199,7 +199,7 @@ struct chsc_scm_info { u32 reserved2[10]; u64 restok; struct sale scmal[248]; -} __packed; +} __packed __aligned(PAGE_SIZE); int chsc_scm_info(struct chsc_scm_info *scm_area, u64 token); @@ -243,7 +243,7 @@ struct chsc_pnso_area { struct qdio_brinfo_entry_l3_ipv4 l3_ipv4[0]; struct qdio_brinfo_entry_l2 l2[0]; } entries; -} __packed; +} __packed __aligned(PAGE_SIZE); int chsc_pnso_brinfo(struct subchannel_id schid, struct chsc_pnso_area *brinfo_area, diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c index 5130d7c67239b3..de744ca158fdf7 100644 --- a/drivers/s390/cio/cio.c +++ b/drivers/s390/cio/cio.c @@ -526,76 +526,6 @@ int cio_disable_subchannel(struct subchannel *sch) } EXPORT_SYMBOL_GPL(cio_disable_subchannel); -static int cio_check_devno_blacklisted(struct subchannel *sch) -{ - if (is_blacklisted(sch->schid.ssid, sch->schib.pmcw.dev)) { - /* - * This device must not be known to Linux. So we simply - * say that there is no device and return ENODEV. - */ - CIO_MSG_EVENT(6, "Blacklisted device detected " - "at devno %04X, subchannel set %x\n", - sch->schib.pmcw.dev, sch->schid.ssid); - return -ENODEV; - } - return 0; -} - -/** - * cio_validate_subchannel - basic validation of subchannel - * @sch: subchannel structure to be filled out - * @schid: subchannel id - * - * Find out subchannel type and initialize struct subchannel. - * Return codes: - * 0 on success - * -ENXIO for non-defined subchannels - * -ENODEV for invalid subchannels or blacklisted devices - * -EIO for subchannels in an invalid subchannel set - */ -int cio_validate_subchannel(struct subchannel *sch, struct subchannel_id schid) -{ - char dbf_txt[15]; - int ccode; - int err; - - sprintf(dbf_txt, "valsch%x", schid.sch_no); - CIO_TRACE_EVENT(4, dbf_txt); - - /* - * The first subchannel that is not-operational (ccode==3) - * indicates that there aren't any more devices available. - * If stsch gets an exception, it means the current subchannel set - * is not valid. - */ - ccode = stsch(schid, &sch->schib); - if (ccode) { - err = (ccode == 3) ? -ENXIO : ccode; - goto out; - } - sch->st = sch->schib.pmcw.st; - sch->schid = schid; - - switch (sch->st) { - case SUBCHANNEL_TYPE_IO: - case SUBCHANNEL_TYPE_MSG: - if (!css_sch_is_valid(&sch->schib)) - err = -ENODEV; - else - err = cio_check_devno_blacklisted(sch); - break; - default: - err = 0; - } - if (err) - goto out; - - CIO_MSG_EVENT(4, "Subchannel 0.%x.%04x reports subchannel type %04X\n", - sch->schid.ssid, sch->schid.sch_no, sch->st); -out: - return err; -} - /* * do_cio_interrupt() handles all normal I/O device IRQ's */ @@ -719,6 +649,7 @@ struct subchannel *cio_probe_console(void) { struct subchannel_id schid; struct subchannel *sch; + struct schib schib; int sch_no, ret; sch_no = cio_get_console_sch_no(); @@ -728,7 +659,11 @@ struct subchannel *cio_probe_console(void) } init_subchannel_id(&schid); schid.sch_no = sch_no; - sch = css_alloc_subchannel(schid); + ret = stsch(schid, &schib); + if (ret) + return ERR_PTR(-ENODEV); + + sch = css_alloc_subchannel(schid, &schib); if (IS_ERR(sch)) return sch; diff --git a/drivers/s390/cio/cio.h b/drivers/s390/cio/cio.h index 94cd813bdcfef8..9811fd8a0c7310 100644 --- a/drivers/s390/cio/cio.h +++ b/drivers/s390/cio/cio.h @@ -119,7 +119,6 @@ DECLARE_PER_CPU(struct irb, cio_irb); #define to_subchannel(n) container_of(n, struct subchannel, dev) -extern int cio_validate_subchannel (struct subchannel *, struct subchannel_id); extern int cio_enable_subchannel(struct subchannel *, u32); extern int cio_disable_subchannel (struct subchannel *); extern int cio_cancel (struct subchannel *); diff --git a/drivers/s390/cio/css.c b/drivers/s390/cio/css.c index 9263a0fb385845..aea50292264629 100644 --- a/drivers/s390/cio/css.c +++ b/drivers/s390/cio/css.c @@ -25,6 +25,7 @@ #include "css.h" #include "cio.h" +#include "blacklist.h" #include "cio_debug.h" #include "ioasm.h" #include "chsc.h" @@ -168,18 +169,53 @@ static void css_subchannel_release(struct device *dev) kfree(sch); } -struct subchannel *css_alloc_subchannel(struct subchannel_id schid) +static int css_validate_subchannel(struct subchannel_id schid, + struct schib *schib) +{ + int err; + + switch (schib->pmcw.st) { + case SUBCHANNEL_TYPE_IO: + case SUBCHANNEL_TYPE_MSG: + if (!css_sch_is_valid(schib)) + err = -ENODEV; + else if (is_blacklisted(schid.ssid, schib->pmcw.dev)) { + CIO_MSG_EVENT(6, "Blacklisted device detected " + "at devno %04X, subchannel set %x\n", + schib->pmcw.dev, schid.ssid); + err = -ENODEV; + } else + err = 0; + break; + default: + err = 0; + } + if (err) + goto out; + + CIO_MSG_EVENT(4, "Subchannel 0.%x.%04x reports subchannel type %04X\n", + schid.ssid, schid.sch_no, schib->pmcw.st); +out: + return err; +} + +struct subchannel *css_alloc_subchannel(struct subchannel_id schid, + struct schib *schib) { struct subchannel *sch; int ret; + ret = css_validate_subchannel(schid, schib); + if (ret < 0) + return ERR_PTR(ret); + sch = kzalloc(sizeof(*sch), GFP_KERNEL | GFP_DMA); if (!sch) return ERR_PTR(-ENOMEM); - ret = cio_validate_subchannel(sch, schid); - if (ret < 0) - goto err; + sch->schid = schid; + sch->schib = *schib; + sch->st = schib->pmcw.st; ret = css_sch_create_locks(sch); if (ret) @@ -244,8 +280,7 @@ static void ssd_register_chpids(struct chsc_ssd_info *ssd) for (i = 0; i < 8; i++) { mask = 0x80 >> i; if (ssd->path_mask & mask) - if (!chp_is_registered(ssd->chpid[i])) - chp_new(ssd->chpid[i]); + chp_new(ssd->chpid[i]); } } @@ -382,12 +417,12 @@ int css_register_subchannel(struct subchannel *sch) return ret; } -static int css_probe_device(struct subchannel_id schid) +static int css_probe_device(struct subchannel_id schid, struct schib *schib) { struct subchannel *sch; int ret; - sch = css_alloc_subchannel(schid); + sch = css_alloc_subchannel(schid, schib); if (IS_ERR(sch)) return PTR_ERR(sch); @@ -436,23 +471,23 @@ EXPORT_SYMBOL_GPL(css_sch_is_valid); static int css_evaluate_new_subchannel(struct subchannel_id schid, int slow) { struct schib schib; + int ccode; if (!slow) { /* Will be done on the slow path. */ return -EAGAIN; } - if (stsch(schid, &schib)) { - /* Subchannel is not provided. */ - return -ENXIO; - } - if (!css_sch_is_valid(&schib)) { - /* Unusable - ignore. */ - return 0; - } - CIO_MSG_EVENT(4, "event: sch 0.%x.%04x, new\n", schid.ssid, - schid.sch_no); + /* + * The first subchannel that is not-operational (ccode==3) + * indicates that there aren't any more devices available. + * If stsch gets an exception, it means the current subchannel set + * is not valid. + */ + ccode = stsch(schid, &schib); + if (ccode) + return (ccode == 3) ? -ENXIO : ccode; - return css_probe_device(schid); + return css_probe_device(schid, &schib); } static int css_evaluate_known_subchannel(struct subchannel *sch, int slow) @@ -1081,6 +1116,11 @@ static int __init channel_subsystem_init(void) if (ret) goto out_wq; + /* Register subchannels which are already in use. */ + cio_register_early_subchannels(); + /* Start initial subchannel evaluation. */ + css_schedule_eval_all(); + return ret; out_wq: destroy_workqueue(cio_work_q); @@ -1120,10 +1160,6 @@ int css_complete_work(void) */ static int __init channel_subsystem_init_sync(void) { - /* Register subchannels which are already in use. */ - cio_register_early_subchannels(); - /* Start initial subchannel evaluation. */ - css_schedule_eval_all(); css_complete_work(); return 0; } diff --git a/drivers/s390/cio/css.h b/drivers/s390/cio/css.h index 30357cbf350afb..8d832900a63dd4 100644 --- a/drivers/s390/cio/css.h +++ b/drivers/s390/cio/css.h @@ -103,7 +103,8 @@ extern void css_driver_unregister(struct css_driver *); extern void css_sch_device_unregister(struct subchannel *); extern int css_register_subchannel(struct subchannel *); -extern struct subchannel *css_alloc_subchannel(struct subchannel_id); +extern struct subchannel *css_alloc_subchannel(struct subchannel_id, + struct schib *schib); extern struct subchannel *get_subchannel_by_schid(struct subchannel_id); extern int css_init_done; extern int max_ssid; diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c index f4ca72dd862ff3..9c7d9da42ba082 100644 --- a/drivers/s390/cio/qdio_main.c +++ b/drivers/s390/cio/qdio_main.c @@ -631,21 +631,20 @@ static inline unsigned long qdio_aob_for_buffer(struct qdio_output_q *q, unsigned long phys_aob = 0; if (!q->use_cq) - goto out; + return 0; if (!q->aobs[bufnr]) { struct qaob *aob = qdio_allocate_aob(); q->aobs[bufnr] = aob; } if (q->aobs[bufnr]) { - q->sbal_state[bufnr].flags = QDIO_OUTBUF_STATE_FLAG_NONE; q->sbal_state[bufnr].aob = q->aobs[bufnr]; q->aobs[bufnr]->user1 = (u64) q->sbal_state[bufnr].user; phys_aob = virt_to_phys(q->aobs[bufnr]); WARN_ON_ONCE(phys_aob & 0xFF); } -out: + q->sbal_state[bufnr].flags = 0; return phys_aob; } diff --git a/drivers/s390/cio/trace.h b/drivers/s390/cio/trace.h index 1f8d1c1e566de7..0ebb29b6fd6df2 100644 --- a/drivers/s390/cio/trace.h +++ b/drivers/s390/cio/trace.h @@ -30,6 +30,17 @@ DECLARE_EVENT_CLASS(s390_class_schib, __field(u16, schno) __field(u16, devno) __field_struct(struct schib, schib) + __field(u8, pmcw_ena) + __field(u8, pmcw_st) + __field(u8, pmcw_dnv) + __field(u16, pmcw_dev) + __field(u8, pmcw_lpm) + __field(u8, pmcw_pnom) + __field(u8, pmcw_lpum) + __field(u8, pmcw_pim) + __field(u8, pmcw_pam) + __field(u8, pmcw_pom) + __field(u64, pmcw_chpid) __field(int, cc) ), TP_fast_assign( @@ -38,18 +49,29 @@ DECLARE_EVENT_CLASS(s390_class_schib, __entry->schno = schid.sch_no; __entry->devno = schib->pmcw.dev; __entry->schib = *schib; + __entry->pmcw_ena = schib->pmcw.ena; + __entry->pmcw_st = schib->pmcw.ena; + __entry->pmcw_dnv = schib->pmcw.dnv; + __entry->pmcw_dev = schib->pmcw.dev; + __entry->pmcw_lpm = schib->pmcw.lpm; + __entry->pmcw_pnom = schib->pmcw.pnom; + __entry->pmcw_lpum = schib->pmcw.lpum; + __entry->pmcw_pim = schib->pmcw.pim; + __entry->pmcw_pam = schib->pmcw.pam; + __entry->pmcw_pom = schib->pmcw.pom; + memcpy(&__entry->pmcw_chpid, &schib->pmcw.chpid, 8); __entry->cc = cc; ), TP_printk("schid=%x.%x.%04x cc=%d ena=%d st=%d dnv=%d dev=%04x " "lpm=0x%02x pnom=0x%02x lpum=0x%02x pim=0x%02x pam=0x%02x " "pom=0x%02x chpids=%016llx", __entry->cssid, __entry->ssid, __entry->schno, __entry->cc, - __entry->schib.pmcw.ena, __entry->schib.pmcw.st, - __entry->schib.pmcw.dnv, __entry->schib.pmcw.dev, - __entry->schib.pmcw.lpm, __entry->schib.pmcw.pnom, - __entry->schib.pmcw.lpum, __entry->schib.pmcw.pim, - __entry->schib.pmcw.pam, __entry->schib.pmcw.pom, - *((u64 *) __entry->schib.pmcw.chpid) + __entry->pmcw_ena, __entry->pmcw_st, + __entry->pmcw_dnv, __entry->pmcw_dev, + __entry->pmcw_lpm, __entry->pmcw_pnom, + __entry->pmcw_lpum, __entry->pmcw_pim, + __entry->pmcw_pam, __entry->pmcw_pom, + __entry->pmcw_chpid ) ); @@ -89,6 +111,13 @@ TRACE_EVENT(s390_cio_tsch, __field(u8, ssid) __field(u16, schno) __field_struct(struct irb, irb) + __field(u8, scsw_dcc) + __field(u8, scsw_pno) + __field(u8, scsw_fctl) + __field(u8, scsw_actl) + __field(u8, scsw_stctl) + __field(u8, scsw_dstat) + __field(u8, scsw_cstat) __field(int, cc) ), TP_fast_assign( @@ -96,15 +125,22 @@ TRACE_EVENT(s390_cio_tsch, __entry->ssid = schid.ssid; __entry->schno = schid.sch_no; __entry->irb = *irb; + __entry->scsw_dcc = scsw_cc(&irb->scsw); + __entry->scsw_pno = scsw_pno(&irb->scsw); + __entry->scsw_fctl = scsw_fctl(&irb->scsw); + __entry->scsw_actl = scsw_actl(&irb->scsw); + __entry->scsw_stctl = scsw_stctl(&irb->scsw); + __entry->scsw_dstat = scsw_dstat(&irb->scsw); + __entry->scsw_cstat = scsw_cstat(&irb->scsw); __entry->cc = cc; ), TP_printk("schid=%x.%x.%04x cc=%d dcc=%d pno=%d fctl=0x%x actl=0x%x " "stctl=0x%x dstat=0x%x cstat=0x%x", __entry->cssid, __entry->ssid, __entry->schno, __entry->cc, - scsw_cc(&__entry->irb.scsw), scsw_pno(&__entry->irb.scsw), - scsw_fctl(&__entry->irb.scsw), scsw_actl(&__entry->irb.scsw), - scsw_stctl(&__entry->irb.scsw), - scsw_dstat(&__entry->irb.scsw), scsw_cstat(&__entry->irb.scsw) + __entry->scsw_dcc, __entry->scsw_pno, + __entry->scsw_fctl, __entry->scsw_actl, + __entry->scsw_stctl, + __entry->scsw_dstat, __entry->scsw_cstat ) ); @@ -122,6 +158,9 @@ TRACE_EVENT(s390_cio_tpi, __field(u8, cssid) __field(u8, ssid) __field(u16, schno) + __field(u8, adapter_IO) + __field(u8, isc) + __field(u8, type) ), TP_fast_assign( __entry->cc = cc; @@ -136,11 +175,14 @@ TRACE_EVENT(s390_cio_tpi, __entry->cssid = __entry->tpi_info.schid.cssid; __entry->ssid = __entry->tpi_info.schid.ssid; __entry->schno = __entry->tpi_info.schid.sch_no; + __entry->adapter_IO = __entry->tpi_info.adapter_IO; + __entry->isc = __entry->tpi_info.isc; + __entry->type = __entry->tpi_info.type; ), TP_printk("schid=%x.%x.%04x cc=%d a=%d isc=%d type=%d", __entry->cssid, __entry->ssid, __entry->schno, __entry->cc, - __entry->tpi_info.adapter_IO, __entry->tpi_info.isc, - __entry->tpi_info.type + __entry->adapter_IO, __entry->isc, + __entry->type ) ); @@ -299,16 +341,20 @@ TRACE_EVENT(s390_cio_interrupt, __field(u8, cssid) __field(u8, ssid) __field(u16, schno) + __field(u8, isc) + __field(u8, type) ), TP_fast_assign( __entry->tpi_info = *tpi_info; - __entry->cssid = __entry->tpi_info.schid.cssid; - __entry->ssid = __entry->tpi_info.schid.ssid; - __entry->schno = __entry->tpi_info.schid.sch_no; + __entry->cssid = tpi_info->schid.cssid; + __entry->ssid = tpi_info->schid.ssid; + __entry->schno = tpi_info->schid.sch_no; + __entry->isc = tpi_info->isc; + __entry->type = tpi_info->type; ), TP_printk("schid=%x.%x.%04x isc=%d type=%d", __entry->cssid, __entry->ssid, __entry->schno, - __entry->tpi_info.isc, __entry->tpi_info.type + __entry->isc, __entry->type ) ); @@ -321,11 +367,13 @@ TRACE_EVENT(s390_cio_adapter_int, TP_ARGS(tpi_info), TP_STRUCT__entry( __field_struct(struct tpi_info, tpi_info) + __field(u8, isc) ), TP_fast_assign( __entry->tpi_info = *tpi_info; + __entry->isc = tpi_info->isc; ), - TP_printk("isc=%d", __entry->tpi_info.isc) + TP_printk("isc=%d", __entry->isc) ); /** @@ -339,16 +387,30 @@ TRACE_EVENT(s390_cio_stcrw, TP_STRUCT__entry( __field_struct(struct crw, crw) __field(int, cc) + __field(u8, slct) + __field(u8, oflw) + __field(u8, chn) + __field(u8, rsc) + __field(u8, anc) + __field(u8, erc) + __field(u16, rsid) ), TP_fast_assign( __entry->crw = *crw; __entry->cc = cc; + __entry->slct = crw->slct; + __entry->oflw = crw->oflw; + __entry->chn = crw->chn; + __entry->rsc = crw->rsc; + __entry->anc = crw->anc; + __entry->erc = crw->erc; + __entry->rsid = crw->rsid; ), TP_printk("cc=%d slct=%d oflw=%d chn=%d rsc=%d anc=%d erc=0x%x " "rsid=0x%x", - __entry->cc, __entry->crw.slct, __entry->crw.oflw, - __entry->crw.chn, __entry->crw.rsc, __entry->crw.anc, - __entry->crw.erc, __entry->crw.rsid + __entry->cc, __entry->slct, __entry->oflw, + __entry->chn, __entry->rsc, __entry->anc, + __entry->erc, __entry->rsid ) ); diff --git a/drivers/s390/crypto/ap_asm.h b/drivers/s390/crypto/ap_asm.h deleted file mode 100644 index 16b59ce5e01d9f..00000000000000 --- a/drivers/s390/crypto/ap_asm.h +++ /dev/null @@ -1,236 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright IBM Corp. 2016 - * Author(s): Martin Schwidefsky - * - * Adjunct processor bus inline assemblies. - */ - -#ifndef _AP_ASM_H_ -#define _AP_ASM_H_ - -#include - -/** - * ap_intructions_available() - Test if AP instructions are available. - * - * Returns 0 if the AP instructions are installed. - */ -static inline int ap_instructions_available(void) -{ - register unsigned long reg0 asm ("0") = AP_MKQID(0, 0); - register unsigned long reg1 asm ("1") = -ENODEV; - register unsigned long reg2 asm ("2") = 0UL; - - asm volatile( - " .long 0xb2af0000\n" /* PQAP(TAPQ) */ - "0: la %1,0\n" - "1:\n" - EX_TABLE(0b, 1b) - : "+d" (reg0), "+d" (reg1), "+d" (reg2) : : "cc"); - return reg1; -} - -/** - * ap_tapq(): Test adjunct processor queue. - * @qid: The AP queue number - * @info: Pointer to queue descriptor - * - * Returns AP queue status structure. - */ -static inline struct ap_queue_status ap_tapq(ap_qid_t qid, unsigned long *info) -{ - register unsigned long reg0 asm ("0") = qid; - register struct ap_queue_status reg1 asm ("1"); - register unsigned long reg2 asm ("2") = 0UL; - - asm volatile(".long 0xb2af0000" /* PQAP(TAPQ) */ - : "+d" (reg0), "=d" (reg1), "+d" (reg2) : : "cc"); - if (info) - *info = reg2; - return reg1; -} - -/** - * ap_pqap_rapq(): Reset adjunct processor queue. - * @qid: The AP queue number - * - * Returns AP queue status structure. - */ -static inline struct ap_queue_status ap_rapq(ap_qid_t qid) -{ - register unsigned long reg0 asm ("0") = qid | 0x01000000UL; - register struct ap_queue_status reg1 asm ("1"); - register unsigned long reg2 asm ("2") = 0UL; - - asm volatile( - ".long 0xb2af0000" /* PQAP(RAPQ) */ - : "+d" (reg0), "=d" (reg1), "+d" (reg2) : : "cc"); - return reg1; -} - -/** - * ap_aqic(): Control interruption for a specific AP. - * @qid: The AP queue number - * @qirqctrl: struct ap_qirq_ctrl (64 bit value) - * @ind: The notification indicator byte - * - * Returns AP queue status. - */ -static inline struct ap_queue_status ap_aqic(ap_qid_t qid, - struct ap_qirq_ctrl qirqctrl, - void *ind) -{ - register unsigned long reg0 asm ("0") = qid | (3UL << 24); - register struct ap_qirq_ctrl reg1_in asm ("1") = qirqctrl; - register struct ap_queue_status reg1_out asm ("1"); - register void *reg2 asm ("2") = ind; - - asm volatile( - ".long 0xb2af0000" /* PQAP(AQIC) */ - : "+d" (reg0), "+d" (reg1_in), "=d" (reg1_out), "+d" (reg2) - : - : "cc"); - return reg1_out; -} - -/** - * ap_qci(): Get AP configuration data - * - * Returns 0 on success, or -EOPNOTSUPP. - */ -static inline int ap_qci(void *config) -{ - register unsigned long reg0 asm ("0") = 0x04000000UL; - register unsigned long reg1 asm ("1") = -EINVAL; - register void *reg2 asm ("2") = (void *) config; - - asm volatile( - ".long 0xb2af0000\n" /* PQAP(QCI) */ - "0: la %1,0\n" - "1:\n" - EX_TABLE(0b, 1b) - : "+d" (reg0), "+d" (reg1), "+d" (reg2) - : - : "cc", "memory"); - - return reg1; -} - -/* - * union ap_qact_ap_info - used together with the - * ap_aqic() function to provide a convenient way - * to handle the ap info needed by the qact function. - */ -union ap_qact_ap_info { - unsigned long val; - struct { - unsigned int : 3; - unsigned int mode : 3; - unsigned int : 26; - unsigned int cat : 8; - unsigned int : 8; - unsigned char ver[2]; - }; -}; - -/** - * ap_qact(): Query AP combatibility type. - * @qid: The AP queue number - * @apinfo: On input the info about the AP queue. On output the - * alternate AP queue info provided by the qact function - * in GR2 is stored in. - * - * Returns AP queue status. Check response_code field for failures. - */ -static inline struct ap_queue_status ap_qact(ap_qid_t qid, int ifbit, - union ap_qact_ap_info *apinfo) -{ - register unsigned long reg0 asm ("0") = qid | (5UL << 24) - | ((ifbit & 0x01) << 22); - register unsigned long reg1_in asm ("1") = apinfo->val; - register struct ap_queue_status reg1_out asm ("1"); - register unsigned long reg2 asm ("2") = 0; - - asm volatile( - ".long 0xb2af0000" /* PQAP(QACT) */ - : "+d" (reg0), "+d" (reg1_in), "=d" (reg1_out), "+d" (reg2) - : : "cc"); - apinfo->val = reg2; - return reg1_out; -} - -/** - * ap_nqap(): Send message to adjunct processor queue. - * @qid: The AP queue number - * @psmid: The program supplied message identifier - * @msg: The message text - * @length: The message length - * - * Returns AP queue status structure. - * Condition code 1 on NQAP can't happen because the L bit is 1. - * Condition code 2 on NQAP also means the send is incomplete, - * because a segment boundary was reached. The NQAP is repeated. - */ -static inline struct ap_queue_status ap_nqap(ap_qid_t qid, - unsigned long long psmid, - void *msg, size_t length) -{ - register unsigned long reg0 asm ("0") = qid | 0x40000000UL; - register struct ap_queue_status reg1 asm ("1"); - register unsigned long reg2 asm ("2") = (unsigned long) msg; - register unsigned long reg3 asm ("3") = (unsigned long) length; - register unsigned long reg4 asm ("4") = (unsigned int) (psmid >> 32); - register unsigned long reg5 asm ("5") = psmid & 0xffffffff; - - asm volatile ( - "0: .long 0xb2ad0042\n" /* NQAP */ - " brc 2,0b" - : "+d" (reg0), "=d" (reg1), "+d" (reg2), "+d" (reg3) - : "d" (reg4), "d" (reg5) - : "cc", "memory"); - return reg1; -} - -/** - * ap_dqap(): Receive message from adjunct processor queue. - * @qid: The AP queue number - * @psmid: Pointer to program supplied message identifier - * @msg: The message text - * @length: The message length - * - * Returns AP queue status structure. - * Condition code 1 on DQAP means the receive has taken place - * but only partially. The response is incomplete, hence the - * DQAP is repeated. - * Condition code 2 on DQAP also means the receive is incomplete, - * this time because a segment boundary was reached. Again, the - * DQAP is repeated. - * Note that gpr2 is used by the DQAP instruction to keep track of - * any 'residual' length, in case the instruction gets interrupted. - * Hence it gets zeroed before the instruction. - */ -static inline struct ap_queue_status ap_dqap(ap_qid_t qid, - unsigned long long *psmid, - void *msg, size_t length) -{ - register unsigned long reg0 asm("0") = qid | 0x80000000UL; - register struct ap_queue_status reg1 asm ("1"); - register unsigned long reg2 asm("2") = 0UL; - register unsigned long reg4 asm("4") = (unsigned long) msg; - register unsigned long reg5 asm("5") = (unsigned long) length; - register unsigned long reg6 asm("6") = 0UL; - register unsigned long reg7 asm("7") = 0UL; - - - asm volatile( - "0: .long 0xb2ae0064\n" /* DQAP */ - " brc 6,0b\n" - : "+d" (reg0), "=d" (reg1), "+d" (reg2), - "+d" (reg4), "+d" (reg5), "+d" (reg6), "+d" (reg7) - : : "cc", "memory"); - *psmid = (((unsigned long long) reg6) << 32) + reg7; - return reg1; -} - -#endif /* _AP_ASM_H_ */ diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index 35a0c2b52f8237..bf27fc4d133591 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -36,7 +36,6 @@ #include #include "ap_bus.h" -#include "ap_asm.h" #include "ap_debug.h" /* @@ -174,24 +173,6 @@ static inline int ap_qact_available(void) return 0; } -/** - * ap_test_queue(): Test adjunct processor queue. - * @qid: The AP queue number - * @tbit: Test facilities bit - * @info: Pointer to queue descriptor - * - * Returns AP queue status structure. - */ -struct ap_queue_status ap_test_queue(ap_qid_t qid, - int tbit, - unsigned long *info) -{ - if (tbit) - qid |= 1UL << 23; /* set T bit*/ - return ap_tapq(qid, info); -} -EXPORT_SYMBOL(ap_test_queue); - /* * ap_query_configuration(): Fetch cryptographic config info * @@ -200,7 +181,7 @@ EXPORT_SYMBOL(ap_test_queue); * is returned, e.g. if the PQAP(QCI) instruction is not * available, the return value will be -EOPNOTSUPP. */ -int ap_query_configuration(struct ap_config_info *info) +static inline int ap_query_configuration(struct ap_config_info *info) { if (!ap_configuration_available()) return -EOPNOTSUPP; @@ -493,7 +474,7 @@ static int ap_poll_thread_start(void) return 0; mutex_lock(&ap_poll_thread_mutex); ap_poll_kthread = kthread_run(ap_poll_thread, NULL, "appoll"); - rc = PTR_RET(ap_poll_kthread); + rc = PTR_ERR_OR_ZERO(ap_poll_kthread); if (rc) ap_poll_kthread = NULL; mutex_unlock(&ap_poll_thread_mutex); @@ -1261,7 +1242,7 @@ static int __init ap_module_init(void) /* Create /sys/devices/ap. */ ap_root_device = root_device_register("ap"); - rc = PTR_RET(ap_root_device); + rc = PTR_ERR_OR_ZERO(ap_root_device); if (rc) goto out_bus; diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h index 6a273c5ebca5b0..936541937e15d1 100644 --- a/drivers/s390/crypto/ap_bus.h +++ b/drivers/s390/crypto/ap_bus.h @@ -15,6 +15,7 @@ #include #include +#include #include #define AP_DEVICES 256 /* Number of AP devices. */ diff --git a/drivers/s390/crypto/ap_card.c b/drivers/s390/crypto/ap_card.c index 2c726df210f664..c13e43292cb740 100644 --- a/drivers/s390/crypto/ap_card.c +++ b/drivers/s390/crypto/ap_card.c @@ -14,7 +14,6 @@ #include #include "ap_bus.h" -#include "ap_asm.h" /* * AP card related attributes. diff --git a/drivers/s390/crypto/ap_queue.c b/drivers/s390/crypto/ap_queue.c index ba3a2e13b0ebe2..e365171fe28f3b 100644 --- a/drivers/s390/crypto/ap_queue.c +++ b/drivers/s390/crypto/ap_queue.c @@ -14,26 +14,6 @@ #include #include "ap_bus.h" -#include "ap_asm.h" - -/** - * ap_queue_irq_ctrl(): Control interruption on a AP queue. - * @qirqctrl: struct ap_qirq_ctrl (64 bit value) - * @ind: The notification indicator byte - * - * Returns AP queue status. - * - * Control interruption on the given AP queue. - * Just a simple wrapper function for the low level PQAP(AQIC) - * instruction available for other kernel modules. - */ -struct ap_queue_status ap_queue_irq_ctrl(ap_qid_t qid, - struct ap_qirq_ctrl qirqctrl, - void *ind) -{ - return ap_aqic(qid, qirqctrl, ind); -} -EXPORT_SYMBOL(ap_queue_irq_ctrl); /** * ap_queue_enable_interruption(): Enable interruption on an AP queue. diff --git a/drivers/s390/crypto/pkey_api.c b/drivers/s390/crypto/pkey_api.c index 3929c8be8098b4..e663432395c1da 100644 --- a/drivers/s390/crypto/pkey_api.c +++ b/drivers/s390/crypto/pkey_api.c @@ -699,7 +699,7 @@ static int query_crypto_facility(u16 cardnr, u16 domain, /* fill request cprb param block with FQ request */ preqparm = (struct fqreqparm *) preqcblk->req_parmb; memcpy(preqparm->subfunc_code, "FQ", 2); - strncpy(preqparm->rule_array, keyword, sizeof(preqparm->rule_array)); + memcpy(preqparm->rule_array, keyword, sizeof(preqparm->rule_array)); preqparm->rule_array_len = sizeof(preqparm->rule_array_len) + sizeof(preqparm->rule_array); preqparm->lv1.len = sizeof(preqparm->lv1); diff --git a/drivers/s390/crypto/zcrypt_card.c b/drivers/s390/crypto/zcrypt_card.c index 233e1e695208b9..da2c8dfd4d741e 100644 --- a/drivers/s390/crypto/zcrypt_card.c +++ b/drivers/s390/crypto/zcrypt_card.c @@ -83,9 +83,21 @@ static ssize_t zcrypt_card_online_store(struct device *dev, static DEVICE_ATTR(online, 0644, zcrypt_card_online_show, zcrypt_card_online_store); +static ssize_t zcrypt_card_load_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct zcrypt_card *zc = to_ap_card(dev)->private; + + return snprintf(buf, PAGE_SIZE, "%d\n", atomic_read(&zc->load)); +} + +static DEVICE_ATTR(load, 0444, zcrypt_card_load_show, NULL); + static struct attribute *zcrypt_card_attrs[] = { &dev_attr_type.attr, &dev_attr_online.attr, + &dev_attr_load.attr, NULL, }; diff --git a/drivers/s390/crypto/zcrypt_cca_key.h b/drivers/s390/crypto/zcrypt_cca_key.h index 011d61d8a4ae58..1752622b95f701 100644 --- a/drivers/s390/crypto/zcrypt_cca_key.h +++ b/drivers/s390/crypto/zcrypt_cca_key.h @@ -99,7 +99,7 @@ struct cca_pvt_ext_CRT_sec { * @mex: pointer to user input data * @p: pointer to memory area for the key * - * Returns the size of the key area or -EFAULT + * Returns the size of the key area or negative errno value. */ static inline int zcrypt_type6_mex_key_en(struct ica_rsa_modexpo *mex, void *p) { @@ -118,6 +118,15 @@ static inline int zcrypt_type6_mex_key_en(struct ica_rsa_modexpo *mex, void *p) unsigned char *temp; int i; + /* + * The inputdatalength was a selection criteria in the dispatching + * function zcrypt_rsa_modexpo(). However, do a plausibility check + * here to make sure the following copy_from_user() can't be utilized + * to compromise the system. + */ + if (WARN_ON_ONCE(mex->inputdatalength > 512)) + return -EINVAL; + memset(key, 0, sizeof(*key)); key->pubHdr = static_pub_hdr; @@ -178,6 +187,15 @@ static inline int zcrypt_type6_crt_key(struct ica_rsa_modexpo_crt *crt, void *p) struct cca_public_sec *pub; int short_len, long_len, pad_len, key_len, size; + /* + * The inputdatalength was a selection criteria in the dispatching + * function zcrypt_rsa_crt(). However, do a plausibility check + * here to make sure the following copy_from_user() can't be utilized + * to compromise the system. + */ + if (WARN_ON_ONCE(crt->inputdatalength > 512)) + return -EINVAL; + memset(key, 0, sizeof(*key)); short_len = (crt->inputdatalength + 1) / 2; diff --git a/drivers/s390/crypto/zcrypt_msgtype6.c b/drivers/s390/crypto/zcrypt_msgtype6.c index 97d4bacbc44220..e70ae078c86b85 100644 --- a/drivers/s390/crypto/zcrypt_msgtype6.c +++ b/drivers/s390/crypto/zcrypt_msgtype6.c @@ -246,7 +246,7 @@ int speed_idx_ep11(int req_type) * @ap_msg: pointer to AP message * @mex: pointer to user input data * - * Returns 0 on success or -EFAULT. + * Returns 0 on success or negative errno value. */ static int ICAMEX_msg_to_type6MEX_msgX(struct zcrypt_queue *zq, struct ap_message *ap_msg, @@ -272,6 +272,14 @@ static int ICAMEX_msg_to_type6MEX_msgX(struct zcrypt_queue *zq, } __packed * msg = ap_msg->message; int size; + /* + * The inputdatalength was a selection criteria in the dispatching + * function zcrypt_rsa_modexpo(). However, make sure the following + * copy_from_user() never exceeds the allocated buffer space. + */ + if (WARN_ON_ONCE(mex->inputdatalength > PAGE_SIZE)) + return -EINVAL; + /* VUD.ciphertext */ msg->length = mex->inputdatalength + 2; if (copy_from_user(msg->text, mex->inputdata, mex->inputdatalength)) @@ -307,7 +315,7 @@ static int ICAMEX_msg_to_type6MEX_msgX(struct zcrypt_queue *zq, * @ap_msg: pointer to AP message * @crt: pointer to user input data * - * Returns 0 on success or -EFAULT. + * Returns 0 on success or negative errno value. */ static int ICACRT_msg_to_type6CRT_msgX(struct zcrypt_queue *zq, struct ap_message *ap_msg, @@ -334,6 +342,14 @@ static int ICACRT_msg_to_type6CRT_msgX(struct zcrypt_queue *zq, } __packed * msg = ap_msg->message; int size; + /* + * The inputdatalength was a selection criteria in the dispatching + * function zcrypt_rsa_crt(). However, make sure the following + * copy_from_user() never exceeds the allocated buffer space. + */ + if (WARN_ON_ONCE(crt->inputdatalength > PAGE_SIZE)) + return -EINVAL; + /* VUD.ciphertext */ msg->length = crt->inputdatalength + 2; if (copy_from_user(msg->text, crt->inputdata, crt->inputdatalength)) diff --git a/drivers/s390/crypto/zcrypt_queue.c b/drivers/s390/crypto/zcrypt_queue.c index 720434e18007e3..91a52f26835307 100644 --- a/drivers/s390/crypto/zcrypt_queue.c +++ b/drivers/s390/crypto/zcrypt_queue.c @@ -75,8 +75,20 @@ static ssize_t zcrypt_queue_online_store(struct device *dev, static DEVICE_ATTR(online, 0644, zcrypt_queue_online_show, zcrypt_queue_online_store); +static ssize_t zcrypt_queue_load_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct zcrypt_queue *zq = to_ap_queue(dev)->private; + + return snprintf(buf, PAGE_SIZE, "%d\n", atomic_read(&zq->load)); +} + +static DEVICE_ATTR(load, 0444, zcrypt_queue_load_show, NULL); + static struct attribute *zcrypt_queue_attrs[] = { &dev_attr_online.attr, + &dev_attr_load.attr, NULL, }; diff --git a/drivers/s390/scsi/zfcp_aux.c b/drivers/s390/scsi/zfcp_aux.c index a3a8c8d9d7171a..94f4d8fe85e0e5 100644 --- a/drivers/s390/scsi/zfcp_aux.c +++ b/drivers/s390/scsi/zfcp_aux.c @@ -101,7 +101,7 @@ static void __init zfcp_init_device_setup(char *devstr) token = strsep(&str, ","); if (!token || strlen(token) >= ZFCP_BUS_ID_SIZE) goto err_out; - strncpy(busid, token, ZFCP_BUS_ID_SIZE); + strlcpy(busid, token, ZFCP_BUS_ID_SIZE); token = strsep(&str, ","); if (!token || kstrtoull(token, 0, (unsigned long long *) &wwpn)) diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index b6270a3b38e9f3..b955b986b3413a 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -949,6 +949,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_GET_MSR_FEATURES 153 #define KVM_CAP_HYPERV_EVENTFD 154 #define KVM_CAP_HYPERV_TLBFLUSH 155 +#define KVM_CAP_S390_HPAGE_1M 156 #ifdef KVM_CAP_IRQ_ROUTING diff --git a/init/Kconfig b/init/Kconfig index 041f3a022122d5..8b1ab81ecda8f8 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -125,10 +125,13 @@ config HAVE_KERNEL_LZO config HAVE_KERNEL_LZ4 bool +config HAVE_KERNEL_UNCOMPRESSED + bool + choice prompt "Kernel compression mode" default KERNEL_GZIP - depends on HAVE_KERNEL_GZIP || HAVE_KERNEL_BZIP2 || HAVE_KERNEL_LZMA || HAVE_KERNEL_XZ || HAVE_KERNEL_LZO || HAVE_KERNEL_LZ4 + depends on HAVE_KERNEL_GZIP || HAVE_KERNEL_BZIP2 || HAVE_KERNEL_LZMA || HAVE_KERNEL_XZ || HAVE_KERNEL_LZO || HAVE_KERNEL_LZ4 || HAVE_KERNEL_UNCOMPRESSED help The linux kernel is a kind of self-extracting executable. Several compression algorithms are available, which differ @@ -207,6 +210,16 @@ config KERNEL_LZ4 is about 8% bigger than LZO. But the decompression speed is faster than LZO. +config KERNEL_UNCOMPRESSED + bool "None" + depends on HAVE_KERNEL_UNCOMPRESSED + help + Produce uncompressed kernel image. This option is usually not what + you want. It is useful for debugging the kernel in slow simulation + environments, where decompressing and moving the kernel is awfully + slow. This option allows early boot code to skip the decompressor + and jump right at uncompressed kernel image. + endchoice config DEFAULT_HOSTNAME diff --git a/lib/raid6/s390vx.uc b/lib/raid6/s390vx.uc index 140fa8bb5c2352..914ebe98fc211d 100644 --- a/lib/raid6/s390vx.uc +++ b/lib/raid6/s390vx.uc @@ -55,22 +55,24 @@ static inline void XOR(int x, int y, int z) asm volatile ("VX %0,%1,%2" : : "i" (x), "i" (y), "i" (z)); } -static inline void LOAD_DATA(int x, int n, u8 *ptr) +static inline void LOAD_DATA(int x, u8 *ptr) { - typedef struct { u8 _[16*n]; } addrtype; + typedef struct { u8 _[16 * $#]; } addrtype; register addrtype *__ptr asm("1") = (addrtype *) ptr; asm volatile ("VLM %2,%3,0,%r1" - : : "m" (*__ptr), "a" (__ptr), "i" (x), "i" (x + n - 1)); + : : "m" (*__ptr), "a" (__ptr), "i" (x), + "i" (x + $# - 1)); } -static inline void STORE_DATA(int x, int n, u8 *ptr) +static inline void STORE_DATA(int x, u8 *ptr) { - typedef struct { u8 _[16*n]; } addrtype; + typedef struct { u8 _[16 * $#]; } addrtype; register addrtype *__ptr asm("1") = (addrtype *) ptr; asm volatile ("VSTM %2,%3,0,1" - : "=m" (*__ptr) : "a" (__ptr), "i" (x), "i" (x + n - 1)); + : "=m" (*__ptr) : "a" (__ptr), "i" (x), + "i" (x + $# - 1)); } static inline void COPY_VEC(int x, int y) @@ -93,19 +95,19 @@ static void raid6_s390vx$#_gen_syndrome(int disks, size_t bytes, void **ptrs) q = dptr[z0 + 2]; /* RS syndrome */ for (d = 0; d < bytes; d += $#*NSIZE) { - LOAD_DATA(0,$#,&dptr[z0][d]); + LOAD_DATA(0,&dptr[z0][d]); COPY_VEC(8+$$,0+$$); for (z = z0 - 1; z >= 0; z--) { MASK(16+$$,8+$$); AND(16+$$,16+$$,25); SHLBYTE(8+$$,8+$$); XOR(8+$$,8+$$,16+$$); - LOAD_DATA(16,$#,&dptr[z][d]); + LOAD_DATA(16,&dptr[z][d]); XOR(0+$$,0+$$,16+$$); XOR(8+$$,8+$$,16+$$); } - STORE_DATA(0,$#,&p[d]); - STORE_DATA(8,$#,&q[d]); + STORE_DATA(0,&p[d]); + STORE_DATA(8,&q[d]); } kernel_fpu_end(&vxstate, KERNEL_VXR); } @@ -127,14 +129,14 @@ static void raid6_s390vx$#_xor_syndrome(int disks, int start, int stop, for (d = 0; d < bytes; d += $#*NSIZE) { /* P/Q data pages */ - LOAD_DATA(0,$#,&dptr[z0][d]); + LOAD_DATA(0,&dptr[z0][d]); COPY_VEC(8+$$,0+$$); for (z = z0 - 1; z >= start; z--) { MASK(16+$$,8+$$); AND(16+$$,16+$$,25); SHLBYTE(8+$$,8+$$); XOR(8+$$,8+$$,16+$$); - LOAD_DATA(16,$#,&dptr[z][d]); + LOAD_DATA(16,&dptr[z][d]); XOR(0+$$,0+$$,16+$$); XOR(8+$$,8+$$,16+$$); } @@ -145,12 +147,12 @@ static void raid6_s390vx$#_xor_syndrome(int disks, int start, int stop, SHLBYTE(8+$$,8+$$); XOR(8+$$,8+$$,16+$$); } - LOAD_DATA(16,$#,&p[d]); + LOAD_DATA(16,&p[d]); XOR(16+$$,16+$$,0+$$); - STORE_DATA(16,$#,&p[d]); - LOAD_DATA(16,$#,&q[d]); + STORE_DATA(16,&p[d]); + LOAD_DATA(16,&q[d]); XOR(16+$$,16+$$,8+$$); - STORE_DATA(16,$#,&q[d]); + STORE_DATA(16,&q[d]); } kernel_fpu_end(&vxstate, KERNEL_VXR); } diff --git a/tools/testing/selftests/rseq/param_test.c b/tools/testing/selftests/rseq/param_test.c index 61525233181341..91cf50b2b0e596 100644 --- a/tools/testing/selftests/rseq/param_test.c +++ b/tools/testing/selftests/rseq/param_test.c @@ -90,6 +90,30 @@ unsigned int yield_mod_cnt, nr_abort; #error "Unsupported architecture" #endif +#elif defined(__s390__) + +#define RSEQ_INJECT_INPUT \ + , [loop_cnt_1]"m"(loop_cnt[1]) \ + , [loop_cnt_2]"m"(loop_cnt[2]) \ + , [loop_cnt_3]"m"(loop_cnt[3]) \ + , [loop_cnt_4]"m"(loop_cnt[4]) \ + , [loop_cnt_5]"m"(loop_cnt[5]) \ + , [loop_cnt_6]"m"(loop_cnt[6]) + +#define INJECT_ASM_REG "r12" + +#define RSEQ_INJECT_CLOBBER \ + , INJECT_ASM_REG + +#define RSEQ_INJECT_ASM(n) \ + "l %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \ + "ltr %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG "\n\t" \ + "je 333f\n\t" \ + "222:\n\t" \ + "ahi %%" INJECT_ASM_REG ", -1\n\t" \ + "jnz 222b\n\t" \ + "333:\n\t" + #elif defined(__ARMEL__) #define RSEQ_INJECT_INPUT \ diff --git a/tools/testing/selftests/rseq/rseq-s390.h b/tools/testing/selftests/rseq/rseq-s390.h new file mode 100644 index 00000000000000..1069e85258ce33 --- /dev/null +++ b/tools/testing/selftests/rseq/rseq-s390.h @@ -0,0 +1,513 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ + +#define RSEQ_SIG 0x53053053 + +#define rseq_smp_mb() __asm__ __volatile__ ("bcr 15,0" ::: "memory") +#define rseq_smp_rmb() rseq_smp_mb() +#define rseq_smp_wmb() rseq_smp_mb() + +#define rseq_smp_load_acquire(p) \ +__extension__ ({ \ + __typeof(*p) ____p1 = RSEQ_READ_ONCE(*p); \ + rseq_barrier(); \ + ____p1; \ +}) + +#define rseq_smp_acquire__after_ctrl_dep() rseq_smp_rmb() + +#define rseq_smp_store_release(p, v) \ +do { \ + rseq_barrier(); \ + RSEQ_WRITE_ONCE(*p, v); \ +} while (0) + +#ifdef RSEQ_SKIP_FASTPATH +#include "rseq-skip.h" +#else /* !RSEQ_SKIP_FASTPATH */ + +#ifdef __s390x__ + +#define LONG_L "lg" +#define LONG_S "stg" +#define LONG_LT_R "ltgr" +#define LONG_CMP "cg" +#define LONG_CMP_R "cgr" +#define LONG_ADDI "aghi" +#define LONG_ADD_R "agr" + +#define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, \ + start_ip, post_commit_offset, abort_ip) \ + ".pushsection __rseq_table, \"aw\"\n\t" \ + ".balign 32\n\t" \ + __rseq_str(label) ":\n\t" \ + ".long " __rseq_str(version) ", " __rseq_str(flags) "\n\t" \ + ".quad " __rseq_str(start_ip) ", " __rseq_str(post_commit_offset) ", " __rseq_str(abort_ip) "\n\t" \ + ".popsection\n\t" + +#elif __s390__ + +#define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, \ + start_ip, post_commit_offset, abort_ip) \ + ".pushsection __rseq_table, \"aw\"\n\t" \ + ".balign 32\n\t" \ + __rseq_str(label) ":\n\t" \ + ".long " __rseq_str(version) ", " __rseq_str(flags) "\n\t" \ + ".long 0x0, " __rseq_str(start_ip) ", 0x0, " __rseq_str(post_commit_offset) ", 0x0, " __rseq_str(abort_ip) "\n\t" \ + ".popsection\n\t" + +#define LONG_L "l" +#define LONG_S "st" +#define LONG_LT_R "ltr" +#define LONG_CMP "c" +#define LONG_CMP_R "cr" +#define LONG_ADDI "ahi" +#define LONG_ADD_R "ar" + +#endif + +#define RSEQ_ASM_DEFINE_TABLE(label, start_ip, post_commit_ip, abort_ip) \ + __RSEQ_ASM_DEFINE_TABLE(label, 0x0, 0x0, start_ip, \ + (post_commit_ip - start_ip), abort_ip) + +#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs) \ + RSEQ_INJECT_ASM(1) \ + "larl %%r0, " __rseq_str(cs_label) "\n\t" \ + LONG_S " %%r0, %[" __rseq_str(rseq_cs) "]\n\t" \ + __rseq_str(label) ":\n\t" + +#define RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, label) \ + RSEQ_INJECT_ASM(2) \ + "c %[" __rseq_str(cpu_id) "], %[" __rseq_str(current_cpu_id) "]\n\t" \ + "jnz " __rseq_str(label) "\n\t" + +#define RSEQ_ASM_DEFINE_ABORT(label, teardown, abort_label) \ + ".pushsection __rseq_failure, \"ax\"\n\t" \ + ".long " __rseq_str(RSEQ_SIG) "\n\t" \ + __rseq_str(label) ":\n\t" \ + teardown \ + "j %l[" __rseq_str(abort_label) "]\n\t" \ + ".popsection\n\t" + +#define RSEQ_ASM_DEFINE_CMPFAIL(label, teardown, cmpfail_label) \ + ".pushsection __rseq_failure, \"ax\"\n\t" \ + __rseq_str(label) ":\n\t" \ + teardown \ + "j %l[" __rseq_str(cmpfail_label) "]\n\t" \ + ".popsection\n\t" + +static inline __attribute__((always_inline)) +int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + LONG_CMP " %[expect], %[v]\n\t" + "jnz %l[cmpfail]\n\t" + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + LONG_CMP " %[expect], %[v]\n\t" + "jnz %l[error2]\n\t" +#endif + /* final store */ + LONG_S " %[newv], %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(5) + RSEQ_ASM_DEFINE_ABORT(4, "", abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + [v] "m" (*v), + [expect] "r" (expect), + [newv] "r" (newv) + RSEQ_INJECT_INPUT + : "memory", "cc", "r0" + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +cmpfail: + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("expected value comparison failed"); +#endif +} + +/* + * Compare @v against @expectnot. When it does _not_ match, load @v + * into @load, and store the content of *@v + voffp into @v. + */ +static inline __attribute__((always_inline)) +int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, + off_t voffp, intptr_t *load, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + LONG_L " %%r1, %[v]\n\t" + LONG_CMP_R " %%r1, %[expectnot]\n\t" + "je %l[cmpfail]\n\t" + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + LONG_L " %%r1, %[v]\n\t" + LONG_CMP_R " %%r1, %[expectnot]\n\t" + "je %l[error2]\n\t" +#endif + LONG_S " %%r1, %[load]\n\t" + LONG_ADD_R " %%r1, %[voffp]\n\t" + LONG_L " %%r1, 0(%%r1)\n\t" + /* final store */ + LONG_S " %%r1, %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(5) + RSEQ_ASM_DEFINE_ABORT(4, "", abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + /* final store input */ + [v] "m" (*v), + [expectnot] "r" (expectnot), + [voffp] "r" (voffp), + [load] "m" (*load) + RSEQ_INJECT_INPUT + : "memory", "cc", "r0", "r1" + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +cmpfail: + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("expected value comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int rseq_addv(intptr_t *v, intptr_t count, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) +#endif + LONG_L " %%r0, %[v]\n\t" + LONG_ADD_R " %%r0, %[count]\n\t" + /* final store */ + LONG_S " %%r0, %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(4) + RSEQ_ASM_DEFINE_ABORT(4, "", abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + /* final store input */ + [v] "m" (*v), + [count] "r" (count) + RSEQ_INJECT_INPUT + : "memory", "cc", "r0" + RSEQ_INJECT_CLOBBER + : abort +#ifdef RSEQ_COMPARE_TWICE + , error1 +#endif + ); + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, + intptr_t *v2, intptr_t newv2, + intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + LONG_CMP " %[expect], %[v]\n\t" + "jnz %l[cmpfail]\n\t" + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + LONG_CMP " %[expect], %[v]\n\t" + "jnz %l[error2]\n\t" +#endif + /* try store */ + LONG_S " %[newv2], %[v2]\n\t" + RSEQ_INJECT_ASM(5) + /* final store */ + LONG_S " %[newv], %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(6) + RSEQ_ASM_DEFINE_ABORT(4, "", abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + /* try store input */ + [v2] "m" (*v2), + [newv2] "r" (newv2), + /* final store input */ + [v] "m" (*v), + [expect] "r" (expect), + [newv] "r" (newv) + RSEQ_INJECT_INPUT + : "memory", "cc", "r0" + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +cmpfail: + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("expected value comparison failed"); +#endif +} + +/* s390 is TSO. */ +static inline __attribute__((always_inline)) +int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect, + intptr_t *v2, intptr_t newv2, + intptr_t newv, int cpu) +{ + return rseq_cmpeqv_trystorev_storev(v, expect, v2, newv2, newv, cpu); +} + +static inline __attribute__((always_inline)) +int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, + intptr_t *v2, intptr_t expect2, + intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + LONG_CMP " %[expect], %[v]\n\t" + "jnz %l[cmpfail]\n\t" + RSEQ_INJECT_ASM(4) + LONG_CMP " %[expect2], %[v2]\n\t" + "jnz %l[cmpfail]\n\t" + RSEQ_INJECT_ASM(5) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + LONG_CMP " %[expect], %[v]\n\t" + "jnz %l[error2]\n\t" + LONG_CMP " %[expect2], %[v2]\n\t" + "jnz %l[error3]\n\t" +#endif + /* final store */ + LONG_S " %[newv], %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(6) + RSEQ_ASM_DEFINE_ABORT(4, "", abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + /* cmp2 input */ + [v2] "m" (*v2), + [expect2] "r" (expect2), + /* final store input */ + [v] "m" (*v), + [expect] "r" (expect), + [newv] "r" (newv) + RSEQ_INJECT_INPUT + : "memory", "cc", "r0" + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2, error3 +#endif + ); + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +cmpfail: + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("1st expected value comparison failed"); +error3: + rseq_bug("2nd expected value comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, + void *dst, void *src, size_t len, + intptr_t newv, int cpu) +{ + uint64_t rseq_scratch[3]; + + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ + LONG_S " %[src], %[rseq_scratch0]\n\t" + LONG_S " %[dst], %[rseq_scratch1]\n\t" + LONG_S " %[len], %[rseq_scratch2]\n\t" + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + LONG_CMP " %[expect], %[v]\n\t" + "jnz 5f\n\t" + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 6f) + LONG_CMP " %[expect], %[v]\n\t" + "jnz 7f\n\t" +#endif + /* try memcpy */ + LONG_LT_R " %[len], %[len]\n\t" + "jz 333f\n\t" + "222:\n\t" + "ic %%r0,0(%[src])\n\t" + "stc %%r0,0(%[dst])\n\t" + LONG_ADDI " %[src], 1\n\t" + LONG_ADDI " %[dst], 1\n\t" + LONG_ADDI " %[len], -1\n\t" + "jnz 222b\n\t" + "333:\n\t" + RSEQ_INJECT_ASM(5) + /* final store */ + LONG_S " %[newv], %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(6) + /* teardown */ + LONG_L " %[len], %[rseq_scratch2]\n\t" + LONG_L " %[dst], %[rseq_scratch1]\n\t" + LONG_L " %[src], %[rseq_scratch0]\n\t" + RSEQ_ASM_DEFINE_ABORT(4, + LONG_L " %[len], %[rseq_scratch2]\n\t" + LONG_L " %[dst], %[rseq_scratch1]\n\t" + LONG_L " %[src], %[rseq_scratch0]\n\t", + abort) + RSEQ_ASM_DEFINE_CMPFAIL(5, + LONG_L " %[len], %[rseq_scratch2]\n\t" + LONG_L " %[dst], %[rseq_scratch1]\n\t" + LONG_L " %[src], %[rseq_scratch0]\n\t", + cmpfail) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_CMPFAIL(6, + LONG_L " %[len], %[rseq_scratch2]\n\t" + LONG_L " %[dst], %[rseq_scratch1]\n\t" + LONG_L " %[src], %[rseq_scratch0]\n\t", + error1) + RSEQ_ASM_DEFINE_CMPFAIL(7, + LONG_L " %[len], %[rseq_scratch2]\n\t" + LONG_L " %[dst], %[rseq_scratch1]\n\t" + LONG_L " %[src], %[rseq_scratch0]\n\t", + error2) +#endif + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + /* final store input */ + [v] "m" (*v), + [expect] "r" (expect), + [newv] "r" (newv), + /* try memcpy input */ + [dst] "r" (dst), + [src] "r" (src), + [len] "r" (len), + [rseq_scratch0] "m" (rseq_scratch[0]), + [rseq_scratch1] "m" (rseq_scratch[1]), + [rseq_scratch2] "m" (rseq_scratch[2]) + RSEQ_INJECT_INPUT + : "memory", "cc", "r0" + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +cmpfail: + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("expected value comparison failed"); +#endif +} + +/* s390 is TSO. */ +static inline __attribute__((always_inline)) +int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, + void *dst, void *src, size_t len, + intptr_t newv, int cpu) +{ + return rseq_cmpeqv_trymemcpy_storev(v, expect, dst, src, len, + newv, cpu); +} +#endif /* !RSEQ_SKIP_FASTPATH */ diff --git a/tools/testing/selftests/rseq/rseq.h b/tools/testing/selftests/rseq/rseq.h index 86ce22417e0d7f..ca332efe971353 100644 --- a/tools/testing/selftests/rseq/rseq.h +++ b/tools/testing/selftests/rseq/rseq.h @@ -75,6 +75,8 @@ extern __thread volatile struct rseq __rseq_abi; #include #elif defined(__mips__) #include +#elif defined(__s390__) +#include #else #error unsupported target #endif