From 8ce7da474f3063f57ac446eb428945a9852e102d Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Mon, 8 Jun 2015 11:26:30 +0300 Subject: [PATCH 01/39] drm/i915: Properly initialize SDVO analog connectors In the commit below, I missed the connector allocation in the function intel_sdvo_analog_init(), leading to those connectors to have a NULL state pointer. commit 08d9bc920d465bbbbd762cac9383249c19bf69a2 Author: Ander Conselvan de Oliveira Date: Fri Apr 10 10:59:10 2015 +0300 drm/i915: Allocate connector state together with the connectors Reported-by: Stefan Lippers-Hollmann Tested-by: Stefan Lippers-Hollmann Signed-off-by: Ander Conselvan de Oliveira Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_sdvo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c index e87d2f418de4f3..987b81f31b0e69 100644 --- a/drivers/gpu/drm/i915/intel_sdvo.c +++ b/drivers/gpu/drm/i915/intel_sdvo.c @@ -2550,7 +2550,7 @@ intel_sdvo_analog_init(struct intel_sdvo *intel_sdvo, int device) DRM_DEBUG_KMS("initialising analog device %d\n", device); - intel_sdvo_connector = kzalloc(sizeof(*intel_sdvo_connector), GFP_KERNEL); + intel_sdvo_connector = intel_sdvo_connector_alloc(); if (!intel_sdvo_connector) return false; From 7f2ca8b55aeff1fe51ed3570200ef88a96060917 Mon Sep 17 00:00:00 2001 From: Peter Hutterer Date: Mon, 8 Jun 2015 10:17:32 -0700 Subject: [PATCH 02/39] Input: synaptics - add min/max quirk for Lenovo S540 https://bugzilla.redhat.com/show_bug.cgi?id=1223051#c2 Cc: stable@vger.kernel.org Tested-by: tommy.gagnes@gmail.com Signed-off-by: Peter Hutterer Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/synaptics.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index 630af73e98c488..35c8d0ceabeebf 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -150,6 +150,11 @@ static const struct min_max_quirk min_max_pnpid_table[] = { {ANY_BOARD_ID, 2961}, 1024, 5112, 2024, 4832 }, + { + (const char * const []){"LEN2000", NULL}, + {ANY_BOARD_ID, ANY_BOARD_ID}, + 1024, 5113, 2021, 4832 + }, { (const char * const []){"LEN2001", NULL}, {ANY_BOARD_ID, ANY_BOARD_ID}, @@ -191,7 +196,7 @@ static const char * const topbuttonpad_pnp_ids[] = { "LEN0045", "LEN0047", "LEN0049", - "LEN2000", + "LEN2000", /* S540 */ "LEN2001", /* Edge E431 */ "LEN2002", /* Edge E531 */ "LEN2003", From 3f5f1554ee715639e78d9be87623ee82772537e0 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 2 Jun 2015 19:21:15 +0300 Subject: [PATCH 03/39] drm/i915: Fix DDC probe for passive adapters MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Passive DP->DVI/HDMI dongles on DP++ ports show up to the system as HDMI devices, as they do not have a sink device in them to respond to any AUX traffic. When probing these dongles over the DDC, sometimes they will NAK the first attempt even though the transaction is valid and they support the DDC protocol. The retry loop inside of drm_do_probe_ddc_edid() would normally catch this case and try the transaction again, resulting in success. That, however, was thwarted by the fix for [1]: commit 9292f37e1f5c79400254dca46f83313488093825 Author: Eugeni Dodonov Date: Thu Jan 5 09:34:28 2012 -0200 drm: give up on edid retries when i2c bus is not responding This added code to exit immediately if the return code from the i2c_transfer function was -ENXIO in order to reduce the amount of time spent in waiting for unresponsive or disconnected devices. That was possible because the underlying i2c bit banging algorithm had retries of its own (which, of course, were part of the reason for the bug the commit fixes). Since its introduction in commit f899fc64cda8569d0529452aafc0da31c042df2e Author: Chris Wilson Date: Tue Jul 20 15:44:45 2010 -0700 drm/i915: use GMBUS to manage i2c links we've been flipping back and forth enabling the GMBUS transfers, but we've settled since then. The GMBUS implementation does not do any retries, however, bailing out of the drm_do_probe_ddc_edid() retry loop on first encounter of -ENXIO. This, combined with Eugeni's commit, broke the retry on -ENXIO. Retry GMBUS once on -ENXIO on first message to mitigate the issues with passive adapters. This patch is based on the work, and commit message, by Todd Previte . [1] https://bugs.freedesktop.org/show_bug.cgi?id=41059 v2: Don't retry if using bit banging. v3: Move retry within gmbux_xfer, retry only on first message. v4: Initialize GMBUS0 on retry (Ville). v5: Take index reads into account (Ville). Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=85924 Cc: Todd Previte Cc: stable@vger.kernel.org Tested-by: Oliver Grafe (v2) Tested-by: Jim Bride Reviewed-by: Ville Syrjälä Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_i2c.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_i2c.c b/drivers/gpu/drm/i915/intel_i2c.c index 56e437e3158021..ae628001fd9787 100644 --- a/drivers/gpu/drm/i915/intel_i2c.c +++ b/drivers/gpu/drm/i915/intel_i2c.c @@ -435,7 +435,7 @@ gmbus_xfer(struct i2c_adapter *adapter, struct intel_gmbus, adapter); struct drm_i915_private *dev_priv = bus->dev_priv; - int i, reg_offset; + int i = 0, inc, try = 0, reg_offset; int ret = 0; intel_aux_display_runtime_get(dev_priv); @@ -448,12 +448,14 @@ gmbus_xfer(struct i2c_adapter *adapter, reg_offset = dev_priv->gpio_mmio_base; +retry: I915_WRITE(GMBUS0 + reg_offset, bus->reg0); - for (i = 0; i < num; i++) { + for (; i < num; i += inc) { + inc = 1; if (gmbus_is_index_read(msgs, i, num)) { ret = gmbus_xfer_index_read(dev_priv, &msgs[i]); - i += 1; /* set i to the index of the read xfer */ + inc = 2; /* an index read is two msgs */ } else if (msgs[i].flags & I2C_M_RD) { ret = gmbus_xfer_read(dev_priv, &msgs[i], 0); } else { @@ -525,6 +527,18 @@ gmbus_xfer(struct i2c_adapter *adapter, adapter->name, msgs[i].addr, (msgs[i].flags & I2C_M_RD) ? 'r' : 'w', msgs[i].len); + /* + * Passive adapters sometimes NAK the first probe. Retry the first + * message once on -ENXIO for GMBUS transfers; the bit banging algorithm + * has retries internally. See also the retry loop in + * drm_do_probe_ddc_edid, which bails out on the first -ENXIO. + */ + if (ret == -ENXIO && i == 0 && try++ == 0) { + DRM_DEBUG_KMS("GMBUS [%s] NAK on first message, retry\n", + adapter->name); + goto retry; + } + goto out; timeout: From bd00c606a6f60ca015a62bdbf671eadd48a4ca82 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 9 Jun 2015 15:06:55 +0100 Subject: [PATCH 04/39] iommu/vt-d: Change PASID support to bit 40 of Extended Capability Register The existing hardware implementations with PASID support advertised in bit 28? Forget them. They do not exist. Bit 28 means nothing. When we have something that works, it'll use bit 40. Do not attempt to infer anything meaningful from bit 28. This will be reflected in an updated VT-d spec in the extremely near future. Signed-off-by: David Woodhouse --- include/linux/intel-iommu.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 796ef9645827f0..a240e61a7700da 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -115,13 +115,14 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) * Extended Capability Register */ +#define ecap_pasid(e) ((e >> 40) & 0x1) #define ecap_pss(e) ((e >> 35) & 0x1f) #define ecap_eafs(e) ((e >> 34) & 0x1) #define ecap_nwfs(e) ((e >> 33) & 0x1) #define ecap_srs(e) ((e >> 31) & 0x1) #define ecap_ers(e) ((e >> 30) & 0x1) #define ecap_prs(e) ((e >> 29) & 0x1) -#define ecap_pasid(e) ((e >> 28) & 0x1) +/* PASID support used to be on bit 28 */ #define ecap_dis(e) ((e >> 27) & 0x1) #define ecap_nest(e) ((e >> 26) & 0x1) #define ecap_mts(e) ((e >> 25) & 0x1) From 9c5a18a31b321f120efda412281bb9f610f84aa0 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 9 Jun 2015 21:35:44 +0200 Subject: [PATCH 05/39] cfg80211: wext: clear sinfo struct before calling driver Until recently, mac80211 overwrote all the statistics it could provide when getting called, but it now relies on the struct having been zeroed by the caller. This was always the case in nl80211, but wext used a static struct which could even cause values from one device leak to another. Using a static struct is OK (as even documented in a comment) since the whole usage of this function and its return value is always locked under RTNL. Not clearing the struct for calling the driver has always been wrong though, since drivers were free to only fill values they could report, so calling this for one device and then for another would always have leaked values from one to the other. Fix this by initializing the structure in question before the driver method call. This fixes https://bugzilla.kernel.org/show_bug.cgi?id=99691 Cc: stable@vger.kernel.org Reported-by: Gerrit Renker Reported-by: Alexander Kaltsas Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- net/wireless/wext-compat.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index fff1bef6ed6d91..fd682832a0e363 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -1333,6 +1333,8 @@ static struct iw_statistics *cfg80211_wireless_stats(struct net_device *dev) memcpy(bssid, wdev->current_bss->pub.bssid, ETH_ALEN); wdev_unlock(wdev); + memset(&sinfo, 0, sizeof(sinfo)); + if (rdev_get_station(rdev, dev, bssid, &sinfo)) return NULL; From c3b4afca7023b5aa0531912364246e67f79b3010 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 4 Jun 2015 22:25:04 +0800 Subject: [PATCH 06/39] blk-mq: free hctx->ctxs in queue's release handler Now blk_cleanup_queue() can be called before calling del_gendisk()[1], inside which hctx->ctxs is touched from blk_mq_unregister_hctx(), but the variable has been freed by blk_cleanup_queue() at that time. So this patch moves freeing of hctx->ctxs into queue's release handler for fixing the oops reported by Stefan. [1], 6cd18e711dd8075 (block: destroy bdi before blockdev is unregistered) Reported-by: Stefan Seyfried Cc: NeilBrown Cc: Christoph Hellwig Cc: stable@vger.kernel.org (v4.0) Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- block/blk-mq.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index e68b71b85a7eaf..594eea04266e6d 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1600,6 +1600,7 @@ static int blk_mq_hctx_notify(void *data, unsigned long action, return NOTIFY_OK; } +/* hctx->ctxs will be freed in queue's release handler */ static void blk_mq_exit_hctx(struct request_queue *q, struct blk_mq_tag_set *set, struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx) @@ -1618,7 +1619,6 @@ static void blk_mq_exit_hctx(struct request_queue *q, blk_mq_unregister_cpu_notifier(&hctx->cpu_notifier); blk_free_flush_queue(hctx->fq); - kfree(hctx->ctxs); blk_mq_free_bitmap(&hctx->ctx_map); } @@ -1891,8 +1891,12 @@ void blk_mq_release(struct request_queue *q) unsigned int i; /* hctx kobj stays in hctx */ - queue_for_each_hw_ctx(q, hctx, i) + queue_for_each_hw_ctx(q, hctx, i) { + if (!hctx) + continue; + kfree(hctx->ctxs); kfree(hctx); + } kfree(q->queue_hw_ctx); From 5eea900389f73ad715649e3c3e78fc4482248635 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Fri, 1 May 2015 05:59:35 -0700 Subject: [PATCH 07/39] blackfin: Fix build error Fix include/asm-generic/io.h: In function 'readb': include/asm-generic/io.h:113:2: error: implicit declaration of function 'bfin_read8' include/asm-generic/io.h: In function 'readw': include/asm-generic/io.h:121:2: error: implicit declaration of function 'bfin_read16' include/asm-generic/io.h: In function 'readl': include/asm-generic/io.h:129:2: error: implicit declaration of function 'bfin_read32' include/asm-generic/io.h: In function 'writeb': include/asm-generic/io.h:147:2: error: implicit declaration of function 'bfin_write8' include/asm-generic/io.h: In function 'writew': include/asm-generic/io.h:155:2: error: implicit declaration of function 'bfin_write16' include/asm-generic/io.h: In function 'writel': include/asm-generic/io.h:163:2: error: implicit declaration of function 'bfin_write32' Reported-by: Geert Uytterhoeven Fixes: 1a3372bc522ef ("blackfin: io: define __raw_readx/writex with bfin_readx/writex") Cc: Steven Miao Signed-off-by: Guenter Roeck --- arch/blackfin/include/asm/io.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/blackfin/include/asm/io.h b/arch/blackfin/include/asm/io.h index 4e8ad0523118d6..6abebe82d4e93e 100644 --- a/arch/blackfin/include/asm/io.h +++ b/arch/blackfin/include/asm/io.h @@ -10,6 +10,7 @@ #include #include #include +#include #define __raw_readb bfin_read8 #define __raw_readw bfin_read16 From 80524e933658077842271fbf34e6861c41095a32 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Wed, 15 Apr 2015 08:33:50 -0700 Subject: [PATCH 08/39] score: Fix exception handler label The latest version of modinfo fails to compile score architecture targets with the following error. FATAL: The relocation at __ex_table+0x634 references section "__ex_table" which is not executable, IOW the kernel will fault if it ever tries to jump to it. Something is seriously wrong and should be fixed. The probem is caused by a bad label in an __ex_table entry. Acked-by: Lennox Wu Cc: Quentin Casasnovas Signed-off-by: Guenter Roeck --- arch/score/lib/string.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/score/lib/string.S b/arch/score/lib/string.S index 00b7d3a2fc6068..16efa3ad037f7c 100644 --- a/arch/score/lib/string.S +++ b/arch/score/lib/string.S @@ -175,10 +175,10 @@ ENTRY(__clear_user) br r3 .section .fixup, "ax" +99: br r3 .previous .section __ex_table, "a" .align 2 -99: .word 0b, 99b .previous From 1b0ccfe54a6abd1bc4d7bdd1c33e61e2c58f72c7 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 10 Jun 2015 15:29:31 -0700 Subject: [PATCH 09/39] Revert "ipv6: Fix protocol resubmission" This reverts commit 0243508edd317ff1fa63b495643a7c192fbfcd92. It introduces new regressions. Signed-off-by: David S. Miller --- net/ipv6/ip6_input.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 41a73da371a923..f2e464eba5efdb 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -212,13 +212,13 @@ static int ip6_input_finish(struct sock *sk, struct sk_buff *skb) */ rcu_read_lock(); +resubmit: idev = ip6_dst_idev(skb_dst(skb)); if (!pskb_pull(skb, skb_transport_offset(skb))) goto discard; nhoff = IP6CB(skb)->nhoff; nexthdr = skb_network_header(skb)[nhoff]; -resubmit: raw = raw6_local_deliver(skb, nexthdr); ipprot = rcu_dereference(inet6_protos[nexthdr]); if (ipprot) { @@ -246,12 +246,10 @@ static int ip6_input_finish(struct sock *sk, struct sk_buff *skb) goto discard; ret = ipprot->handler(skb); - if (ret < 0) { - nexthdr = -ret; + if (ret > 0) goto resubmit; - } else if (ret == 0) { + else if (ret == 0) IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INDELIVERS); - } } else { if (!raw) { if (xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { From 85bd839983778fcd0c1c043327b14a046e979b39 Mon Sep 17 00:00:00 2001 From: Gu Zheng Date: Wed, 10 Jun 2015 11:14:43 -0700 Subject: [PATCH 10/39] mm/memory_hotplug.c: set zone->wait_table to null after freeing it Izumi found the following oops when hot re-adding a node: BUG: unable to handle kernel paging request at ffffc90008963690 IP: __wake_up_bit+0x20/0x70 Oops: 0000 [#1] SMP CPU: 68 PID: 1237 Comm: rs:main Q:Reg Not tainted 4.1.0-rc5 #80 Hardware name: FUJITSU PRIMEQUEST2800E/SB, BIOS PRIMEQUEST 2000 Series BIOS Version 1.87 04/28/2015 task: ffff880838df8000 ti: ffff880017b94000 task.ti: ffff880017b94000 RIP: 0010:[] [] __wake_up_bit+0x20/0x70 RSP: 0018:ffff880017b97be8 EFLAGS: 00010246 RAX: ffffc90008963690 RBX: 00000000003c0000 RCX: 000000000000a4c9 RDX: 0000000000000000 RSI: ffffea101bffd500 RDI: ffffc90008963648 RBP: ffff880017b97c08 R08: 0000000002000020 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: ffff8a0797c73800 R13: ffffea101bffd500 R14: 0000000000000001 R15: 00000000003c0000 FS: 00007fcc7ffff700(0000) GS:ffff880874800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffc90008963690 CR3: 0000000836761000 CR4: 00000000001407e0 Call Trace: unlock_page+0x6d/0x70 generic_write_end+0x53/0xb0 xfs_vm_write_end+0x29/0x80 [xfs] generic_perform_write+0x10a/0x1e0 xfs_file_buffered_aio_write+0x14d/0x3e0 [xfs] xfs_file_write_iter+0x79/0x120 [xfs] __vfs_write+0xd4/0x110 vfs_write+0xac/0x1c0 SyS_write+0x58/0xd0 system_call_fastpath+0x12/0x76 Code: 5d c3 66 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 55 48 89 e5 48 83 ec 20 65 48 8b 04 25 28 00 00 00 48 89 45 f8 31 c0 48 8d 47 48 <48> 39 47 48 48 c7 45 e8 00 00 00 00 48 c7 45 f0 00 00 00 00 48 RIP [] __wake_up_bit+0x20/0x70 RSP CR2: ffffc90008963690 Reproduce method (re-add a node):: Hot-add nodeA --> remove nodeA --> hot-add nodeA (panic) This seems an use-after-free problem, and the root cause is zone->wait_table was not set to *NULL* after free it in try_offline_node. When hot re-add a node, we will reuse the pgdat of it, so does the zone struct, and when add pages to the target zone, it will init the zone first (including the wait_table) if the zone is not initialized. The judgement of zone initialized is based on zone->wait_table: static inline bool zone_is_initialized(struct zone *zone) { return !!zone->wait_table; } so if we do not set the zone->wait_table to *NULL* after free it, the memory hotplug routine will skip the init of new zone when hot re-add the node, and the wait_table still points to the freed memory, then we will access the invalid address when trying to wake up the waiting people after the i/o operation with the page is done, such as mentioned above. Signed-off-by: Gu Zheng Reported-by: Taku Izumi Reviewed by: Yasuaki Ishimatsu Cc: KAMEZAWA Hiroyuki Cc: Tang Chen Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory_hotplug.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 457bde530cbedc..9e88f749aa5123 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1969,8 +1969,10 @@ void try_offline_node(int nid) * wait_table may be allocated from boot memory, * here only free if it's allocated by vmalloc. */ - if (is_vmalloc_addr(zone->wait_table)) + if (is_vmalloc_addr(zone->wait_table)) { vfree(zone->wait_table); + zone->wait_table = NULL; + } } } EXPORT_SYMBOL(try_offline_node); From 7d638093d4b0e9ef15bd78f38f11f126e773cc14 Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Wed, 10 Jun 2015 11:14:46 -0700 Subject: [PATCH 11/39] memcg: do not call reclaim if !__GFP_WAIT When trimming memcg consumption excess (see memory.high), we call try_to_free_mem_cgroup_pages without checking if we are allowed to sleep in the current context, which can result in a deadlock. Fix this. Fixes: 241994ed8649 ("mm: memcontrol: default hierarchy interface for memory") Signed-off-by: Vladimir Davydov Cc: Johannes Weiner Acked-by: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 14c2f2017e37cc..9da23a7ec4c00b 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2323,6 +2323,8 @@ static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask, css_get_many(&memcg->css, batch); if (batch > nr_pages) refill_stock(memcg, batch - nr_pages); + if (!(gfp_mask & __GFP_WAIT)) + goto done; /* * If the hierarchy is above the normal consumption range, * make the charging task trim their excess contribution. From d7ad41a1c498729b7584c257710b1b437a0c1470 Mon Sep 17 00:00:00 2001 From: Weijie Yang Date: Wed, 10 Jun 2015 11:14:49 -0700 Subject: [PATCH 12/39] zram: clear disk io accounting when reset zram device Clear zram disk io accounting when resetting the zram device. Otherwise the residual io accounting stat will affect the diskstat in the next zram active cycle. Signed-off-by: Weijie Yang Acked-by: Sergey Senozhatsky Acked-by: Minchan Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/block/zram/zram_drv.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 8dcbced0eafd5f..6e134f4759c0c9 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -805,7 +805,9 @@ static void zram_reset_device(struct zram *zram) memset(&zram->stats, 0, sizeof(zram->stats)); zram->disksize = 0; zram->max_comp_streams = 1; + set_capacity(zram->disk, 0); + part_stat_set_all(&zram->disk->part0, 0); up_write(&zram->init_lock); /* I/O operation under all of CPU are done so let's free */ From 5129e87cb81996d4d7e8f5a86b5355c5f2d0383b Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 10 Jun 2015 11:14:52 -0700 Subject: [PATCH 13/39] checkpatch: fix "GLOBAL_INITIALISERS" test Commit d5e616fc1c1d ("checkpatch: add a few more --fix corrections") broke the GLOBAL_INITIALISERS test with bad parentheses and optional leading spaces. Fix it. Signed-off-by: Joe Perches Reported-by: Bandan Das Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/checkpatch.pl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 89b1df4e72ab34..c5ec977b9c3786 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -3169,12 +3169,12 @@ sub process { } # check for global initialisers. - if ($line =~ /^\+(\s*$Type\s*$Ident\s*(?:\s+$Modifier))*\s*=\s*(0|NULL|false)\s*;/) { + if ($line =~ /^\+$Type\s*$Ident(?:\s+$Modifier)*\s*=\s*(?:0|NULL|false)\s*;/) { if (ERROR("GLOBAL_INITIALISERS", "do not initialise globals to 0 or NULL\n" . $herecurr) && $fix) { - $fixed[$fixlinenr] =~ s/($Type\s*$Ident\s*(?:\s+$Modifier))*\s*=\s*(0|NULL|false)\s*;/$1;/; + $fixed[$fixlinenr] =~ s/(^.$Type\s*$Ident(?:\s+$Modifier)*)\s*=\s*(0|NULL|false)\s*;/$1;/; } } # check for static initialisers. From f371763a79d5212c2cb216b46fa8af46ba56cee3 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 10 Jun 2015 11:14:54 -0700 Subject: [PATCH 14/39] mm: memcontrol: fix false-positive VM_BUG_ON() on -rt On -rt, the VM_BUG_ON(!irqs_disabled()) triggers inside the memcg swapout path because the spin_lock_irq(&mapping->tree_lock) in the caller doesn't actually disable the hardware interrupts - which is fine, because on -rt the tophalves run in process context and so we are still safe from preemption while updating the statistics. Remove the VM_BUG_ON() but keep the comment of what we rely on. Signed-off-by: Johannes Weiner Reported-by: Clark Williams Cc: Fernando Lopez-Lezcano Cc: Steven Rostedt Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 9da23a7ec4c00b..a04225d372ba3a 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -5835,9 +5835,7 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry) if (!mem_cgroup_is_root(memcg)) page_counter_uncharge(&memcg->memory, 1); - /* XXX: caller holds IRQ-safe mapping->tree_lock */ - VM_BUG_ON(!irqs_disabled()); - + /* Caller disabled preemption with mapping->tree_lock */ mem_cgroup_charge_statistics(memcg, page, -1); memcg_check_events(memcg, page); } From 02f7b4145da113683ad64c74bf64605e16b71789 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Wed, 10 Jun 2015 11:14:57 -0700 Subject: [PATCH 15/39] zsmalloc: fix a null pointer dereference in destroy_handle_cache() If zs_create_pool()->create_handle_cache()->kmem_cache_create() or pool->name allocation fails, zs_create_pool()->destroy_handle_cache() will dereference the NULL pool->handle_cachep. Modify destroy_handle_cache() to avoid this. Signed-off-by: Sergey Senozhatsky Cc: Minchan Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/zsmalloc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index 08bd7a3d464a9c..a8b5e749e84e7d 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -289,7 +289,8 @@ static int create_handle_cache(struct zs_pool *pool) static void destroy_handle_cache(struct zs_pool *pool) { - kmem_cache_destroy(pool->handle_cachep); + if (pool->handle_cachep) + kmem_cache_destroy(pool->handle_cachep); } static unsigned long alloc_handle(struct zs_pool *pool) From 8e76d4eecf7afeec9328e21cd5880e281838d0d6 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 10 Jun 2015 11:15:00 -0700 Subject: [PATCH 16/39] sched, numa: do not hint for NUMA balancing on VM_MIXEDMAP mappings Jovi Zhangwei reported the following problem Below kernel vm bug can be triggered by tcpdump which mmaped a lot of pages with GFP_COMP flag. [Mon May 25 05:29:33 2015] page:ffffea0015414000 count:66 mapcount:1 mapping: (null) index:0x0 [Mon May 25 05:29:33 2015] flags: 0x20047580004000(head) [Mon May 25 05:29:33 2015] page dumped because: VM_BUG_ON_PAGE(compound_order(page) && !PageTransHuge(page)) [Mon May 25 05:29:33 2015] ------------[ cut here ]------------ [Mon May 25 05:29:33 2015] kernel BUG at mm/migrate.c:1661! [Mon May 25 05:29:33 2015] invalid opcode: 0000 [#1] SMP In this case it was triggered by running tcpdump but it's not necessary reproducible on all systems. sudo tcpdump -i bond0.100 'tcp port 4242' -c 100000000000 -w 4242.pcap Compound pages cannot be migrated and it was not expected that such pages be marked for NUMA balancing. This did not take into account that drivers such as net/packet/af_packet.c may insert compound pages into userspace with vm_insert_page. This patch tells the NUMA balancing protection scanner to skip all VM_MIXEDMAP mappings which avoids the possibility that compound pages are marked for migration. Signed-off-by: Mel Gorman Reported-by: Jovi Zhangwei Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sched/fair.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index ffeaa4105e48a3..c2980e8733bcb9 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -2181,7 +2181,7 @@ void task_numa_work(struct callback_head *work) } for (; vma; vma = vma->vm_next) { if (!vma_migratable(vma) || !vma_policy_mof(vma) || - is_vm_hugetlb_page(vma)) { + is_vm_hugetlb_page(vma) || (vma->vm_flags & VM_MIXEDMAP)) { continue; } From 5ec45a192fe6e287f0fc06d5ca4f3bd446d94803 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 10 Jun 2015 11:15:02 -0700 Subject: [PATCH 17/39] arch/x86/kvm/mmu.c: work around gcc-4.4.4 bug Fix this compile issue with gcc-4.4.4: arch/x86/kvm/mmu.c: In function 'kvm_mmu_pte_write': arch/x86/kvm/mmu.c:4256: error: unknown field 'cr0_wp' specified in initializer arch/x86/kvm/mmu.c:4257: error: unknown field 'cr4_pae' specified in initializer arch/x86/kvm/mmu.c:4257: warning: excess elements in union initializer ... gcc-4.4.4 (at least) has issues when using anonymous unions in initializers. Fixes: edc90b7dc4ceef6 ("KVM: MMU: fix SMAP virtualization") Cc: Xiao Guangrong Cc: Paolo Bonzini Cc: Davidlohr Bueso Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/kvm/mmu.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 44a7d251549734..b73337634214c2 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -4215,13 +4215,13 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, u64 entry, gentry, *spte; int npte; bool remote_flush, local_flush, zap_page; - union kvm_mmu_page_role mask = (union kvm_mmu_page_role) { - .cr0_wp = 1, - .cr4_pae = 1, - .nxe = 1, - .smep_andnot_wp = 1, - .smap_andnot_wp = 1, - }; + union kvm_mmu_page_role mask = { }; + + mask.cr0_wp = 1; + mask.cr4_pae = 1; + mask.nxe = 1; + mask.smep_andnot_wp = 1; + mask.smap_andnot_wp = 1; /* * If we don't have indirect shadow pages, it means no page is From b3be5e3e726a6cc849f40c70c3ae525e4146e9df Mon Sep 17 00:00:00 2001 From: Erik Hugne Date: Tue, 9 Jun 2015 17:27:12 +0200 Subject: [PATCH 18/39] tipc: disconnect socket directly after probe failure If the TIPC connection timer expires in a probing state, a self abort message is supposed to be generated and delivered to the local socket. This is currently broken, and the abort message is actually sent out to the peer node with invalid addressing information. This will cause the link to enter a constant retransmission state and eventually reset. We fix this by removing the self-abort message creation and tear down connection immediately instead. Signed-off-by: Erik Hugne Reviewed-by: Ying Xue Reviewed-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/socket.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 9074b5cede38b8..f485600c4507bc 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2142,11 +2142,17 @@ static void tipc_sk_timeout(unsigned long data) peer_node = tsk_peer_node(tsk); if (tsk->probing_state == TIPC_CONN_PROBING) { - /* Previous probe not answered -> self abort */ - skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, - TIPC_CONN_MSG, SHORT_H_SIZE, 0, - own_node, peer_node, tsk->portid, - peer_port, TIPC_ERR_NO_PORT); + if (!sock_owned_by_user(sk)) { + sk->sk_socket->state = SS_DISCONNECTING; + tsk->connected = 0; + tipc_node_remove_conn(sock_net(sk), tsk_peer_node(tsk), + tsk_peer_port(tsk)); + sk->sk_state_change(sk); + } else { + /* Try again later */ + sk_reset_timer(sk, &sk->sk_timer, (HZ / 20)); + } + } else { skb = tipc_msg_create(CONN_MANAGER, CONN_PROBE, INT_H_SIZE, 0, peer_node, own_node, From 1a040eaca1a22f8da8285ceda6b5e4a2cb704867 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 9 Jun 2015 10:23:57 -0700 Subject: [PATCH 19/39] bridge: fix multicast router rlist endless loop Since the addition of sysfs multicast router support if one set multicast_router to "2" more than once, then the port would be added to the hlist every time and could end up linking to itself and thus causing an endless loop for rlist walkers. So to reproduce just do: echo 2 > multicast_router; echo 2 > multicast_router; in a bridge port and let some igmp traffic flow, for me it hangs up in br_multicast_flood(). Fix this by adding a check in br_multicast_add_router() if the port is already linked. The reason this didn't happen before the addition of multicast_router sysfs entries is because there's a !hlist_unhashed check that prevents it. Signed-off-by: Nikolay Aleksandrov Fixes: 0909e11758bd ("bridge: Add multicast_router sysfs entries") Acked-by: Herbert Xu Signed-off-by: David S. Miller --- net/bridge/br_multicast.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 22fd0419b31455..ff667e18b2d631 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1167,6 +1167,9 @@ static void br_multicast_add_router(struct net_bridge *br, struct net_bridge_port *p; struct hlist_node *slot = NULL; + if (!hlist_unhashed(&port->rlist)) + return; + hlist_for_each_entry(p, &br->router_list, rlist) { if ((unsigned long) port >= (unsigned long) p) break; @@ -1194,12 +1197,8 @@ static void br_multicast_mark_router(struct net_bridge *br, if (port->multicast_router != 1) return; - if (!hlist_unhashed(&port->rlist)) - goto timer; - br_multicast_add_router(br, port); -timer: mod_timer(&port->multicast_router_timer, now + br->multicast_querier_interval); } From 5d753610277862fd8050901f38b6571b9500cdb6 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 10 Jun 2015 21:02:04 -0400 Subject: [PATCH 20/39] net, swap: Remove a warning and clarify why sk_mem_reclaim is required when deactivating swap Jeff Layton reported the following; [ 74.232485] ------------[ cut here ]------------ [ 74.233354] WARNING: CPU: 2 PID: 754 at net/core/sock.c:364 sk_clear_memalloc+0x51/0x80() [ 74.234790] Modules linked in: cts rpcsec_gss_krb5 nfsv4 dns_resolver nfs fscache xfs libcrc32c snd_hda_codec_generic snd_hda_intel snd_hda_controller snd_hda_codec snd_hda_core snd_hwdep snd_seq snd_seq_device nfsd snd_pcm snd_timer snd e1000 ppdev parport_pc joydev parport pvpanic soundcore floppy serio_raw i2c_piix4 pcspkr nfs_acl lockd virtio_balloon acpi_cpufreq auth_rpcgss grace sunrpc qxl drm_kms_helper ttm drm virtio_console virtio_blk virtio_pci ata_generic virtio_ring pata_acpi virtio [ 74.243599] CPU: 2 PID: 754 Comm: swapoff Not tainted 4.1.0-rc6+ #5 [ 74.244635] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 [ 74.245546] 0000000000000000 0000000079e69e31 ffff8800d066bde8 ffffffff8179263d [ 74.246786] 0000000000000000 0000000000000000 ffff8800d066be28 ffffffff8109e6fa [ 74.248175] 0000000000000000 ffff880118d48000 ffff8800d58f5c08 ffff880036e380a8 [ 74.249483] Call Trace: [ 74.249872] [] dump_stack+0x45/0x57 [ 74.250703] [] warn_slowpath_common+0x8a/0xc0 [ 74.251655] [] warn_slowpath_null+0x1a/0x20 [ 74.252585] [] sk_clear_memalloc+0x51/0x80 [ 74.253519] [] xs_disable_swap+0x42/0x80 [sunrpc] [ 74.254537] [] rpc_clnt_swap_deactivate+0x7e/0xc0 [sunrpc] [ 74.255610] [] nfs_swap_deactivate+0x27/0x30 [nfs] [ 74.256582] [] destroy_swap_extents+0x74/0x80 [ 74.257496] [] SyS_swapoff+0x222/0x5c0 [ 74.258318] [] ? syscall_trace_leave+0xc7/0x140 [ 74.259253] [] system_call_fastpath+0x12/0x71 [ 74.260158] ---[ end trace 2530722966429f10 ]--- The warning in question was unnecessary but with Jeff's series the rules are also clearer. This patch removes the warning and updates the comment to explain why sk_mem_reclaim() may still be called. [jlayton: remove if (sk->sk_forward_alloc) conditional. As Leon points out that it's not needed.] Cc: Leon Romanovsky Signed-off-by: Mel Gorman Signed-off-by: Jeff Layton Signed-off-by: David S. Miller --- net/core/sock.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/net/core/sock.c b/net/core/sock.c index 292f42228bfb36..469d6039c7f5dc 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -354,15 +354,12 @@ void sk_clear_memalloc(struct sock *sk) /* * SOCK_MEMALLOC is allowed to ignore rmem limits to ensure forward - * progress of swapping. However, if SOCK_MEMALLOC is cleared while - * it has rmem allocations there is a risk that the user of the - * socket cannot make forward progress due to exceeding the rmem - * limits. By rights, sk_clear_memalloc() should only be called - * on sockets being torn down but warn and reset the accounting if - * that assumption breaks. + * progress of swapping. SOCK_MEMALLOC may be cleared while + * it has rmem allocations due to the last swapfile being deactivated + * but there is a risk that the socket is unusable due to exceeding + * the rmem limits. Reclaim the reserves and obey rmem limits again. */ - if (WARN_ON(sk->sk_forward_alloc)) - sk_mem_reclaim(sk); + sk_mem_reclaim(sk); } EXPORT_SYMBOL_GPL(sk_clear_memalloc); From 6286e8285078ab02b215bba1d42a05ecfd864515 Mon Sep 17 00:00:00 2001 From: Govindarajulu Varadarajan <_govind@gmx.com> Date: Thu, 11 Jun 2015 11:52:54 +0530 Subject: [PATCH 21/39] enic: unlock napi busy poll before unmasking intr There is a small window between vnic_intr_unmask() and enic_poll_unlock_napi(). In this window if an irq occurs and napi is scheduled on different cpu, it tries to acquire enic_poll_lock_napi() and hits the following WARN_ON message. Fix is to unlock napi_poll before unmasking the interrupt. [ 781.121746] ------------[ cut here ]------------ [ 781.121789] WARNING: CPU: 1 PID: 0 at drivers/net/ethernet/cisco/enic/vnic_rq.h:228 enic_poll_msix_rq+0x36a/0x3c0 [enic]() [ 781.121834] Modules linked in: nfsv3 nfs_acl rpcsec_gss_krb5 auth_rpcgss oid_registry nfsv4 dns_resolver coretemp intel_rapl iosf_mbi x86_pkg_temp_thermal intel_powerclamp kvm_intel kvm crct10dif_pclmul crc32_pclmul ghash_clmulni_intel aesni_intel mgag200 ttm drm_kms_helper joydev aes_x86_64 lrw drm gf128mul mousedev glue_helper sb_edac ablk_helper iTCO_wdt iTCO_vendor_support evdev ipmi_si syscopyarea sysfillrect sysimgblt i2c_algo_bit i2c_core edac_core lpc_ich mac_hid cryptd pcspkr ipmi_msghandler shpchp tpm_tis acpi_power_meter tpm wmi processor hwmon button ac sch_fq_codel nfs lockd grace sunrpc fscache hid_generic usbhid hid ehci_pci ehci_hcd sd_mod megaraid_sas usbcore scsi_mod usb_common enic crc32c_generic crc32c_intel btrfs xor raid6_pq ext4 crc16 mbcache jbd2 [ 781.122176] CPU: 1 PID: 0 Comm: swapper/1 Not tainted 4.1.0-rc6-ARCH-00040-gc46a024-dirty #106 [ 781.122210] Hardware name: Cisco Systems Inc UCSB-B200-M4/UCSB-B200-M4, BIOS B200M4.2.2.2.23.061220140128 06/12/2014 [ 781.122252] 0000000000000000 bddbbc9d655ec96e ffff880277e43da8 ffffffff81583fe8 [ 781.122286] 0000000000000000 0000000000000000 ffff880277e43de8 ffffffff8107acfa [ 781.122319] ffff880272c01000 ffff880273f18000 ffff880273f1a100 0000000000000000 [ 781.122352] Call Trace: [ 781.122364] [] dump_stack+0x4f/0x7b [ 781.122399] [] warn_slowpath_common+0x8a/0xc0 [ 781.122425] [] warn_slowpath_null+0x1a/0x20 [ 781.122455] [] enic_poll_msix_rq+0x36a/0x3c0 [enic] [ 781.122487] [] net_rx_action+0x22a/0x370 [ 781.122512] [] __do_softirq+0xed/0x2d0 [ 781.122537] [] irq_exit+0x7e/0xa0 [ 781.122560] [] do_IRQ+0x64/0x100 [ 781.122582] [] common_interrupt+0x6e/0x6e [ 781.122605] [] ? cpu_startup_entry+0x121/0x480 [ 781.122638] [] ? cpu_startup_entry+0xec/0x480 [ 781.122667] [] ? clockevents_register_device+0x113/0x1f0 [ 781.122698] [] start_secondary+0x196/0x1e0 [ 781.122723] ---[ end trace cec2e9dd3af7b9db ]--- Signed-off-by: Govindarajulu Varadarajan <_govind@gmx.com> Signed-off-by: David S. Miller --- drivers/net/ethernet/cisco/enic/enic_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index 204bd182473bce..0e5a01dd7545eb 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -1407,6 +1407,7 @@ static int enic_poll_msix_rq(struct napi_struct *napi, int budget) */ enic_calc_int_moderation(enic, &enic->rq[rq]); + enic_poll_unlock_napi(&enic->rq[rq]); if (work_done < work_to_do) { /* Some work done, but not enough to stay in polling, @@ -1418,7 +1419,6 @@ static int enic_poll_msix_rq(struct napi_struct *napi, int budget) enic_set_int_moderation(enic, &enic->rq[rq]); vnic_intr_unmask(&enic->intr[intr]); } - enic_poll_unlock_napi(&enic->rq[rq]); return work_done; } From 19b596bda1c5400808635fde0d521c1f89a6c1a3 Mon Sep 17 00:00:00 2001 From: Govindarajulu Varadarajan <_govind@gmx.com> Date: Thu, 11 Jun 2015 11:52:55 +0530 Subject: [PATCH 22/39] enic: check return value for stat dump We do not check the return value of enic_dev_stats_dump(). If allocation fails, we will hit NULL pointer reference. Return only if memory allocation fails. For other failures, we return the previously recorded values. Signed-off-by: Govindarajulu Varadarajan <_govind@gmx.com> Signed-off-by: David S. Miller --- .../net/ethernet/cisco/enic/enic_ethtool.c | 20 ++++++++++++++++--- drivers/net/ethernet/cisco/enic/enic_main.c | 9 ++++++++- 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/cisco/enic/enic_ethtool.c b/drivers/net/ethernet/cisco/enic/enic_ethtool.c index 28d9ca675a274f..68d47b196daec3 100644 --- a/drivers/net/ethernet/cisco/enic/enic_ethtool.c +++ b/drivers/net/ethernet/cisco/enic/enic_ethtool.c @@ -131,8 +131,15 @@ static void enic_get_drvinfo(struct net_device *netdev, { struct enic *enic = netdev_priv(netdev); struct vnic_devcmd_fw_info *fw_info; + int err; - enic_dev_fw_info(enic, &fw_info); + err = enic_dev_fw_info(enic, &fw_info); + /* return only when pci_zalloc_consistent fails in vnic_dev_fw_info + * For other failures, like devcmd failure, we return previously + * recorded info. + */ + if (err == -ENOMEM) + return; strlcpy(drvinfo->driver, DRV_NAME, sizeof(drvinfo->driver)); strlcpy(drvinfo->version, DRV_VERSION, sizeof(drvinfo->version)); @@ -181,8 +188,15 @@ static void enic_get_ethtool_stats(struct net_device *netdev, struct enic *enic = netdev_priv(netdev); struct vnic_stats *vstats; unsigned int i; - - enic_dev_stats_dump(enic, &vstats); + int err; + + err = enic_dev_stats_dump(enic, &vstats); + /* return only when pci_zalloc_consistent fails in vnic_dev_stats_dump + * For other failures, like devcmd failure, we return previously + * recorded stats. + */ + if (err == -ENOMEM) + return; for (i = 0; i < enic_n_tx_stats; i++) *(data++) = ((u64 *)&vstats->tx)[enic_tx_stats[i].index]; diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index 0e5a01dd7545eb..eadae1b412c652 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -615,8 +615,15 @@ static struct rtnl_link_stats64 *enic_get_stats(struct net_device *netdev, { struct enic *enic = netdev_priv(netdev); struct vnic_stats *stats; + int err; - enic_dev_stats_dump(enic, &stats); + err = enic_dev_stats_dump(enic, &stats); + /* return only when pci_zalloc_consistent fails in vnic_dev_stats_dump + * For other failures, like devcmd failure, we return previously + * recorded stats. + */ + if (err == -ENOMEM) + return net_stats; net_stats->tx_packets = stats->tx.tx_frames_ok; net_stats->tx_bytes = stats->tx.tx_bytes_ok; From 8b13b4e0bc884ba7dc8ee4de3ee915b7d30e7f78 Mon Sep 17 00:00:00 2001 From: Govindarajulu Varadarajan <_govind@gmx.com> Date: Thu, 11 Jun 2015 11:52:56 +0530 Subject: [PATCH 23/39] enic: fix memory leak in rq_clean When incoming packet qualifies for rx_copybreak, we copy the data to newly allocated skb. We do not free/unmap the original buffer. At this point driver assumes this buffer is unallocated. When enic_rq_alloc_buf() is called for buffer allocation, it checks if buf->os_buf is NULL. If its not NULL that means buffer can be re-used. When vnic_rq_clean() is called for freeing all rq buffers, and if the rx_copybreak reused buffer falls outside the used desc, we do not free the buffer. The following trace is observer when dma-debug is enabled. Fix is to walk through complete ring and clean if buffer is present. [ 40.555386] ------------[ cut here ]------------ [ 40.555396] WARNING: CPU: 0 PID: 491 at lib/dma-debug.c:971 dma_debug_device_change+0x188/0x1f0() [ 40.555400] pci 0000:06:00.0: DMA-API: device driver has pending DMA allocations while released from device [count=4] One of leaked entries details: [device address=0x00000000ff4cc040] [size=9018 bytes] [mapped with DMA_FROM_DEVICE] [mapped as single] [ 40.555402] Modules linked in: nfsv3 nfs_acl rpcsec_gss_krb5 auth_rpcgss oid_registry nfsv4 dns_resolver coretemp intel_rapl iosf_mbi x86_pkg_temp_thermal intel_powerclamp kvm_intel kvm crct10dif_pclmul crc32_pclmul ghash_clmulni_intel aesni_intel aes_x86_64 lrw joydev mousedev gf128mul hid_generic glue_helper mgag200 usbhid ttm hid drm_kms_helper drm ablk_helper syscopyarea sysfillrect sysimgblt i2c_algo_bit i2c_core iTCO_wdt cryptd mac_hid evdev pcspkr sb_edac edac_core tpm_tis iTCO_vendor_support ipmi_si wmi tpm ipmi_msghandler shpchp lpc_ich processor acpi_power_meter hwmon button ac sch_fq_codel nfs lockd grace sunrpc fscache sd_mod ehci_pci ehci_hcd megaraid_sas usbcore scsi_mod usb_common enic(-) crc32c_generic crc32c_intel btrfs xor raid6_pq ext4 crc16 mbcache jbd2 [ 40.555467] CPU: 0 PID: 491 Comm: rmmod Not tainted 4.1.0-rc7-ARCH-01305-gf59b71f #118 [ 40.555469] Hardware name: Cisco Systems Inc UCSB-B200-M4/UCSB-B200-M4, BIOS B200M4.2.2.2.23.061220140128 06/12/2014 [ 40.555471] 0000000000000000 00000000e2f8a5b7 ffff880275f8bc48 ffffffff8158d6f0 [ 40.555474] 0000000000000000 ffff880275f8bca0 ffff880275f8bc88 ffffffff8107b04a [ 40.555477] ffff8802734e0000 0000000000000004 ffff8804763fb3c0 ffff88027600b650 [ 40.555480] Call Trace: [ 40.555488] [] dump_stack+0x4f/0x7b [ 40.555492] [] warn_slowpath_common+0x8a/0xc0 [ 40.555494] [] warn_slowpath_fmt+0x55/0x70 [ 40.555498] [] dma_debug_device_change+0x188/0x1f0 [ 40.555503] [] notifier_call_chain+0x4f/0x80 [ 40.555506] [] __blocking_notifier_call_chain+0x4b/0x70 [ 40.555510] [] blocking_notifier_call_chain+0x16/0x20 [ 40.555514] [] __device_release_driver+0xf6/0x120 [ 40.555518] [] driver_detach+0xc8/0xd0 [ 40.555523] [] bus_remove_driver+0x59/0xe0 [ 40.555527] [] driver_unregister+0x30/0x70 [ 40.555534] [] pci_unregister_driver+0x2d/0xa0 [ 40.555542] [] enic_cleanup_module+0x10/0x14e [enic] [ 40.555547] [] SyS_delete_module+0x1cf/0x280 [ 40.555551] [] ? ____fput+0xe/0x10 [ 40.555554] [] ? task_work_run+0xbc/0xf0 [ 40.555558] [] system_call_fastpath+0x12/0x71 [ 40.555561] ---[ end trace 4988cadc77c2b236 ]--- [ 40.555562] Mapped at: [ 40.555563] [] debug_dma_map_page+0x95/0x150 [ 40.555566] [] enic_rq_alloc_buf+0x1b8/0x360 [enic] [ 40.555570] [] enic_open+0xf8/0x820 [enic] [ 40.555574] [] __dev_open+0xce/0x150 [ 40.555579] [] __dev_change_flags+0xa1/0x170 Signed-off-by: Govindarajulu Varadarajan <_govind@gmx.com> Signed-off-by: David S. Miller --- drivers/net/ethernet/cisco/enic/vnic_rq.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/cisco/enic/vnic_rq.c b/drivers/net/ethernet/cisco/enic/vnic_rq.c index 36a2ed606c911f..c4b2183bf352fb 100644 --- a/drivers/net/ethernet/cisco/enic/vnic_rq.c +++ b/drivers/net/ethernet/cisco/enic/vnic_rq.c @@ -188,16 +188,15 @@ void vnic_rq_clean(struct vnic_rq *rq, struct vnic_rq_buf *buf; u32 fetch_index; unsigned int count = rq->ring.desc_count; + int i; buf = rq->to_clean; - while (vnic_rq_desc_used(rq) > 0) { - + for (i = 0; i < rq->ring.desc_count; i++) { (*buf_clean)(rq, buf); - - buf = rq->to_clean = buf->next; - rq->ring.desc_avail++; + buf = buf->next; } + rq->ring.desc_avail = rq->ring.desc_count - 1; /* Use current fetch_index as the ring starting point */ fetch_index = ioread32(&rq->ctrl->fetch_index); From 108029323910c5dd1ef8fa2d10da1ce5fbce6e12 Mon Sep 17 00:00:00 2001 From: Wang Long Date: Wed, 10 Jun 2015 08:12:37 +0000 Subject: [PATCH 24/39] ring-buffer-benchmark: Fix the wrong sched_priority of producer The producer should be used producer_fifo as its sched_priority, so correct it. Link: http://lkml.kernel.org/r/1433923957-67842-1-git-send-email-long.wanglong@huawei.com Cc: stable@vger.kernel.org # 2.6.33+ Signed-off-by: Wang Long Signed-off-by: Steven Rostedt --- kernel/trace/ring_buffer_benchmark.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c index 13d945c0d03f2b..1b28df2d91042d 100644 --- a/kernel/trace/ring_buffer_benchmark.c +++ b/kernel/trace/ring_buffer_benchmark.c @@ -450,7 +450,7 @@ static int __init ring_buffer_benchmark_init(void) if (producer_fifo >= 0) { struct sched_param param = { - .sched_priority = consumer_fifo + .sched_priority = producer_fifo }; sched_setscheduler(producer, SCHED_FIFO, ¶m); } else From 6dfd197283bffc23a2b046a7f065588de7e1fc1e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Glisse?= Date: Fri, 5 Jun 2015 13:33:57 -0400 Subject: [PATCH 25/39] drm/radeon: fix freeze for laptop with Turks/Thames GPU. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Laptop with Turks/Thames GPU will freeze if dpm is enabled. It seems the SMC engine is relying on some state inside the CP engine. CP needs to chew at least one packet for it to get in good state for dynamic power management. This patch simply disabled and re-enable DPM after the ring test which is enough to avoid the freeze. Signed-off-by: Jérôme Glisse Cc: stable@vger.kernel.org Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_device.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index b7ca4c51462120..a7fdfa4f0857b3 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -1463,6 +1463,21 @@ int radeon_device_init(struct radeon_device *rdev, if (r) DRM_ERROR("ib ring test failed (%d).\n", r); + /* + * Turks/Thames GPU will freeze whole laptop if DPM is not restarted + * after the CP ring have chew one packet at least. Hence here we stop + * and restart DPM after the radeon_ib_ring_tests(). + */ + if (rdev->pm.dpm_enabled && + (rdev->pm.pm_method == PM_METHOD_DPM) && + (rdev->family == CHIP_TURKS) && + (rdev->flags & RADEON_IS_MOBILITY)) { + mutex_lock(&rdev->pm.mutex); + radeon_dpm_disable(rdev); + radeon_dpm_enable(rdev); + mutex_unlock(&rdev->pm.mutex); + } + if ((radeon_testing & 1)) { if (rdev->accel_working) radeon_test_moves(rdev); From 6fb3c025fee16f11ebd73f84f5aba1ee9ce7f8c6 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 10 Jun 2015 01:29:14 -0400 Subject: [PATCH 26/39] Revert "drm/radeon: don't share plls if monitors differ in audio support" This reverts commit a10f0df0615abb194968fc08147f3cdd70fd5aa5. Fixes some systems at the expense of others. Need to properly fix the pll divider selection. bug: https://bugzilla.kernel.org/show_bug.cgi?id=99651 Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/atombios_crtc.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c index e597ffc265633e..42b2ea3fdcf358 100644 --- a/drivers/gpu/drm/radeon/atombios_crtc.c +++ b/drivers/gpu/drm/radeon/atombios_crtc.c @@ -1798,9 +1798,7 @@ static int radeon_get_shared_nondp_ppll(struct drm_crtc *crtc) if ((crtc->mode.clock == test_crtc->mode.clock) && (adjusted_clock == test_adjusted_clock) && (radeon_crtc->ss_enabled == test_radeon_crtc->ss_enabled) && - (test_radeon_crtc->pll_id != ATOM_PPLL_INVALID) && - (drm_detect_monitor_audio(radeon_connector_edid(test_radeon_crtc->connector)) == - drm_detect_monitor_audio(radeon_connector_edid(radeon_crtc->connector)))) + (test_radeon_crtc->pll_id != ATOM_PPLL_INVALID)) return test_radeon_crtc->pll_id; } } From ebb9bf18636926d5da97136c22e882c5d91fda73 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 10 Jun 2015 01:30:54 -0400 Subject: [PATCH 27/39] Revert "drm/radeon: adjust pll when audio is not enabled" This reverts commit 7fe04d6fa824ccea704535a597dc417c8687f990. Fixes some systems at the expense of others. Need to properly fix the pll divider selection. bug: https://bugzilla.kernel.org/show_bug.cgi?id=99651 Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/atombios_crtc.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c index 42b2ea3fdcf358..dac78ad24b3155 100644 --- a/drivers/gpu/drm/radeon/atombios_crtc.c +++ b/drivers/gpu/drm/radeon/atombios_crtc.c @@ -580,9 +580,6 @@ static u32 atombios_adjust_pll(struct drm_crtc *crtc, else radeon_crtc->pll_flags |= RADEON_PLL_PREFER_LOW_REF_DIV; - /* if there is no audio, set MINM_OVER_MAXP */ - if (!drm_detect_monitor_audio(radeon_connector_edid(connector))) - radeon_crtc->pll_flags |= RADEON_PLL_PREFER_MINM_OVER_MAXP; if (rdev->family < CHIP_RV770) radeon_crtc->pll_flags |= RADEON_PLL_PREFER_MINM_OVER_MAXP; /* use frac fb div on APUs */ From ee18e599251ed06bf0c8ade7c434a0de311342ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20D=C3=A4nzer?= Date: Thu, 11 Jun 2015 18:38:38 +0900 Subject: [PATCH 28/39] drm/radeon: Make sure radeon_vm_bo_set_addr always unreserves the BO MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some error paths didn't unreserve the BO. This resulted in a deadlock down the road on the next attempt to reserve the (still reserved) BO. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90873 Cc: stable@vger.kernel.org Reviewed-by: Christian König Signed-off-by: Michel Dänzer Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_vm.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c index de42fc4a22b869..9c3377ca17b75e 100644 --- a/drivers/gpu/drm/radeon/radeon_vm.c +++ b/drivers/gpu/drm/radeon/radeon_vm.c @@ -458,14 +458,16 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev, /* make sure object fit at this offset */ eoffset = soffset + size; if (soffset >= eoffset) { - return -EINVAL; + r = -EINVAL; + goto error_unreserve; } last_pfn = eoffset / RADEON_GPU_PAGE_SIZE; if (last_pfn > rdev->vm_manager.max_pfn) { dev_err(rdev->dev, "va above limit (0x%08X > 0x%08X)\n", last_pfn, rdev->vm_manager.max_pfn); - return -EINVAL; + r = -EINVAL; + goto error_unreserve; } } else { @@ -486,7 +488,8 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev, "(bo %p 0x%010lx 0x%010lx)\n", bo_va->bo, soffset, tmp->bo, tmp->it.start, tmp->it.last); mutex_unlock(&vm->mutex); - return -EINVAL; + r = -EINVAL; + goto error_unreserve; } } @@ -497,7 +500,8 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev, tmp = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL); if (!tmp) { mutex_unlock(&vm->mutex); - return -ENOMEM; + r = -ENOMEM; + goto error_unreserve; } tmp->it.start = bo_va->it.start; tmp->it.last = bo_va->it.last; @@ -555,7 +559,6 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev, r = radeon_vm_clear_bo(rdev, pt); if (r) { radeon_bo_unref(&pt); - radeon_bo_reserve(bo_va->bo, false); return r; } @@ -575,6 +578,10 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev, mutex_unlock(&vm->mutex); return 0; + +error_unreserve: + radeon_bo_unreserve(bo_va->bo); + return r; } /** From 4d66e5e9b6d720d8463e11d027bd4ad91c8b1318 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 10 Jun 2015 23:47:14 -0400 Subject: [PATCH 29/39] block: fix ext_dev_lock lockdep report ================================= [ INFO: inconsistent lock state ] 4.1.0-rc7+ #217 Tainted: G O --------------------------------- inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage. swapper/6/0 [HC0[0]:SC1[1]:HE1:SE0] takes: (ext_devt_lock){+.?...}, at: [] blk_free_devt+0x3c/0x70 {SOFTIRQ-ON-W} state was registered at: [] __lock_acquire+0x461/0x1e70 [] lock_acquire+0xb7/0x290 [] _raw_spin_lock+0x38/0x50 [] blk_alloc_devt+0x6d/0xd0 <-- take the lock in process context [..] [] __lock_acquire+0x3fe/0x1e70 [] ? __lock_acquire+0xe5d/0x1e70 [] lock_acquire+0xb7/0x290 [] ? blk_free_devt+0x3c/0x70 [] _raw_spin_lock+0x38/0x50 [] ? blk_free_devt+0x3c/0x70 [] blk_free_devt+0x3c/0x70 <-- take the lock in softirq [] part_release+0x1c/0x50 [] device_release+0x36/0xb0 [] kobject_cleanup+0x7b/0x1a0 [] kobject_put+0x30/0x70 [] put_device+0x17/0x20 [] delete_partition_rcu_cb+0x16c/0x180 [] ? read_dev_sector+0xa0/0xa0 [] rcu_process_callbacks+0x2ff/0xa90 [] ? rcu_process_callbacks+0x2bf/0xa90 [] __do_softirq+0xde/0x600 Neil sees this in his tests and it also triggers on pmem driver unbind for the libnvdimm tests. This fix is on top of an initial fix by Keith for incorrect usage of mutex_lock() in this path: 2da78092dda1 "block: Fix dev_t minor allocation lifetime". Both this and 2da78092dda1 are candidates for -stable. Fixes: 2da78092dda1 ("block: Fix dev_t minor allocation lifetime") Cc: Cc: Keith Busch Reported-by: NeilBrown Signed-off-by: Dan Williams Signed-off-by: Jens Axboe --- block/genhd.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/block/genhd.c b/block/genhd.c index 666e11b8398378..ea982eadaf6380 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -422,9 +422,9 @@ int blk_alloc_devt(struct hd_struct *part, dev_t *devt) /* allocate ext devt */ idr_preload(GFP_KERNEL); - spin_lock(&ext_devt_lock); + spin_lock_bh(&ext_devt_lock); idx = idr_alloc(&ext_devt_idr, part, 0, NR_EXT_DEVT, GFP_NOWAIT); - spin_unlock(&ext_devt_lock); + spin_unlock_bh(&ext_devt_lock); idr_preload_end(); if (idx < 0) @@ -449,9 +449,9 @@ void blk_free_devt(dev_t devt) return; if (MAJOR(devt) == BLOCK_EXT_MAJOR) { - spin_lock(&ext_devt_lock); + spin_lock_bh(&ext_devt_lock); idr_remove(&ext_devt_idr, blk_mangle_minor(MINOR(devt))); - spin_unlock(&ext_devt_lock); + spin_unlock_bh(&ext_devt_lock); } } @@ -690,13 +690,13 @@ struct gendisk *get_gendisk(dev_t devt, int *partno) } else { struct hd_struct *part; - spin_lock(&ext_devt_lock); + spin_lock_bh(&ext_devt_lock); part = idr_find(&ext_devt_idr, blk_mangle_minor(MINOR(devt))); if (part && get_disk(part_to_disk(part))) { *partno = part->partno; disk = part_to_disk(part); } - spin_unlock(&ext_devt_lock); + spin_unlock_bh(&ext_devt_lock); } return disk; From b6f2098fb708ce935a59763178c4e8cd942fcf88 Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Mon, 4 May 2015 20:58:57 +0200 Subject: [PATCH 30/39] block: pmem: Add dependency on HAS_IOMEM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Not all architectures have io memory. Fixes: drivers/block/pmem.c: In function ‘pmem_alloc’: drivers/block/pmem.c:146:2: error: implicit declaration of function ‘ioremap_nocache’ [-Werror=implicit-function-declaration] pmem->virt_addr = ioremap_nocache(pmem->phys_addr, pmem->size); ^ drivers/block/pmem.c:146:18: warning: assignment makes pointer from integer without a cast [enabled by default] pmem->virt_addr = ioremap_nocache(pmem->phys_addr, pmem->size); ^ drivers/block/pmem.c:182:2: error: implicit declaration of function ‘iounmap’ [-Werror=implicit-function-declaration] iounmap(pmem->virt_addr); ^ Signed-off-by: Richard Weinberger Reviewed-by: Ross Zwisler Signed-off-by: Jens Axboe --- drivers/block/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index eb1fed5bd516ff..3ccef9eba6f9dc 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -406,6 +406,7 @@ config BLK_DEV_RAM_DAX config BLK_DEV_PMEM tristate "Persistent memory block device support" + depends on HAS_IOMEM help Saying Y here will allow you to use a contiguous range of reserved memory as one or more persistent block devices. From 58c98be137830d34b79024cc5dc95ef54fcd7ffe Mon Sep 17 00:00:00 2001 From: Richard Cochran Date: Thu, 11 Jun 2015 14:51:30 +0200 Subject: [PATCH 31/39] net: igb: fix the start time for periodic output signals When programming the start of a periodic output, the code wrongly places the seconds value into the "low" register and the nanoseconds into the "high" register. Even though this is backwards, it slipped through my testing, because the re-arming code in the interrupt service routine is correct, and the signal does appear starting with the second edge. This patch fixes the issue by programming the registers correctly. Signed-off-by: Richard Cochran Reviewed-by: Jacob Keller Acked-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/igb/igb_ptp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c index e3b9b63ad01083..c3a9392cbc1922 100644 --- a/drivers/net/ethernet/intel/igb/igb_ptp.c +++ b/drivers/net/ethernet/intel/igb/igb_ptp.c @@ -538,8 +538,8 @@ static int igb_ptp_feature_enable_i210(struct ptp_clock_info *ptp, igb->perout[i].start.tv_nsec = rq->perout.start.nsec; igb->perout[i].period.tv_sec = ts.tv_sec; igb->perout[i].period.tv_nsec = ts.tv_nsec; - wr32(trgttiml, rq->perout.start.sec); - wr32(trgttimh, rq->perout.start.nsec); + wr32(trgttimh, rq->perout.start.sec); + wr32(trgttiml, rq->perout.start.nsec); tsauxc |= tsauxc_mask; tsim |= tsim_mask; } else { From 0fae3bf018d97b210051c8797a49d66d31071847 Mon Sep 17 00:00:00 2001 From: Robert Shearman Date: Thu, 11 Jun 2015 19:58:26 +0100 Subject: [PATCH 32/39] mpls: handle device renames for per-device sysctls If a device is renamed and the original name is subsequently reused for a new device, the following warning is generated: sysctl duplicate entry: /net/mpls/conf/veth0//input CPU: 3 PID: 1379 Comm: ip Not tainted 4.1.0-rc4+ #20 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.7.5-20140531_083030-gandalf 04/01/2014 0000000000000000 0000000000000000 ffffffff81566aaf 0000000000000000 ffffffff81236279 ffff88002f7d7f00 0000000000000000 ffff88000db336d8 ffff88000db33698 0000000000000005 ffff88002e046000 ffff8800168c9280 Call Trace: [] ? dump_stack+0x40/0x50 [] ? __register_sysctl_table+0x289/0x5a0 [] ? mpls_dev_notify+0x1ff/0x300 [mpls_router] [] ? notifier_call_chain+0x4f/0x70 [] ? register_netdevice+0x2b2/0x480 [] ? veth_newlink+0x178/0x2d3 [veth] [] ? rtnl_newlink+0x73c/0x8e0 [] ? rtnl_newlink+0x16a/0x8e0 [] ? __kmalloc_reserve.isra.30+0x32/0x90 [] ? rtnetlink_rcv_msg+0x8d/0x250 [] ? __alloc_skb+0x47/0x1f0 [] ? __netlink_lookup+0xab/0xe0 [] ? rtnetlink_rcv+0x30/0x30 [] ? netlink_rcv_skb+0xb0/0xd0 [] ? rtnetlink_rcv+0x24/0x30 [] ? netlink_unicast+0x107/0x1a0 [] ? netlink_sendmsg+0x50e/0x630 [] ? sock_sendmsg+0x3c/0x50 [] ? ___sys_sendmsg+0x27b/0x290 [] ? mem_cgroup_try_charge+0x88/0x110 [] ? mem_cgroup_commit_charge+0x56/0xa0 [] ? do_filp_open+0x30/0xa0 [] ? __sys_sendmsg+0x3e/0x80 [] ? system_call_fastpath+0x16/0x75 Fix this by unregistering the previous sysctl table (registered for the path containing the original device name) and re-registering the table for the path containing the new device name. Fixes: 37bde79979c3 ("mpls: Per-device enabling of packet input") Reported-by: Scott Feldman Signed-off-by: Robert Shearman Signed-off-by: David S. Miller --- net/mpls/af_mpls.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index bff427f3192470..1f93a5978f2ad4 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -564,6 +564,17 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event, case NETDEV_UNREGISTER: mpls_ifdown(dev); break; + case NETDEV_CHANGENAME: + mdev = mpls_dev_get(dev); + if (mdev) { + int err; + + mpls_dev_sysctl_unregister(mdev); + err = mpls_dev_sysctl_register(dev, mdev); + if (err) + return notifier_from_errno(err); + } + break; } return NOTIFY_OK; } From fb05e7a89f500cfc06ae277bdc911b281928995d Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Thu, 11 Jun 2015 16:50:48 -0700 Subject: [PATCH 33/39] net: don't wait for order-3 page allocation We saw excessive direct memory compaction triggered by skb_page_frag_refill. This causes performance issues and add latency. Commit 5640f7685831e0 introduces the order-3 allocation. According to the changelog, the order-3 allocation isn't a must-have but to improve performance. But direct memory compaction has high overhead. The benefit of order-3 allocation can't compensate the overhead of direct memory compaction. This patch makes the order-3 page allocation atomic. If there is no memory pressure and memory isn't fragmented, the alloction will still success, so we don't sacrifice the order-3 benefit here. If the atomic allocation fails, direct memory compaction will not be triggered, skb_page_frag_refill will fallback to order-0 immediately, hence the direct memory compaction overhead is avoided. In the allocation failure case, kswapd is waken up and doing compaction, so chances are allocation could success next time. alloc_skb_with_frags is the same. The mellanox driver does similar thing, if this is accepted, we must fix the driver too. V3: fix the same issue in alloc_skb_with_frags as pointed out by Eric V2: make the changelog clearer Cc: Eric Dumazet Cc: Chris Mason Cc: Debabrata Banerjee Signed-off-by: Shaohua Li Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/skbuff.c | 2 +- net/core/sock.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 3cfff2a3d651fb..41ec02242ea7c2 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4398,7 +4398,7 @@ struct sk_buff *alloc_skb_with_frags(unsigned long header_len, while (order) { if (npages >= 1 << order) { - page = alloc_pages(gfp_mask | + page = alloc_pages((gfp_mask & ~__GFP_WAIT) | __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY, diff --git a/net/core/sock.c b/net/core/sock.c index 469d6039c7f5dc..dc30dc5bb1b892 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1880,7 +1880,7 @@ bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t gfp) pfrag->offset = 0; if (SKB_FRAG_PAGE_ORDER) { - pfrag->page = alloc_pages(gfp | __GFP_COMP | + pfrag->page = alloc_pages((gfp & ~__GFP_WAIT) | __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY, SKB_FRAG_PAGE_ORDER); if (likely(pfrag->page)) { From c008f1d356277a5b7561040596a073d87e56b0c8 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 12 Jun 2015 19:46:44 +1000 Subject: [PATCH 34/39] md: don't return 0 from array_state_store Returning zero from a 'store' function is bad. The return value should be either len length of the string or an error. So use 'len' if 'err' is zero. Fixes: 6791875e2e53 ("md: make reconfig_mutex optional for writes to md sysfs files.") Signed-off-by: NeilBrown Cc: stable@vger.kernel (v4.0+) --- drivers/md/md.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 27506302eb7aa4..dd59d71ade2f52 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -3834,7 +3834,7 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len) err = -EBUSY; } spin_unlock(&mddev->lock); - return err; + return err ?: len; } err = mddev_lock(mddev); if (err) From 8e8e2518fceca407bb8fc2a6710d19d2e217892e Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 12 Jun 2015 19:51:27 +1000 Subject: [PATCH 35/39] md: Close race when setting 'action' to 'idle'. Checking ->sync_thread without holding the mddev_lock() isn't really safe, even after flushing the workqueue which ensures md_start_sync() has been run. While this code is waiting for the lock, md_check_recovery could reap the thread itself, and then start another thread (e.g. recovery might finish, then reshape starts). When this thread gets the lock md_start_sync() hasn't run so it doesn't get reaped, but MD_RECOVERY_RUNNING gets cleared. This allows two threads to start which leads to confusion. So don't both if MD_RECOVERY_RUNNING isn't set, but if it is do the flush and the test and the reap all under the mddev_lock to avoid any race with md_check_recovery. Signed-off-by: NeilBrown Fixes: 6791875e2e53 ("md: make reconfig_mutex optional for writes to md sysfs files.") Cc: stable@vger.kernel.org (v4.0+) --- drivers/md/md.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index dd59d71ade2f52..8d4408baa42823 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -4217,13 +4217,14 @@ action_store(struct mddev *mddev, const char *page, size_t len) set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); else clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); - flush_workqueue(md_misc_wq); - if (mddev->sync_thread) { - set_bit(MD_RECOVERY_INTR, &mddev->recovery); - if (mddev_lock(mddev) == 0) { + if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) && + mddev_lock(mddev) == 0) { + flush_workqueue(md_misc_wq); + if (mddev->sync_thread) { + set_bit(MD_RECOVERY_INTR, &mddev->recovery); md_reap_sync_thread(mddev); - mddev_unlock(mddev); } + mddev_unlock(mddev); } } else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) || test_bit(MD_RECOVERY_NEEDED, &mddev->recovery)) From ea358cd0d2c634ff1379a1392edcdf2289f31e13 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 12 Jun 2015 20:05:04 +1000 Subject: [PATCH 36/39] md: make sure MD_RECOVERY_DONE is clear before starting recovery/resync MD_RECOVERY_DONE is normally cleared by md_check_recovery after a resync etc finished. However it is possible for raid5_start_reshape to race and start a reshape before MD_RECOVERY_DONE is cleared. This can lean to multiple reshapes running at the same time, which isn't good. To make sure it is cleared before starting a reshape, and also clear it when reaping a thread, just to be safe. Signed-off-by: NeilBrown --- drivers/md/md.c | 1 + drivers/md/raid10.c | 1 + drivers/md/raid5.c | 1 + 3 files changed, 3 insertions(+) diff --git a/drivers/md/md.c b/drivers/md/md.c index 8d4408baa42823..4dbed4a67aaf40 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -8262,6 +8262,7 @@ void md_reap_sync_thread(struct mddev *mddev) if (mddev_is_clustered(mddev)) md_cluster_ops->metadata_update_finish(mddev); clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery); + clear_bit(MD_RECOVERY_DONE, &mddev->recovery); clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery); diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index e793ab6b35705e..f55c3f35b74631 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -4156,6 +4156,7 @@ static int raid10_start_reshape(struct mddev *mddev) clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); + clear_bit(MD_RECOVERY_DONE, &mddev->recovery); set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 553d54b870528f..b6793d2e051f3b 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -7354,6 +7354,7 @@ static int raid5_start_reshape(struct mddev *mddev) clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); + clear_bit(MD_RECOVERY_DONE, &mddev->recovery); set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); mddev->sync_thread = md_register_thread(md_do_sync, mddev, From c83b2f20fdde578bded3dfc4405c5db7a039c694 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Fri, 12 Jun 2015 10:15:49 +0100 Subject: [PATCH 37/39] iommu/vt-d: Only enable extended context tables if PASID is supported Although the extended tables are theoretically a completely orthogonal feature to PASID and anything else that *uses* the newly-available bits, some of the early hardware has problems even when all we do is enable them and use only the same bits that were in the old context tables. For now, there's no motivation to support extended tables unless we're going to use PASID support to do SVM. So just don't use them unless PASID support is advertised too. Also add a command-line bailout just in case later chips also have issues. The equivalent problem for PASID support has already been fixed with the upcoming VT-d spec update and commit bd00c606a ("iommu/vt-d: Change PASID support to bit 40 of Extended Capability Register"), because the problematic platforms use the old definition of the PASID-capable bit, which is now marked as reserved and meaningless. So with this change, we'll magically start using ECS again only when we see the new hardware advertising "hey, we have PASID support and we actually tested it this time" on bit 40. The VT-d hardware architect has promised that we are not going to have any reason to support ECS *without* PASID any time soon, and he'll make sure he checks with us before changing that. In the future, if hypothetical new features also use new bits in the context tables and can be seen on implementations *without* PASID support, we might need to add their feature bits to the ecs_enabled() macro. Signed-off-by: David Woodhouse --- Documentation/kernel-parameters.txt | 6 ++++++ drivers/iommu/intel-iommu.c | 18 +++++++++++++++--- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index f6befa9855c128..491bb15620fc57 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1481,6 +1481,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted. By default, super page will be supported if Intel IOMMU has the capability. With this option, super page will not be supported. + ecs_off [Default Off] + By default, extended context tables will be supported if + the hardware advertises that it has support both for the + extended tables themselves, and also PASID support. With + this option set, extended tables will not be used even + on hardware which claims to support them. intel_idle.max_cstate= [KNL,HW,ACPI,X86] 0 disables intel_idle and fall back on acpi_idle. diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 2ffe5896994485..5ecfaf29933ad4 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -422,6 +422,14 @@ static int dmar_map_gfx = 1; static int dmar_forcedac; static int intel_iommu_strict; static int intel_iommu_superpage = 1; +static int intel_iommu_ecs = 1; + +/* We only actually use ECS when PASID support (on the new bit 40) + * is also advertised. Some early implementations — the ones with + * PASID support on bit 28 — have issues even when we *only* use + * extended root/context tables. */ +#define ecs_enabled(iommu) (intel_iommu_ecs && ecap_ecs(iommu->ecap) && \ + ecap_pasid(iommu->ecap)) int intel_iommu_gfx_mapped; EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped); @@ -465,6 +473,10 @@ static int __init intel_iommu_setup(char *str) printk(KERN_INFO "Intel-IOMMU: disable supported super page\n"); intel_iommu_superpage = 0; + } else if (!strncmp(str, "ecs_off", 7)) { + printk(KERN_INFO + "Intel-IOMMU: disable extended context table support\n"); + intel_iommu_ecs = 0; } str += strcspn(str, ","); @@ -669,7 +681,7 @@ static inline struct context_entry *iommu_context_addr(struct intel_iommu *iommu struct context_entry *context; u64 *entry; - if (ecap_ecs(iommu->ecap)) { + if (ecs_enabled(iommu)) { if (devfn >= 0x80) { devfn -= 0x80; entry = &root->hi; @@ -806,7 +818,7 @@ static void free_context_table(struct intel_iommu *iommu) if (context) free_pgtable_page(context); - if (!ecap_ecs(iommu->ecap)) + if (!ecs_enabled(iommu)) continue; context = iommu_context_addr(iommu, i, 0x80, 0); @@ -1141,7 +1153,7 @@ static void iommu_set_root_entry(struct intel_iommu *iommu) unsigned long flag; addr = virt_to_phys(iommu->root_entry); - if (ecap_ecs(iommu->ecap)) + if (ecs_enabled(iommu)) addr |= DMA_RTADDR_RTT; raw_spin_lock_irqsave(&iommu->register_lock, flag); From ae36806a622aea5ac79f279cfccc82144967b6e7 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Thu, 11 Jun 2015 14:49:46 -0300 Subject: [PATCH 38/39] sctp: allow authenticating DATA chunks that are bundled with COOKIE_ECHO Currently, we can ask to authenticate DATA chunks and we can send DATA chunks on the same packet as COOKIE_ECHO, but if you try to combine both, the DATA chunk will be sent unauthenticated and peer won't accept it, leading to a communication failure. This happens because even though the data was queued after it was requested to authenticate DATA chunks, it was also queued before we could know that remote peer can handle authenticating, so sctp_auth_send_cid() returns false. The fix is whenever we set up an active key, re-check send queue for chunks that now should be authenticated. As a result, such packet will now contain COOKIE_ECHO + AUTH + DATA chunks, in that order. Reported-by: Liu Wei Signed-off-by: Marcelo Ricardo Leitner Acked-by: Neil Horman Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/auth.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/net/sctp/auth.c b/net/sctp/auth.c index fb7976aee61c84..4f15b7d730e13d 100644 --- a/net/sctp/auth.c +++ b/net/sctp/auth.c @@ -381,13 +381,14 @@ int sctp_auth_asoc_copy_shkeys(const struct sctp_endpoint *ep, } -/* Public interface to creat the association shared key. +/* Public interface to create the association shared key. * See code above for the algorithm. */ int sctp_auth_asoc_init_active_key(struct sctp_association *asoc, gfp_t gfp) { struct sctp_auth_bytes *secret; struct sctp_shared_key *ep_key; + struct sctp_chunk *chunk; /* If we don't support AUTH, or peer is not capable * we don't need to do anything. @@ -410,6 +411,14 @@ int sctp_auth_asoc_init_active_key(struct sctp_association *asoc, gfp_t gfp) sctp_auth_key_put(asoc->asoc_shared_key); asoc->asoc_shared_key = secret; + /* Update send queue in case any chunk already in there now + * needs authenticating + */ + list_for_each_entry(chunk, &asoc->outqueue.out_chunk_list, list) { + if (sctp_auth_send_cid(chunk->chunk_hdr->type, asoc)) + chunk->auth = 1; + } + return 0; } From b07d496177cd3bc4b70fb8a5e85ede24cb403a11 Mon Sep 17 00:00:00 2001 From: Masanari Iida Date: Sat, 13 Jun 2015 00:23:21 +0900 Subject: [PATCH 39/39] Doc: networking: Fix URL for wiki.wireshark.org in udplite.txt This patch fix URL (http to https) for wiki.wireshark.org. Signed-off-by: Masanari Iida Signed-off-by: David S. Miller --- Documentation/networking/udplite.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/networking/udplite.txt b/Documentation/networking/udplite.txt index d727a38291005f..53a726855e49bf 100644 --- a/Documentation/networking/udplite.txt +++ b/Documentation/networking/udplite.txt @@ -20,7 +20,7 @@ files/UDP-Lite-HOWTO.txt o The Wireshark UDP-Lite WiKi (with capture files): - http://wiki.wireshark.org/Lightweight_User_Datagram_Protocol + https://wiki.wireshark.org/Lightweight_User_Datagram_Protocol o The Protocol Spec, RFC 3828, http://www.ietf.org/rfc/rfc3828.txt