mm: move pcp and lru-pcp draining into single wq
We currently have two dedicated WQ_MEM_RECLAIM workqueues in the mm code:
vmstat_wq for updating pcp stats and lru_add_drain_wq for draining per-cpu
lru caches.  This is more than necessary because both can run on a single
workqueue: neither blocks on locks that require a memory allocation, nor
performs any allocations itself.  Merging them saves one rescuer thread.

On the other hand, drain_all_pages() queues work on the system wq, which
has no rescuer, so its forward progress depends on memory allocation: when
all workers are stuck allocating and new ones cannot be created, the queued
drain work never runs.

Initially we thought this would be more of a theoretical problem, but Hugh
Dickins has reported:

: 4.11-rc has been giving me hangs after hours of swapping load.  At
: first they looked like memory leaks ("fork: Cannot allocate memory");
: but for no good reason I happened to do "cat /proc/sys/vm/stat_refresh"
: before looking at /proc/meminfo one time, and the stat_refresh stuck
: in D state, waiting for completion of flush_work like many kworkers.
: kthreadd waiting for completion of flush_work in drain_all_pages().

This worker should use WQ_MEM_RECLAIM as well in order to guarantee forward
progress.  We can reuse the same workqueue that serves lru draining and
vmstat: a single, shared mm_percpu_wq.

Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Michal Hocko <[email protected]>
Suggested-by: Tetsuo Handa <[email protected]>
Acked-by: Vlastimil Babka <[email protected]>
Acked-by: Mel Gorman <[email protected]>
Tested-by: Yang Li <[email protected]>
Tested-by: Hugh Dickins <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
Michal Hocko authored and torvalds committed Apr 8, 2017
1 parent cdcf433 commit ce61287
Showing 4 changed files with 32 additions and 26 deletions.
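
Editor's note on the mechanism the commit message relies on: a workqueue created with WQ_MEM_RECLAIM keeps a dedicated rescuer thread that can still execute queued work when the system is too short on memory to fork new workers, which is exactly the state Hugh's hang occurred in. The following is a minimal sketch of that pattern as a hypothetical standalone module (names like demo_wq are invented for illustration; this code is not part of the commit):

#include <linux/module.h>
#include <linux/workqueue.h>

/* One shared reclaim-safe workqueue, mirroring the role of mm_percpu_wq. */
static struct workqueue_struct *demo_wq;
static struct work_struct demo_work;

static void demo_fn(struct work_struct *work)
{
        pr_info("ran on a WQ_MEM_RECLAIM workqueue\n");
}

static int __init demo_init(void)
{
        /*
         * WQ_MEM_RECLAIM reserves a rescuer thread, so queued work keeps
         * making progress even when no new worker threads can be created.
         */
        demo_wq = alloc_workqueue("demo_wq", WQ_MEM_RECLAIM | WQ_FREEZABLE, 0);
        if (!demo_wq)
                return -ENOMEM;

        INIT_WORK(&demo_work, demo_fn);
        /* queue_work(), not schedule_work(): the system wq has no rescuer */
        queue_work(demo_wq, &demo_work);
        return 0;
}

static void __exit demo_exit(void)
{
        flush_work(&demo_work);
        destroy_workqueue(demo_wq);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");

The commit applies this pattern by funneling pcp draining, lru draining, and vmstat updates onto one such queue, mm_percpu_wq, instead of keeping two rescuers or relying on the rescuer-less system wq.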
mm/internal.h: 7 additions & 0 deletions

@@ -481,6 +481,13 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone,
 enum ttu_flags;
 struct tlbflush_unmap_batch;
 
+
+/*
+ * only for MM internal work items which do not depend on
+ * any allocations or locks which might depend on allocations
+ */
+extern struct workqueue_struct *mm_percpu_wq;
+
 #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
 void try_to_unmap_flush(void);
 void try_to_unmap_flush_dirty(void);
mm/page_alloc.c: 8 additions & 1 deletion

@@ -2373,6 +2373,13 @@ void drain_all_pages(struct zone *zone)
 	 */
 	static cpumask_t cpus_with_pcps;
 
+	/*
+	 * Make sure nobody triggers this path before mm_percpu_wq is fully
+	 * initialized.
+	 */
+	if (WARN_ON_ONCE(!mm_percpu_wq))
+		return;
+
 	/* Workqueues cannot recurse */
 	if (current->flags & PF_WQ_WORKER)
 		return;
@@ -2422,7 +2429,7 @@ void drain_all_pages(struct zone *zone)
 	for_each_cpu(cpu, &cpus_with_pcps) {
 		struct work_struct *work = per_cpu_ptr(&pcpu_drain, cpu);
 		INIT_WORK(work, drain_local_pages_wq);
-		schedule_work_on(cpu, work);
+		queue_work_on(cpu, mm_percpu_wq, work);
 	}
 	for_each_cpu(cpu, &cpus_with_pcps)
 		flush_work(per_cpu_ptr(&pcpu_drain, cpu));
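
The second hunk above changes only which queue the per-cpu drain work lands on, but the surrounding queue-then-flush shape is what makes drain_all_pages() synchronous: one work item is queued per CPU with pending pages, then each is flushed, so the function returns only after every selected CPU has drained. A condensed, hypothetical rendering of that barrier pattern (demo_* names invented for illustration):

#include <linux/cpumask.h>
#include <linux/percpu.h>
#include <linux/workqueue.h>

static DEFINE_PER_CPU(struct work_struct, demo_drain_work);

static void demo_drain_local(struct work_struct *work)
{
        /* drain this CPU's per-cpu pages; body elided in this sketch */
}

/* Queue a drain on every CPU in @cpus, then wait for all of them. */
static void demo_drain_all(struct workqueue_struct *wq,
                           const struct cpumask *cpus)
{
        int cpu;

        for_each_cpu(cpu, cpus) {
                struct work_struct *work = per_cpu_ptr(&demo_drain_work, cpu);

                INIT_WORK(work, demo_drain_local);
                queue_work_on(cpu, wq, work);   /* run on that specific CPU */
        }
        for_each_cpu(cpu, cpus)
                flush_work(per_cpu_ptr(&demo_drain_work, cpu)); /* barrier */
}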
mm/swap.c: 8 additions & 19 deletions

@@ -670,30 +670,19 @@ static void lru_add_drain_per_cpu(struct work_struct *dummy)
 
 static DEFINE_PER_CPU(struct work_struct, lru_add_drain_work);
 
-/*
- * lru_add_drain_wq is used to do lru_add_drain_all() from a WQ_MEM_RECLAIM
- * workqueue, aiding in getting memory freed.
- */
-static struct workqueue_struct *lru_add_drain_wq;
-
-static int __init lru_init(void)
-{
-	lru_add_drain_wq = alloc_workqueue("lru-add-drain", WQ_MEM_RECLAIM, 0);
-
-	if (WARN(!lru_add_drain_wq,
-		"Failed to create workqueue lru_add_drain_wq"))
-		return -ENOMEM;
-
-	return 0;
-}
-early_initcall(lru_init);
-
 void lru_add_drain_all(void)
 {
 	static DEFINE_MUTEX(lock);
 	static struct cpumask has_work;
 	int cpu;
 
+	/*
+	 * Make sure nobody triggers this path before mm_percpu_wq is fully
+	 * initialized.
+	 */
+	if (WARN_ON(!mm_percpu_wq))
+		return;
+
 	mutex_lock(&lock);
 	get_online_cpus();
 	cpumask_clear(&has_work);
@@ -707,7 +696,7 @@ void lru_add_drain_all(void)
 		    pagevec_count(&per_cpu(lru_deactivate_pvecs, cpu)) ||
 		    need_activate_page_drain(cpu)) {
 			INIT_WORK(work, lru_add_drain_per_cpu);
-			queue_work_on(cpu, lru_add_drain_wq, work);
+			queue_work_on(cpu, mm_percpu_wq, work);
 			cpumask_set_cpu(cpu, &has_work);
 		}
 	}
mm/vmstat.c: 9 additions & 6 deletions

@@ -1552,7 +1552,6 @@ static const struct file_operations proc_vmstat_file_operations = {
 #endif /* CONFIG_PROC_FS */
 
 #ifdef CONFIG_SMP
-static struct workqueue_struct *vmstat_wq;
 static DEFINE_PER_CPU(struct delayed_work, vmstat_work);
 int sysctl_stat_interval __read_mostly = HZ;
 
@@ -1623,7 +1622,7 @@ static void vmstat_update(struct work_struct *w)
 		 * to occur in the future. Keep on running the
 		 * update worker thread.
 		 */
-		queue_delayed_work_on(smp_processor_id(), vmstat_wq,
+		queue_delayed_work_on(smp_processor_id(), mm_percpu_wq,
 			this_cpu_ptr(&vmstat_work),
 			round_jiffies_relative(sysctl_stat_interval));
 	}
@@ -1702,7 +1701,7 @@ static void vmstat_shepherd(struct work_struct *w)
 		struct delayed_work *dw = &per_cpu(vmstat_work, cpu);
 
 		if (!delayed_work_pending(dw) && need_update(cpu))
-			queue_delayed_work_on(cpu, vmstat_wq, dw, 0);
+			queue_delayed_work_on(cpu, mm_percpu_wq, dw, 0);
 	}
 	put_online_cpus();
 
@@ -1718,7 +1717,6 @@ static void __init start_shepherd_timer(void)
 		INIT_DEFERRABLE_WORK(per_cpu_ptr(&vmstat_work, cpu),
 			vmstat_update);
 
-	vmstat_wq = alloc_workqueue("vmstat", WQ_FREEZABLE|WQ_MEM_RECLAIM, 0);
 	schedule_delayed_work(&shepherd,
 		round_jiffies_relative(sysctl_stat_interval));
 }
@@ -1764,11 +1762,16 @@ static int vmstat_cpu_dead(unsigned int cpu)
 
 #endif
 
+struct workqueue_struct *mm_percpu_wq;
+
 void __init init_mm_internals(void)
 {
-#ifdef CONFIG_SMP
-	int ret;
+	int ret __maybe_unused;
+
+	mm_percpu_wq = alloc_workqueue("mm_percpu_wq",
+				       WQ_FREEZABLE|WQ_MEM_RECLAIM, 0);
 
+#ifdef CONFIG_SMP
 	ret = cpuhp_setup_state_nocalls(CPUHP_MM_VMSTAT_DEAD, "mm/vmstat:dead",
 					NULL, vmstat_cpu_dead);
 	if (ret < 0)
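
Note the initialization ordering this last hunk establishes: mm_percpu_wq is allocated unconditionally in init_mm_internals(), outside the CONFIG_SMP block, so UP kernels get the reclaim-safe queue for drain_all_pages() as well (and `ret` becomes __maybe_unused because it is only consumed on SMP builds). Any caller that can in principle run before that point bails out through the WARN_ON checks added earlier in the diff. A small sketch of that guard idiom, with hypothetical demo_* names:

#include <linux/bug.h>
#include <linux/workqueue.h>

/* Set up once during early boot; NULL until then. */
static struct workqueue_struct *demo_early_wq;

void __init demo_early_init(void)
{
        demo_early_wq = alloc_workqueue("demo_early_wq",
                                        WQ_FREEZABLE | WQ_MEM_RECLAIM, 0);
}

int demo_queue(struct work_struct *work)
{
        /* Warn loudly (once) and refuse work queued before init ran. */
        if (WARN_ON_ONCE(!demo_early_wq))
                return -EAGAIN;

        queue_work(demo_early_wq, work);
        return 0;
}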
