Skip to content

Commit

Permalink
pidns: Stop pid allocation when init dies
Browse files Browse the repository at this point in the history
Oleg pointed out that in a pid namespace the sequence:
- pid 1 becomes a zombie
- setns(thepidns), fork,...
- reaping pid 1.
- The injected processes exiting.

can lead to processes attempting to access their child reaper and
instead following a stale pointer.

That waitpid for init can return before all of the processes in
the pid namespace have exited is also unfortunate.

Avoid these problems by disabling the allocation of new pids in a pid
namespace when init dies, instead of when the last process in a pid
namespace is reaped.

Pointed-out-by:  Oleg Nesterov <[email protected]>
Reviewed-by: Oleg Nesterov <[email protected]>
Signed-off-by: "Eric W. Biederman" <[email protected]>
  • Loading branch information
ebiederm committed Dec 26, 2012
1 parent 8382fca commit c876ad7
Show file tree
Hide file tree
Showing 4 changed files with 20 additions and 4 deletions.
1 change: 1 addition & 0 deletions include/linux/pid.h
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ int next_pidmap(struct pid_namespace *pid_ns, unsigned int last);

extern struct pid *alloc_pid(struct pid_namespace *ns);
extern void free_pid(struct pid *pid);
extern void disable_pid_allocation(struct pid_namespace *ns);

/*
* ns_of_pid() returns the pid namespace in which the specified pid was
Expand Down
4 changes: 3 additions & 1 deletion include/linux/pid_namespace.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ struct pid_namespace {
struct kref kref;
struct pidmap pidmap[PIDMAP_ENTRIES];
int last_pid;
int nr_hashed;
unsigned int nr_hashed;
struct task_struct *child_reaper;
struct kmem_cache *pid_cachep;
unsigned int level;
Expand All @@ -42,6 +42,8 @@ struct pid_namespace {

extern struct pid_namespace init_pid_ns;

#define PIDNS_HASH_ADDING (1U << 31)

#ifdef CONFIG_PID_NS
static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns)
{
Expand Down
15 changes: 12 additions & 3 deletions kernel/pid.c
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,6 @@ void free_pid(struct pid *pid)
wake_up_process(ns->child_reaper);
break;
case 0:
ns->nr_hashed = -1;
schedule_work(&ns->proc_work);
break;
}
Expand Down Expand Up @@ -319,7 +318,7 @@ struct pid *alloc_pid(struct pid_namespace *ns)

upid = pid->numbers + ns->level;
spin_lock_irq(&pidmap_lock);
if (ns->nr_hashed < 0)
if (!(ns->nr_hashed & PIDNS_HASH_ADDING))
goto out_unlock;
for ( ; upid >= pid->numbers; --upid) {
hlist_add_head_rcu(&upid->pid_chain,
Expand All @@ -342,6 +341,13 @@ struct pid *alloc_pid(struct pid_namespace *ns)
goto out;
}

/*
 * disable_pid_allocation - stop new pids from being hashed into @ns
 * @ns: pid namespace whose PIDNS_HASH_ADDING flag is to be cleared
 *
 * Clears the PIDNS_HASH_ADDING bit in ns->nr_hashed while holding
 * pidmap_lock.  alloc_pid() tests that bit under the same lock and
 * takes its out_unlock path when the bit is not set.  The remaining
 * bits of nr_hashed are left untouched.
 */
void disable_pid_allocation(struct pid_namespace *ns)
{
	spin_lock_irq(&pidmap_lock);
	ns->nr_hashed = ns->nr_hashed & ~PIDNS_HASH_ADDING;
	spin_unlock_irq(&pidmap_lock);
}

struct pid *find_pid_ns(int nr, struct pid_namespace *ns)
{
struct hlist_node *elem;
Expand Down Expand Up @@ -573,6 +579,9 @@ void __init pidhash_init(void)

void __init pidmap_init(void)
{
/* Verify no one has done anything silly */
BUILD_BUG_ON(PID_MAX_LIMIT >= PIDNS_HASH_ADDING);

/* bump default and minimum pid_max based on number of cpus */
pid_max = min(pid_max_max, max_t(int, pid_max,
PIDS_PER_CPU_DEFAULT * num_possible_cpus()));
Expand All @@ -584,7 +593,7 @@ void __init pidmap_init(void)
/* Reserve PID 0. We never call free_pidmap(0) */
set_bit(0, init_pid_ns.pidmap[0].page);
atomic_dec(&init_pid_ns.pidmap[0].nr_free);
init_pid_ns.nr_hashed = 1;
init_pid_ns.nr_hashed = PIDNS_HASH_ADDING;

init_pid_ns.pid_cachep = KMEM_CACHE(pid,
SLAB_HWCACHE_ALIGN | SLAB_PANIC);
Expand Down
4 changes: 4 additions & 0 deletions kernel/pid_namespace.c
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns
ns->level = level;
ns->parent = get_pid_ns(parent_pid_ns);
ns->user_ns = get_user_ns(user_ns);
ns->nr_hashed = PIDNS_HASH_ADDING;
INIT_WORK(&ns->proc_work, proc_cleanup_work);

set_bit(0, ns->pidmap[0].page);
Expand Down Expand Up @@ -181,6 +182,9 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
int rc;
struct task_struct *task, *me = current;

/* Don't allow any more processes into the pid namespace */
disable_pid_allocation(pid_ns);

/* Ignore SIGCHLD causing any terminated children to autoreap */
spin_lock_irq(&me->sighand->siglock);
me->sighand->action[SIGCHLD - 1].sa.sa_handler = SIG_IGN;
Expand Down

0 comments on commit c876ad7

Please sign in to comment.