Skip to content

Commit af085d9

Browse files
jpoimboeJiri Kosina
authored and
Jiri Kosina
committed
stacktrace/x86: add function for detecting reliable stack traces
For live patching and possibly other use cases, a stack trace is only useful if it can be assured that it's completely reliable. Add a new save_stack_trace_tsk_reliable() function to achieve that.

Note that if the target task isn't the current task, and the target task is allowed to run, then it could be writing the stack while the unwinder is reading it, resulting in possible corruption. So the caller of save_stack_trace_tsk_reliable() must ensure that the task is either 'current' or inactive.

save_stack_trace_tsk_reliable() relies on the x86 unwinder's detection of pt_regs on the stack. If the pt_regs are not user-mode registers from a syscall, then they indicate an in-kernel interrupt or exception (e.g. preemption or a page fault), in which case the stack is considered unreliable due to the nature of frame pointers.

It also relies on the x86 unwinder's detection of other issues, such as:

- corrupted stack data
- stack grows the wrong way
- stack walk doesn't reach the bottom
- user didn't provide a large enough entries array

Such issues are reported by checking unwind_error() and !unwind_done().

Also add CONFIG_HAVE_RELIABLE_STACKTRACE so arch-independent code can determine at build time whether the function is implemented.

Signed-off-by: Josh Poimboeuf <[email protected]>
Reviewed-by: Miroslav Benes <[email protected]>
Acked-by: Ingo Molnar <[email protected]> # for the x86 changes
Signed-off-by: Jiri Kosina <[email protected]>
1 parent c1ae3cf commit af085d9

File tree

7 files changed

+126
-6
lines changed

7 files changed

+126
-6
lines changed

arch/Kconfig

+6
Original file line numberDiff line numberDiff line change
@@ -713,6 +713,12 @@ config HAVE_STACK_VALIDATION
713713
Architecture supports the 'objtool check' host tool command, which
714714
performs compile-time stack metadata validation.
715715

716+
config HAVE_RELIABLE_STACKTRACE
717+
bool
718+
help
719+
Architecture has a save_stack_trace_tsk_reliable() function which
720+
only returns a stack trace if it can guarantee the trace is reliable.
721+
716722
config HAVE_ARCH_HASH
717723
bool
718724
default n

arch/x86/Kconfig

+1
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,7 @@ config X86
160160
select HAVE_PERF_REGS
161161
select HAVE_PERF_USER_STACK_DUMP
162162
select HAVE_REGS_AND_STACK_ACCESS_API
163+
select HAVE_RELIABLE_STACKTRACE if X86_64 && FRAME_POINTER && STACK_VALIDATION
163164
select HAVE_STACK_VALIDATION if X86_64
164165
select HAVE_SYSCALL_TRACEPOINTS
165166
select HAVE_UNSTABLE_SCHED_CLOCK

arch/x86/include/asm/unwind.h

+6
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ struct unwind_state {
1111
unsigned long stack_mask;
1212
struct task_struct *task;
1313
int graph_idx;
14+
bool error;
1415
#ifdef CONFIG_FRAME_POINTER
1516
unsigned long *bp, *orig_sp;
1617
struct pt_regs *regs;
@@ -40,6 +41,11 @@ void unwind_start(struct unwind_state *state, struct task_struct *task,
4041
__unwind_start(state, task, regs, first_frame);
4142
}
4243

44+
/*
 * Report whether @state has its error flag set.
 *
 * Returns the current value of state->error.
 */
static inline bool unwind_error(struct unwind_state *state)
{
	return state->error;
}
48+
4349
#ifdef CONFIG_FRAME_POINTER
4450

4551
static inline

arch/x86/kernel/stacktrace.c

+95-1
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,101 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
7676
}
7777
EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
7878

79+
#ifdef CONFIG_HAVE_RELIABLE_STACKTRACE
80+
81+
/*
 * Warn and dump @task's stack the first time a given expansion of this macro
 * is reached; subsequent hits at the same expansion do nothing.  Each
 * expansion carries its own static guard flag, placed in .data.unlikely.
 */
#define STACKTRACE_DUMP_ONCE(task) ({				\
	static bool __section(.data.unlikely) __already_dumped;	\
								\
	if (!__already_dumped) {				\
		__already_dumped = true;			\
		WARN_ON(1);					\
		show_stack(task, NULL);				\
	}							\
})
90+
91+
static int __save_stack_trace_reliable(struct stack_trace *trace,
92+
struct task_struct *task)
93+
{
94+
struct unwind_state state;
95+
struct pt_regs *regs;
96+
unsigned long addr;
97+
98+
for (unwind_start(&state, task, NULL, NULL); !unwind_done(&state);
99+
unwind_next_frame(&state)) {
100+
101+
regs = unwind_get_entry_regs(&state);
102+
if (regs) {
103+
/*
104+
* Kernel mode registers on the stack indicate an
105+
* in-kernel interrupt or exception (e.g., preemption
106+
* or a page fault), which can make frame pointers
107+
* unreliable.
108+
*/
109+
if (!user_mode(regs))
110+
return -EINVAL;
111+
112+
/*
113+
* The last frame contains the user mode syscall
114+
* pt_regs. Skip it and finish the unwind.
115+
*/
116+
unwind_next_frame(&state);
117+
if (!unwind_done(&state)) {
118+
STACKTRACE_DUMP_ONCE(task);
119+
return -EINVAL;
120+
}
121+
break;
122+
}
123+
124+
addr = unwind_get_return_address(&state);
125+
126+
/*
127+
* A NULL or invalid return address probably means there's some
128+
* generated code which __kernel_text_address() doesn't know
129+
* about.
130+
*/
131+
if (!addr) {
132+
STACKTRACE_DUMP_ONCE(task);
133+
return -EINVAL;
134+
}
135+
136+
if (save_stack_address(trace, addr, false))
137+
return -EINVAL;
138+
}
139+
140+
/* Check for stack corruption */
141+
if (unwind_error(&state)) {
142+
STACKTRACE_DUMP_ONCE(task);
143+
return -EINVAL;
144+
}
145+
146+
if (trace->nr_entries < trace->max_entries)
147+
trace->entries[trace->nr_entries++] = ULONG_MAX;
148+
149+
return 0;
150+
}
151+
152+
/*
 * Save a stack trace of @tsk into @trace, returning an error as soon as any
 * unreliable feature of the stack is detected.  A return value of 0
 * guarantees that the saved trace is reliable.
 *
 * The caller *must* make sure that @tsk is either 'current' or inactive.
 */
int save_stack_trace_tsk_reliable(struct task_struct *tsk,
				  struct stack_trace *trace)
{
	int ret = -EINVAL;

	/* Hold a reference on the task's stack for the duration of the walk. */
	if (try_get_task_stack(tsk)) {
		ret = __save_stack_trace_reliable(trace, tsk);
		put_task_stack(tsk);
	}

	return ret;
}
172+
#endif /* CONFIG_HAVE_RELIABLE_STACKTRACE */
173+
79174
/* Userspace stacktrace - based on kernel/trace/trace_sysprof.c */
80175

81176
struct stack_frame_user {
@@ -138,4 +233,3 @@ void save_stack_trace_user(struct stack_trace *trace)
138233
if (trace->nr_entries < trace->max_entries)
139234
trace->entries[trace->nr_entries++] = ULONG_MAX;
140235
}
141-

arch/x86/kernel/unwind_frame.c

+2
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,8 @@ bool unwind_next_frame(struct unwind_state *state)
225225
return true;
226226

227227
bad_address:
228+
state->error = true;
229+
228230
/*
229231
* When unwinding a non-current task, the task might actually be
230232
* running on another CPU, in which case it could be modifying its

include/linux/stacktrace.h

+6-3
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@ extern void save_stack_trace_regs(struct pt_regs *regs,
1818
struct stack_trace *trace);
1919
extern void save_stack_trace_tsk(struct task_struct *tsk,
2020
struct stack_trace *trace);
21+
extern int save_stack_trace_tsk_reliable(struct task_struct *tsk,
22+
struct stack_trace *trace);
2123

2224
extern void print_stack_trace(struct stack_trace *trace, int spaces);
2325
extern int snprint_stack_trace(char *buf, size_t size,
@@ -29,12 +31,13 @@ extern void save_stack_trace_user(struct stack_trace *trace);
2931
# define save_stack_trace_user(trace) do { } while (0)
3032
#endif
3133

32-
#else
34+
#else /* !CONFIG_STACKTRACE */
3335
# define save_stack_trace(trace) do { } while (0)
3436
# define save_stack_trace_tsk(tsk, trace) do { } while (0)
3537
# define save_stack_trace_user(trace) do { } while (0)
3638
# define print_stack_trace(trace, spaces) do { } while (0)
3739
# define snprint_stack_trace(buf, size, trace, spaces) do { } while (0)
38-
#endif
40+
# define save_stack_trace_tsk_reliable(tsk, trace) ({ -ENOSYS; })
41+
#endif /* CONFIG_STACKTRACE */
3942

40-
#endif
43+
#endif /* __LINUX_STACKTRACE_H */

kernel/stacktrace.c

+10-2
Original file line numberDiff line numberDiff line change
@@ -54,8 +54,8 @@ int snprint_stack_trace(char *buf, size_t size,
5454
EXPORT_SYMBOL_GPL(snprint_stack_trace);
5555

5656
/*
57-
* Architectures that do not implement save_stack_trace_tsk or
58-
* save_stack_trace_regs get this weak alias and a once-per-bootup warning
57+
* Architectures that do not implement save_stack_trace_*()
58+
* get these weak aliases and once-per-bootup warnings
5959
* (whenever this facility is utilized - for example by procfs):
6060
*/
6161
__weak void
@@ -69,3 +69,11 @@ save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace)
6969
{
7070
WARN_ONCE(1, KERN_INFO "save_stack_trace_regs() not implemented yet.\n");
7171
}
72+
73+
__weak int
74+
save_stack_trace_tsk_reliable(struct task_struct *tsk,
75+
struct stack_trace *trace)
76+
{
77+
WARN_ONCE(1, KERN_INFO "save_stack_tsk_reliable() not implemented yet.\n");
78+
return -ENOSYS;
79+
}

0 commit comments

Comments
 (0)