Skip to content

Commit

Permalink
debug: thread_analyzer: Option to analyze threads on each core separa…
Browse files Browse the repository at this point in the history
…tely

Add an implementation that analyzes threads on each CPU separately. This
feature can be enabled with the THREAD_ANALYZER_AUTO_SEPARATE_CORES Kconfig
option. If enabled, an analyzer thread is started for each CPU, and each of
these threads only analyzes the threads on the CPU it is running on.

This feature is needed on the Intel ADSP platform, where per-CPU
caches are not synchronized between the CPUs. It is probably also
needed by other platforms having CONFIG_KERNEL_COHERENCE=y, so default
to THREAD_ANALYZER_AUTO_SEPARATE_CORES=y for those platforms.

Signed-off-by: Jyri Sarha <[email protected]>
  • Loading branch information
Jyri Sarha authored and fabiobaltieri committed Jul 30, 2024
1 parent 28215fc commit 1b6e0f6
Show file tree
Hide file tree
Showing 3 changed files with 133 additions and 32 deletions.
17 changes: 12 additions & 5 deletions include/zephyr/debug/thread_analyzer.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,18 +52,25 @@ typedef void (*thread_analyzer_cb)(struct thread_analyzer_info *info);
/** @brief Run the thread analyzer and provide information to the callback
*
* This function analyzes the current state for all threads and calls
* a given callback on every thread found.
* a given callback on every thread found. In the special case when Kconfig
* option THREAD_ANALYZER_AUTO_SEPARATE_CORES is set, the function analyzes
* only the threads running on the specified cpu.
*
* @param cb The callback function handler
* @param cpu cpu to analyze, ignored if THREAD_ANALYZER_AUTO_SEPARATE_CORES=n
*/
void thread_analyzer_run(thread_analyzer_cb cb);
void thread_analyzer_run(thread_analyzer_cb cb, unsigned int cpu);

/** @brief Run the thread analyzer and print stack size statistics.
*
* This function runs the thread analyzer and prints the output in standard
* form.
* This function runs the thread analyzer and prints the output in
* standard form. In the special case when Kconfig option
* THREAD_ANALYZER_AUTO_SEPARATE_CORES is set, the function analyzes
* only the threads running on the specified cpu.
*
* @param cpu cpu to analyze, ignored if THREAD_ANALYZER_AUTO_SEPARATE_CORES=n
*/
void thread_analyzer_print(void);
void thread_analyzer_print(unsigned int cpu);

/** @} */

Expand Down
9 changes: 9 additions & 0 deletions subsys/debug/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,15 @@ config THREAD_ANALYZER_AUTO

if THREAD_ANALYZER_AUTO

config THREAD_ANALYZER_AUTO_SEPARATE_CORES
bool "Run thread analyzer separately on each core"
default y if KERNEL_COHERENCE
depends on SCHED_CPU_MASK
help
Run the thread analyzer auto thread on each core and report
cores separately. This feature is needed for platforms running
on cache-incoherent architectures.

config THREAD_ANALYZER_AUTO_INTERVAL
int "Thread analysis interval"
default 60
Expand Down
139 changes: 112 additions & 27 deletions subsys/debug/thread_analyzer.c
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,11 @@ static void thread_print_cb(struct thread_analyzer_info *info)
#endif
}

/* Bundles the user callback together with the CPU being analyzed so that
 * both can travel through the single user_data pointer handed to the
 * k_thread_foreach*() iterators (see thread_analyzer_run()).
 */
struct ta_cb_user_data {
	thread_analyzer_cb cb;	/* callback invoked for every thread found */
	unsigned int cpu;	/* CPU whose runtime stats are used; only
				 * meaningful when
				 * CONFIG_THREAD_ANALYZER_AUTO_SEPARATE_CORES=y
				 */
};

static void thread_analyze_cb(const struct k_thread *cthread, void *user_data)
{
struct k_thread *thread = (struct k_thread *)cthread;
Expand All @@ -79,7 +84,9 @@ static void thread_analyze_cb(const struct k_thread *cthread, void *user_data)
int ret;
#endif
size_t size = thread->stack_info.size;
thread_analyzer_cb cb = user_data;
struct ta_cb_user_data *ud = user_data;
thread_analyzer_cb cb = ud->cb;
unsigned int cpu = ud->cpu;
struct thread_analyzer_info info;
char hexname[PTR_STR_MAXLEN + 1];
const char *name;
Expand Down Expand Up @@ -115,9 +122,16 @@ static void thread_analyze_cb(const struct k_thread *cthread, void *user_data)
ret++;
}

if (k_thread_runtime_stats_all_get(&rt_stats_all) != 0) {
ret++;
if (IS_ENABLED(CONFIG_THREAD_ANALYZER_AUTO_SEPARATE_CORES)) {
if (k_thread_runtime_stats_cpu_get(cpu, &rt_stats_all) != 0) {
ret++;
}
} else {
if (k_thread_runtime_stats_all_get(&rt_stats_all) != 0) {
ret++;
}
}

if (ret == 0) {
info.utilization = (info.usage.execution_cycles * 100U) /
rt_stats_all.execution_cycles;
Expand All @@ -129,61 +143,132 @@ static void thread_analyze_cb(const struct k_thread *cthread, void *user_data)
K_KERNEL_STACK_ARRAY_DECLARE(z_interrupt_stacks, CONFIG_MP_MAX_NUM_CPUS,
CONFIG_ISR_STACK_SIZE);

/* Print stack-usage statistics for the ISR (interrupt) stack of one core.
 * Prints nothing if the unused stack space cannot be determined.
 */
static void isr_stack(int core)
{
	const uint8_t *stack_buf = K_KERNEL_STACK_BUFFER(z_interrupt_stacks[core]);
	size_t stack_size = K_KERNEL_STACK_SIZEOF(z_interrupt_stacks[core]);
	size_t free_bytes;

	if (z_stack_space_get(stack_buf, stack_size, &free_bytes) != 0) {
		return;
	}

	size_t used = stack_size - free_bytes;

	THREAD_ANALYZER_PRINT(
		THREAD_ANALYZER_FMT(
			" %s%-17d: STACK: unused %zu usage %zu / %zu (%zu %%)"),
		THREAD_ANALYZER_VSTR("ISR"), core, free_bytes,
		used, stack_size, (100 * used) / stack_size);
}

/* Print ISR stack statistics for every CPU in the system.
 *
 * The scraped diff left both the old inline body and the new isr_stack()
 * call in this span; this is the reconstructed post-change function.  The
 * loop index is unsigned to match arch_num_cpus() (avoids a signed/unsigned
 * comparison warning), and the single-statement body is braced.
 */
static void isr_stacks(void)
{
	unsigned int num_cpus = arch_num_cpus();

	for (unsigned int i = 0; i < num_cpus; i++) {
		isr_stack(i);
	}
}

/* Analyze threads and report each one through @p cb.
 *
 * With CONFIG_THREAD_ANALYZER_AUTO_SEPARATE_CORES=y only the threads
 * running on @p cpu (and that core's ISR stack) are visited; otherwise
 * @p cpu is ignored and every thread in the system is analyzed.
 *
 * Bug fix: the non-separate-cores branches must pass &ud, not the bare
 * callback pointer — thread_analyze_cb() now unconditionally dereferences
 * its user_data argument as a struct ta_cb_user_data *.
 */
void thread_analyzer_run(thread_analyzer_cb cb, unsigned int cpu)
{
	struct ta_cb_user_data ud = { .cb = cb, .cpu = cpu };

	if (IS_ENABLED(CONFIG_THREAD_ANALYZER_RUN_UNLOCKED)) {
		if (IS_ENABLED(CONFIG_THREAD_ANALYZER_AUTO_SEPARATE_CORES)) {
			k_thread_foreach_unlocked_filter_by_cpu(cpu, thread_analyze_cb, &ud);
		} else {
			k_thread_foreach_unlocked(thread_analyze_cb, &ud);
		}
	} else {
		if (IS_ENABLED(CONFIG_THREAD_ANALYZER_AUTO_SEPARATE_CORES)) {
			k_thread_foreach_filter_by_cpu(cpu, thread_analyze_cb, &ud);
		} else {
			k_thread_foreach(thread_analyze_cb, &ud);
		}
	}

	if (IS_ENABLED(CONFIG_THREAD_ANALYZER_ISR_STACK_USAGE)) {
		if (IS_ENABLED(CONFIG_THREAD_ANALYZER_AUTO_SEPARATE_CORES)) {
			isr_stack(cpu);
		} else {
			isr_stacks();
		}
	}
}

void thread_analyzer_print(void)
void thread_analyzer_print(unsigned int cpu)
{
#if IS_ENABLED(CONFIG_THREAD_ANALYZER_AUTO_SEPARATE_CORES)
THREAD_ANALYZER_PRINT(THREAD_ANALYZER_FMT("Thread analyze core %u:"),
cpu);
#else
THREAD_ANALYZER_PRINT(THREAD_ANALYZER_FMT("Thread analyze:"));
thread_analyzer_run(thread_print_cb);
#endif
thread_analyzer_run(thread_print_cb, cpu);
}

#if defined(CONFIG_THREAD_ANALYZER_AUTO)

void thread_analyzer_auto(void)
void thread_analyzer_auto(void *a, void *b, void *c)
{
unsigned int cpu = IS_ENABLED(CONFIG_THREAD_ANALYZER_AUTO_SEPARATE_CORES) ?
(unsigned int) a : 0;

for (;;) {
thread_analyzer_print();
thread_analyzer_print(cpu);
k_sleep(K_SECONDS(CONFIG_THREAD_ANALYZER_AUTO_INTERVAL));
}
}

#if IS_ENABLED(CONFIG_THREAD_ANALYZER_AUTO_SEPARATE_CORES)

static K_THREAD_STACK_ARRAY_DEFINE(analyzer_thread_stacks, CONFIG_MP_MAX_NUM_CPUS,
CONFIG_THREAD_ANALYZER_AUTO_STACK_SIZE);
static struct k_thread analyzer_thread[CONFIG_MP_MAX_NUM_CPUS];

/* Start one analyzer thread per CPU, each pinned to its core, so every
 * instance reports only the threads running on that core.
 *
 * Per-core failures are logged and that core is skipped; the remaining
 * cores still get an analyzer thread.  Always returns 0 so SYS_INIT
 * never aborts boot over a missing analyzer.
 */
static int thread_analyzer_init(void)
{
	unsigned int i;

	for (i = 0; i < ARRAY_SIZE(analyzer_thread); i++) {
		char name[24];
		k_tid_t tid;
		int ret;

		/* Pass the core number as the first thread argument; cast
		 * through uintptr_t so the value survives the round-trip on
		 * 64-bit targets without a pointer-truncation warning.
		 */
		tid = k_thread_create(&analyzer_thread[i], analyzer_thread_stacks[i],
				      CONFIG_THREAD_ANALYZER_AUTO_STACK_SIZE,
				      thread_analyzer_auto,
				      (void *)(uintptr_t)i, NULL, NULL,
				      K_LOWEST_APPLICATION_THREAD_PRIO, 0, K_FOREVER);
		if (tid == NULL) {
			LOG_ERR("k_thread_create() failed for core %u", i);
			continue;
		}
		ret = k_thread_cpu_pin(tid, i);
		if (ret < 0) {
			LOG_ERR("Pinning thread to core %u failed: %d", i, ret);
			k_thread_abort(tid);
			continue;
		}
		snprintf(name, sizeof(name), "core %u thread analyzer", i);
		ret = k_thread_name_set(tid, name);
		if (ret < 0) {
			/* Cosmetic only; the thread still runs without a name */
			LOG_INF("k_thread_name_set failed: %d for %u", ret, i);
		}

		/* Thread was created suspended (K_FOREVER); release it now
		 * that it is pinned and named.
		 */
		k_thread_start(tid);
		LOG_DBG("Thread %p for core %u started", tid, i);
	}

	return 0;
}

SYS_INIT(thread_analyzer_init, APPLICATION, CONFIG_KERNEL_INIT_PRIORITY_DEFAULT);

#else

/* Without per-core separation a single statically-defined analyzer thread
 * periodically reports all threads in the system; all three entry
 * arguments are unused in that configuration.
 */
K_THREAD_DEFINE(thread_analyzer,
		CONFIG_THREAD_ANALYZER_AUTO_STACK_SIZE,
		thread_analyzer_auto,
		NULL, NULL, NULL,
		K_LOWEST_APPLICATION_THREAD_PRIO,
		0, 0);

#endif
#endif /* CONFIG_THREAD_ANALYZER_AUTO_SEPARATE_CORES */

#endif /* CONFIG_THREAD_ANALYZER_AUTO */

0 comments on commit 1b6e0f6

Please sign in to comment.