Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ongoing] Experimental unwinding feature #405

Draft
wants to merge 5 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Experimental unwinding - WIP
Ensure we use elf addresses instead of absolute addresses
  • Loading branch information
r1viollet committed Apr 13, 2024
commit 7c4795ca141cb1cd393a4a371d690fa3f37cd018
8 changes: 4 additions & 4 deletions include/async-profiler/codeCache.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
#ifndef _CODECACHE_H
#define _CODECACHE_H

// #include <jvmti.h>
#include <stdint.h>

#define NO_MIN_ADDRESS ((const void *)-1)
#define NO_MAX_ADDRESS ((const void *)0)
Expand Down Expand Up @@ -78,7 +78,7 @@ class CodeCache {
short _lib_index;
const void *_min_address;
const void *_max_address;
const char *_text_base;
const void *_text_base;

void **_got_start;
void **_got_end;
Expand Down Expand Up @@ -113,7 +113,7 @@ class CodeCache {

void setTextBase(const char *text_base) { _text_base = text_base; }

const char *getTextBase() { return _text_base; }
const void *getTextBase() { return _text_base; }

void **gotStart() const { return _got_start; }

Expand All @@ -136,7 +136,7 @@ class CodeCache {
void makeGotPatchable();

void setDwarfTable(FrameDesc *table, int length);
FrameDesc *findFrameDesc(const void *pc);
FrameDesc *findFrameDesc(uint64_t elf_address);
};

class CodeCacheArray {
Expand Down
8 changes: 6 additions & 2 deletions src/async-profiler/codeCache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,13 @@
#include "codeCache.h"
#include "dwarf.h"
#include "os.h"

#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <limits>
#include <cassert>

char *NativeFunc::create(const char *name, short lib_index) {
NativeFunc *f = (NativeFunc *)malloc(sizeof(NativeFunc) + 1 + strlen(name));
Expand Down Expand Up @@ -216,8 +219,9 @@ void CodeCache::setDwarfTable(FrameDesc *table, int length) {
_dwarf_table_length = length;
}

FrameDesc *CodeCache::findFrameDesc(const void *pc) {
u32 target_loc = (const char *)pc - _text_base;
FrameDesc *CodeCache::findFrameDesc(uintptr_t elf_address) {
assert(elf_address < std::numeric_limits<u32>::max());
const u32 target_loc = (const u32)elf_address;
int low = 0;
int high = _dwarf_table_length - 1;

Expand Down
2 changes: 1 addition & 1 deletion src/async-profiler/stackWalker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ bool stepStackContext(ap::StackContext &sc, const ap::StackBuffer &buffer,
CodeCacheArray *cache) {
FrameDesc *f;
CodeCache *cc = findLibraryByAddress(cache, sc.pc);
if (cc == NULL || (f = cc->findFrameDesc(sc.pc)) == NULL) {
if (cc == NULL || (f = cc->findFrameDesc(static_cast<const char*>(sc.pc) - static_cast<const char*>(cc->getTextBase()))) == NULL) {
f = &FrameDesc::default_frame;
}
// const char *sym = cc?cc->binarySearch(sc.pc):"unknown";
Expand Down
3 changes: 3 additions & 0 deletions src/async-profiler/symbols_linux.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -678,6 +678,9 @@ void Symbols::parsePidLibraries(pid_t pid, CodeCacheArray *array,
printf("offset from get_elf_offset: %lx \n", elf_offset);
printf("last readable: %lx \n", last_readable_base);
}
else {
printf("Failed to read elf offsets \n");
}

// Do not parse the same executable twice, e.g. on Alpine Linux
if (parsed_inodes.insert(map.dev() | inode << 16).second) {
Expand Down
32 changes: 16 additions & 16 deletions test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -391,22 +391,22 @@ add_unit_test(

add_unit_test(ddprof_module_lib-ut ddprof_module_lib-ut.cc ../src/ddprof_module_lib.cc
../src/build_id.cc ../src/dso.cc LIBRARIES ${ELFUTILS_LIBRARIES})
#add_unit_test(
# dwarf_unwind-ut
# dwarf_unwind-ut.cc
# ../src/lib/pthread_fixes.cc
# ../src/lib/savecontext.cc
# ../src/lib/saveregisters.cc
# # ../src/lib/allocation_tracker.cc
# ../src/ringbuffer_utils.cc
# ../src/perf_ringbuffer.cc
# ../src/perf.cc
# ../src/pevent_lib.cc
# ../src/sys_utils.cc
# ../src/user_override.cc
# ../src/perf_watcher.cc
# LIBRARIES ${ELFUTILS_LIBRARIES} DDProf::AsyncProf)
#target_include_directories(dwarf_unwind-ut PRIVATE ${ASYNC_PROFILER_INCLUDE})
add_unit_test(
dwarf_unwind-ut
dwarf_unwind-ut.cc
../src/lib/pthread_fixes.cc
../src/lib/savecontext.cc
../src/lib/saveregisters.cc
# ../src/lib/allocation_tracker.cc
../src/ringbuffer_utils.cc
../src/perf_ringbuffer.cc
../src/perf.cc
../src/pevent_lib.cc
../src/sys_utils.cc
../src/user_override.cc
../src/perf_watcher.cc
LIBRARIES ${ELFUTILS_LIBRARIES} DDProf::AsyncProf)
target_include_directories(dwarf_unwind-ut PRIVATE ${ASYNC_PROFILER_INCLUDE})

add_benchmark(savecontext-bench savecontext-bench.cc ../src/lib/pthread_fixes.cc
../src/lib/savecontext.cc ../src/lib/saveregisters.cc LIBRARIES llvm-demangle)
Expand Down
174 changes: 174 additions & 0 deletions test/dwarf_unwind-ut.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
#include <gtest/gtest.h>

#include "savecontext.hpp"
#include "stackWalker.h"
#include "unwind_state.hpp"

#include <array>
#include <unistd.h>

#include "async-profiler/codeCache.h"
#include "async-profiler/stack_context.h"
#include "async-profiler/symbols.h"

// Retrieves the instruction pointer at the point where the macro is expanded.
// Built on GNU extensions: a statement expression `({ ... })`, a block-local
// label declared with `__label__`, and the unary `&&` address-of-label
// operator. The whole expression evaluates to the address of that label,
// cast to unsigned long.
#define _THIS_IP_ \
({ \
__label__ __here; \
__here: \
(unsigned long)&&__here; \
})

// #include "ddprof_defs.hpp"

// Temporary local copy of the sample stack size: 4096 * 8 = 32768 bytes.
// NOTE(review): presumably duplicates a definition from ddprof_defs.hpp, whose
// include is commented out just above -- confirm and deduplicate.
#define PERF_SAMPLE_STACK_SIZE (4096UL * 8)

// Destination buffer handed to ddprof::save_context() in funcB; the `simple`
// test later wraps it in the ap::StackBuffer that it unwinds.
std::byte stack[PERF_SAMPLE_STACK_SIZE];

// Forward declarations carrying DDPROF_NOINLINE for the two helper frames the
// `simple` test expects to find in the unwound call chain.
// NOTE(review): DDPROF_NOINLINE is defined outside this file; presumably a
// noinline attribute -- confirm.
DDPROF_NOINLINE size_t funcA(std::array<uint64_t, ddprof::k_perf_register_count> &regs);
DDPROF_NOINLINE size_t funcB(std::array<uint64_t, ddprof::k_perf_register_count> &regs);

/// Innermost helper frame of the `simple` test.
///
/// Logs its own instruction pointer, then calls ddprof::save_context() with
/// the current stack bounds, `regs` and the global `stack` buffer.
///
/// @param regs register array handed to save_context().
/// @return the value returned by save_context(); the `simple` test uses it as
///         the number of stack bytes available for unwinding.
size_t funcB(std::array<uint64_t, ddprof::k_perf_register_count> &regs) {
  printf("dwarf_unwind-ut:%s %lx \n", __FUNCTION__, _THIS_IP_);
  std::span<const std::byte> bounds = ddprof::retrieve_stack_bounds();
  const size_t size = ddprof::save_context(bounds, regs, stack);
  // The test asserts that this frame appears in the unwound call chain. With
  // optimisations enabled the call above could otherwise be emitted as a
  // sibling (tail) call, which would pop this frame before save_context()
  // runs. An asm statement after the call forces a real call/return.
  __asm__ __volatile__("" ::: "memory");
  return size;
}

/// Outer helper frame of the `simple` test.
///
/// Logs its own instruction pointer and forwards to funcB().
///
/// @param regs register array forwarded to funcB().
/// @return the value returned by funcB().
size_t funcA(std::array<uint64_t, ddprof::k_perf_register_count> &regs) {
  printf("dwarf_unwind-ut:%s %lx \n", __FUNCTION__, _THIS_IP_);
  const size_t size = funcB(regs);
  // `return funcB(regs);` is a textbook sibling-call candidate: an optimising
  // build may replace the call with a jump, removing the funcA frame that the
  // test expects at index 2 of the call chain. The asm statement keeps work
  // after the call so it stays a real call.
  __asm__ __volatile__("" ::: "memory");
  return size;
}

// In-process unwinding test: captures registers and stack through
// funcA -> funcB -> save_context, walks the captured stack with stackWalk()
// and checks that the three innermost frames symbolize to those functions.
TEST(dwarf_unwind, simple) {
  constexpr int k_max_frames = 128;

  CodeCacheArray cache_array;
  // Load libraries
  Symbols::parsePidLibraries(getpid(), &cache_array, false);

  std::array<uint64_t, ddprof::k_perf_register_count> regs{};
  const size_t size_stack = funcA(regs);
  // Nothing to unwind without captured stack bytes.
  ASSERT_GT(size_stack, 0U);

  ap::StackContext sc = ap::from_regs(std::span(regs));
  ap::StackBuffer buffer(stack, sc.sp, sc.sp + size_stack);

  void *callchain[k_max_frames] = {};
  const int n = stackWalk(&cache_array, sc, buffer,
                          const_cast<const void **>(callchain), k_max_frames, 0);
  // The checks below index frames 0..2; fail cleanly if unwinding stopped
  // earlier instead of reading unset entries.
  ASSERT_GE(n, 3);

  // Zero-initialised so a frame without a resolved symbol is a detectable
  // nullptr rather than an indeterminate pointer.
  const char *syms[k_max_frames] = {};
  for (int i = 0; i < n; ++i) {
    // retrieve symbol
    CodeCache *code_cache = findLibraryByAddress(&cache_array, callchain[i]);
    if (code_cache == nullptr) {
      continue;
    }
    syms[i] = code_cache->binarySearch(callchain[i]);
    printf("IP = %p - %s\n", callchain[i],
           syms[i] != nullptr ? syms[i] : "(null)");
  }

  // Check that we found the expected functions during unwinding
  static const char *const k_expected[] = {"save_context", "funcB", "funcA"};
  for (int i = 0; i < 3; ++i) {
    ASSERT_TRUE(syms[i] != nullptr) << "no symbol resolved for frame " << i;
    ASSERT_TRUE(std::string(syms[i]).find(k_expected[i]) != std::string::npos)
        << "frame " << i << " resolved to " << syms[i];
  }
}

#ifdef ALLOC_TRACKER
#include "allocation_tracker.hpp"
#include "perf_ringbuffer.hpp"
#include "ringbuffer_holder.hpp"
#include "ringbuffer_utils.hpp"
#include <span>
#include "defer.hpp"

namespace ddprof {
// Sampling rate passed to AllocationTracker::allocation_tracking_init() by the
// `remote` test.
static const uint64_t kSamplingRate = 1;

// Call chain executed by the forked child of the `remote` test:
// func_intermediate_0 -> func_intermediate_1 -> func_save_sleep.
// NOTE(review): DDPROF_NOINLINE is defined outside this file; presumably a
// noinline attribute -- confirm.
DDPROF_NOINLINE void func_save_sleep(size_t size);
DDPROF_NOINLINE void func_intermediate_0(size_t size);
DDPROF_NOINLINE void func_intermediate_1(size_t size);

// Producer loop run in the forked child of the `remote` test.
// Performs 99999 rounds; each round reports one allocation of `size` to the
// allocation tracker (with 0xdeadbeef as first argument) and then sleeps for
// 100 microseconds.
DDPROF_NOINLINE void func_save_sleep(size_t size) {
  ddprof::TrackerThreadLocalState *const state =
      AllocationTracker::get_tl_state();
  assert(state);
  for (int round = 1; round < 100000; ++round) {
    ddprof::AllocationTracker::track_allocation_s(0xdeadbeef, size, *state);
    // prevent tail call optimization
    getpid();
    usleep(100);
  }
}

// First intermediate frame of the child's call chain; forwards to
// func_intermediate_1().
void func_intermediate_0(size_t size) {
  func_intermediate_1(size);
  // Keep this frame on the stack: without work after the call, an optimising
  // build may turn it into a sibling (tail) call and the frame would be
  // missing from the sampled stack.
  __asm__ __volatile__("" ::: "memory");
}

// Second intermediate frame of the child's call chain; forwards to
// func_save_sleep().
void func_intermediate_1(size_t size) {
  func_save_sleep(size);
  // Keep this frame on the stack: without work after the call, an optimising
  // build may turn it into a sibling (tail) call and the frame would be
  // missing from the sampled stack.
  __asm__ __volatile__("" ::: "memory");
}

// Cross-process unwinding test: a forked child pushes allocation samples into
// a shared ring buffer; the parent loads the child's libraries, reads one
// sample and unwinds the stack captured in it, printing the symbol of every
// frame it can resolve.
TEST(dwarf_unwind, remote) {
  const size_t buf_size_order = 5;
  ddprof::RingBufferHolder ring_buffer{buf_size_order,
                                       RingBufferType::kMPSCRingBuffer};
  AllocationTracker::allocation_tracking_init(
      kSamplingRate,
      AllocationTracker::kDeterministicSampling |
          AllocationTracker::kTrackDeallocations,
      k_default_perf_stack_sample_size, ring_buffer.get_buffer_info(), {});
  defer { AllocationTracker::allocation_tracking_free(); };

  // Fork the producer.
  const pid_t temp_pid = fork();
  // A failed fork returns -1; passing that to kill() below would signal every
  // process this user is allowed to signal.
  ASSERT_NE(temp_pid, -1) << "fork failed";
  if (temp_pid == 0) {
    func_intermediate_0(10);
    // Leave the child immediately: returning from the test body would let the
    // child carry on executing the rest of the gtest run.
    _exit(0);
  }
  // cleanup the producer fork, exactly once, on every exit path (including
  // the early returns taken by failing ASSERT_* macros below).
  defer { kill(temp_pid, SIGTERM); };

  // Load libraries from the fork - the cache array is relevant to a single pid
  CodeCacheArray cache_array;
  // Give the child time to start and publish at least one sample.
  sleep(1);
  Symbols::parsePidLibraries(temp_pid, &cache_array, false);

  ddprof::MPSCRingBufferReader reader{&ring_buffer.get_ring_buffer()};
  ASSERT_GT(reader.available_size(), 0);

  auto buf = reader.read_sample();
  ASSERT_FALSE(buf.empty());
  const perf_event_header *hdr =
      reinterpret_cast<const perf_event_header *>(buf.data());
  ASSERT_EQ(hdr->type, PERF_RECORD_SAMPLE);

  // convert based on mask for this watcher (default in this case)
  perf_event_sample *sample =
      hdr2samp(hdr, ddprof::perf_event_default_sample_type());
  ASSERT_TRUE(sample != nullptr);

  std::span<const uint64_t, ddprof::k_perf_register_count> regs_span{
      sample->regs, ddprof::k_perf_register_count};
  ap::StackContext sc = ap::from_regs(regs_span);
  // Named to avoid shadowing the file-level `stack` buffer.
  std::span<const std::byte> sample_stack{
      reinterpret_cast<const std::byte *>(sample->data_stack),
      sample->size_stack};
  ap::StackBuffer buffer(sample_stack, sc.sp, sc.sp + sample->size_stack);

  void *callchain[ddprof::kMaxStackDepth] = {};
  const int n =
      stackWalk(&cache_array, sc, buffer, const_cast<const void **>(callchain),
                ddprof::kMaxStackDepth, 0);

  // Zero-initialised so unresolved frames hold nullptr.
  std::array<const char *, ddprof::kMaxStackDepth> syms{};
  for (int i = 0; i < n; ++i) {
    // retrieve symbol
    CodeCache *code_cache = findLibraryByAddress(&cache_array, callchain[i]);
    if (code_cache == nullptr) {
      continue;
    }
    syms[i] = code_cache->binarySearch(callchain[i]);
    printf("IP = %p - %s\n", callchain[i],
           syms[i] != nullptr ? syms[i] : "(null)");
  }
}
}
#endif