From 761053f0aae8125a9484e7412022f9b915cb64b9 Mon Sep 17 00:00:00 2001 From: Georgiy Lebedev <g.lebedev@tarantool.org> Date: Wed, 9 Feb 2022 09:12:09 +0300 Subject: [PATCH] coro: fix `coro_{init,startup}` unwind information MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fiber call-chains end at `coro_{init, startup}`, but unwinders don't stop there, trying to use `coro_{init, startup}` stack frame's return address (which points to some garbage) and, in turn, failing. A similar issue was experienced by seastar and julia (see JuliaLang/julia#23074 and scylladb/scylla#1909). In order to make unwinding stop at `coro_{init, startup}`'s stack frame we need to annotate it with CFI assembly: previously, annotation was provided only for GCC on x86_64 — also provide it if ENABLE_BACKTRACE is set during configuration. Zero out rbp on x86_64 (to conform to x86_64 ABI): this requires setting "-fomit-frame-pointer" compile flag for coro.c. Backtrace collection from inactive fiber based on pseudo context-switch relied on the stack frame structure: remove redundant "-fno-omit-frame-pointer" and "-fno-stack-protector" compile flags for other Tarantool sources. For some reason unwinders ignore platform ABIs regarding ending of call-chains: explicitly invalidate the topmost (`coro_{init, startup}`) current frame information (CFI) for both x86_64 and AARCH64. References: 1. glibc: * clone: https://sourceware.org/git/?p=glibc.git;a=blob;f=sysdeps/unix/sysv/linux/x86_64/clone.S;h=31ac12da0cc08a934d514fed1de9eba1cb3e8ec5;hb=ebbb8c9f64c3486603ef4ccee4dd2a5574e41039 * start: https://sourceware.org/git/?p=glibc.git;a=blob;f=sysdeps/x86_64/start.S;h=9edd17b60cd54ec9eef11c76ab02322dcb5d057a;hb=5b736bc9b55115e67129e77db4de6cf193054cd2 2. seastar: * thread_context::main(): https://github.com/scylladb/seastar/blob/d27bf8b5a14e5b9e9c9df18fd1306489b651aa42/src/core/thread.cc#L278-L293 3. julia: * https://github.com/JuliaLang/julia/blob/2e2b1d2ad50fe12999cbded0b5acd3f0a36ec8c5/src/julia_internal.h#L90-L106 4. android: * https://cs.android.com/android/platform/superproject/+/master:bionic/libc/platform/bionic/macros.h;l=52-60;drc=2528dab7419a63f57fe20027886ba7dd3857aba8 Needed for #4002 NO_DOC=internal bug fix NO_CHANGELOG=internal bug fix NO_TEST=unwind information annotation in inline assembly --- cmake/BuildLibCORO.cmake | 3 +- cmake/compiler.cmake | 11 ------- third_party/coro/coro.c | 62 +++++++++++++++++++++++++++++++--------- 3 files changed, 50 insertions(+), 26 deletions(-) diff --git a/cmake/BuildLibCORO.cmake b/cmake/BuildLibCORO.cmake index 3a9c95426a..860aabd5e3 100644 --- a/cmake/BuildLibCORO.cmake +++ b/cmake/BuildLibCORO.cmake @@ -4,6 +4,8 @@ macro(libcoro_build) set(coro_src ${PROJECT_SOURCE_DIR}/third_party/coro/coro.c ) + set_source_files_properties(${coro_src} PROPERTIES + COMPILE_FLAGS -fomit-frame-pointer) add_library(coro STATIC ${coro_src}) @@ -20,4 +22,3 @@ macro(libcoro_build) unset(coro_src) endmacro(libcoro_build) - diff --git a/cmake/compiler.cmake b/cmake/compiler.cmake index c0b3b4703a..fb1a70fa0d 100644 --- a/cmake/compiler.cmake +++ b/cmake/compiler.cmake @@ -179,17 +179,6 @@ endif() add_compile_flags("C;CXX" "-fexceptions" "-funwind-tables") -# We must set -fno-omit-frame-pointer here, since we rely -# on frame pointer when getting a backtrace, and it must -# be used consistently across all object files. -# The same reasoning applies to -fno-stack-protector switch. - -if (ENABLE_BACKTRACE) - add_compile_flags("C;CXX" - "-fno-omit-frame-pointer" - "-fno-stack-protector") -endif() - # In C a global variable without a storage specifier (static/extern) and # without an initialiser is called a ’tentative definition’. The # language permits multiple tentative definitions in the single diff --git a/third_party/coro/coro.c b/third_party/coro/coro.c index ca490dc268..5028e3c146 100644 --- a/third_party/coro/coro.c +++ b/third_party/coro/coro.c @@ -40,6 +40,8 @@ #include "coro.h" +#include "trivia/config.h" + #include <stddef.h> #include <string.h> @@ -103,13 +105,36 @@ coro_init (void) coro_transfer (new_coro, create_coro); -#if __GCC_HAVE_DWARF2_CFI_ASM && __amd64 - asm (".cfi_undefined rip"); -#endif - - func ((void *)arg); - - /* the new coro returned. bad. just abort() for now */ + __asm__ volatile( +#ifdef ENABLE_BACKTRACE + /* + * Call-chain ends here: we need to invalidate this frame's return + * address to make unwinding stop here. + */ + + /* Clearing rbp is insufficient: undefine rip value's location — see: https://github.com/libunwind/libunwind/blob/ec171c9ba7ea3abb2a1383cee2988a7abd483a1f/src/x86_64/Gstep.c#L91-L92 */ + ".cfi_undefined rip\n" + /* Undefine rbp value's location — ibid. */ + ".cfi_undefined rbp\n" + /* Undefine rbp-relative return address location, see: https://github.com/libunwind/libunwind/blob/ec171c9ba7ea3abb2a1383cee2988a7abd483a1f/src/dwarf/Gparser.c#L877 */ + ".cfi_return_column rbp\n" +#endif /* ENABLE_BACKTRACE */ + /* Clear rbp to conform to the x86_64 ABI, see: https://github.com/libunwind/libunwind/blob/ec171c9ba7ea3abb2a1383cee2988a7abd483a1f/src/x86_64/Gstep.c#L144-L148 */ + "\txorq %%rbp, %%rbp\n" + "\tmovq %0, %%rdi\n" + "\tcallq *%1\n" + : + : "rm" (arg), "rm" (func), "m" (*arg) + : "rbp", "rax", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", + "xmm0","xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", + "xmm8","xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", +#ifdef __AVX512F__ + "xmm16", "xmm17", "xmm18", "xmm19", "xmm20", "xmm21", "xmm22", + "xmm23", "xmm24", "xmm25", "xmm26", "xmm27", "xmm28", "xmm29", + "xmm30", "xmm31", "k0", "k1", "k2", "k3", "k4", "k5", "k6", "k7", +#endif /* __AVX512F__ */ + "mm0","mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm6", "st", + "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)"); abort (); } # endif @@ -320,13 +345,21 @@ trampoline (int sig) ".fnend\n" #elif __aarch64__ - +#ifdef ENABLE_BACKTRACE ".cfi_startproc\n" - "\tmov x30, #0\n" - "\tsub sp, sp, #16\n" - "\tstr x30, [sp, #0]\n" - ".cfi_def_cfa_offset 16\n" - ".cfi_offset 30, -16\n" + /* + * Call-chain ends here: we need to invalidate this frame's return + * address to make unwinding stop here. + */ + + /* + * Detection of call-chain end on AARCH64 ABI inherently relies on CFI, + * see: https://github.com/ARM-software/abi-aa/blob/main/aapcs64/aapcs64.rst#623the-frame-pointer. + * Simply undefine x29 and x30 register values' locations. + */ + ".cfi_undefined x29\n" + ".cfi_undefined x30\n" +#endif /* ENABLE_BACKTRACE */ "\tmov x0, x20\n" "\tblr x19\n" # ifdef __APPLE__ @@ -334,8 +367,9 @@ trampoline (int sig) # else "\tb abort\n" # endif +#ifdef ENABLE_BACKTRACE ".cfi_endproc\n" - +#endif /* ENABLE_BACKTRACE */ #else #error unsupported architecture #endif -- GitLab