diff --git a/third_party/coro/coro.c b/third_party/coro/coro.c index 00b5bde47924e07d8f51550e66074f52b84ff794..ff71383b8414d708dc274ad9bf8a25a87d7ca2c4 100644 --- a/third_party/coro/coro.c +++ b/third_party/coro/coro.c @@ -1,16 +1,16 @@ /* * Copyright (c) 2001-2011 Marc Alexander Lehmann <schmorp@schmorp.de> - * + * * Redistribution and use in source and binary forms, with or without modifica- * tion, are permitted provided that the following conditions are met: - * + * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. - * + * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * + * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER- * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO @@ -40,6 +40,7 @@ #include "coro.h" +#include <stddef.h> #include <string.h> /*****************************************************************************/ @@ -116,25 +117,36 @@ trampoline (int sig) # if CORO_ASM - #if _WIN32 + #if _WIN32 || __CYGWIN__ #define CORO_WIN_TIB 1 #endif asm ( "\t.text\n" -#if defined(__APPLE__) && defined(__MACH__) + #if _WIN32 || __CYGWIN__ "\t.globl _coro_transfer\n" "_coro_transfer:\n" -#else + #else "\t.globl coro_transfer\n" "coro_transfer:\n" -#endif + #endif /* windows, of course, gives a shit on the amd64 ABI and uses different registers */ /* http://blogs.msdn.com/freik/archive/2005/03/17/398200.aspx */ #if __amd64 - #ifdef WIN32 - /* TODO: xmm6..15 also would need to be saved. sigh. */ - #define NUM_SAVED 8 + + #if _WIN32 || __CYGWIN__ + #define NUM_SAVED 29 + "\tsubq $168, %rsp\t" /* one dummy qword to improve alignment */ + "\tmovaps %xmm6, (%rsp)\n" + "\tmovaps %xmm7, 16(%rsp)\n" + "\tmovaps %xmm8, 32(%rsp)\n" + "\tmovaps %xmm9, 48(%rsp)\n" + "\tmovaps %xmm10, 64(%rsp)\n" + "\tmovaps %xmm11, 80(%rsp)\n" + "\tmovaps %xmm12, 96(%rsp)\n" + "\tmovaps %xmm13, 112(%rsp)\n" + "\tmovaps %xmm14, 128(%rsp)\n" + "\tmovaps %xmm15, 144(%rsp)\n" "\tpushq %rsi\n" "\tpushq %rdi\n" "\tpushq %rbp\n" @@ -163,6 +175,17 @@ trampoline (int sig) "\tpopq %rbp\n" "\tpopq %rdi\n" "\tpopq %rsi\n" + "\tmovaps (%rsp), %xmm6\n" + "\tmovaps 16(%rsp), %xmm7\n" + "\tmovaps 32(%rsp), %xmm8\n" + "\tmovaps 48(%rsp), %xmm9\n" + "\tmovaps 64(%rsp), %xmm10\n" + "\tmovaps 80(%rsp), %xmm11\n" + "\tmovaps 96(%rsp), %xmm12\n" + "\tmovaps 112(%rsp), %xmm13\n" + "\tmovaps 128(%rsp), %xmm14\n" + "\tmovaps 144(%rsp), %xmm15\n" + "\taddq $168, %rsp\n" #else #define NUM_SAVED 6 "\tpushq %rbp\n" @@ -180,13 +203,19 @@ trampoline (int sig) "\tpopq %rbx\n" "\tpopq %rbp\n" #endif + "\tpopq %rcx\n" + "\tjmpq *%rcx\n" + #elif __i386 + #define NUM_SAVED 4 "\tpushl %ebp\n" "\tpushl %ebx\n" "\tpushl %esi\n" "\tpushl %edi\n" #if CORO_WIN_TIB + #undef NUM_SAVED + #define NUM_SAVED 7 "\tpushl %fs:0\n" "\tpushl %fs:4\n" "\tpushl %fs:8\n" @@ -202,16 +231,18 @@ trampoline (int sig) "\tpopl %esi\n" "\tpopl %ebx\n" "\tpopl %ebp\n" + "\tpopl %ecx\n" + "\tjmpl *%ecx\n" + #else #error unsupported architecture #endif - "\tret\n" ); # endif void -coro_create (coro_context *ctx, coro_func coro, void *arg, void *sptr, long ssize) +coro_create (coro_context *ctx, coro_func coro, void *arg, void *sptr, size_t ssize) { coro_context nctx; # if CORO_SJLJ @@ -288,16 +319,16 @@ coro_create (coro_context *ctx, coro_func coro, void *arg, void *sptr, long ssiz #elif __CYGWIN__ && __x86_64 ctx->env[7] = (long) coro_init; ctx->env[6] = (long) ((char *)sptr + ssize) - sizeof (long); - #elif defined(__MINGW32__) + #elif defined __MINGW32__ ctx->env[5] = (long) coro_init; ctx->env[4] = (long) ((char *)sptr + ssize) - sizeof (long); - #elif defined(_M_IX86) + #elif defined _M_IX86 ((_JUMP_BUFFER *)&ctx->env)->Eip = (long) coro_init; ((_JUMP_BUFFER *)&ctx->env)->Esp = (long) STACK_ADJUST_PTR (sptr, ssize) - sizeof (long); - #elif defined(_M_AMD64) + #elif defined _M_AMD64 ((_JUMP_BUFFER *)&ctx->env)->Rip = (__int64) coro_init; ((_JUMP_BUFFER *)&ctx->env)->Rsp = (__int64) STACK_ADJUST_PTR (sptr, ssize) - sizeof (__int64); - #elif defined(_M_IA64) + #elif defined _M_IA64 ((_JUMP_BUFFER *)&ctx->env)->StIIP = (__int64) coro_init; ((_JUMP_BUFFER *)&ctx->env)->IntSp = (__int64) STACK_ADJUST_PTR (sptr, ssize) - sizeof (__int64); #else @@ -413,7 +444,7 @@ coro_transfer (coro_context *prev, coro_context *next) } void -coro_create (coro_context *ctx, coro_func coro, void *arg, void *sptr, long ssize) +coro_create (coro_context *ctx, coro_func coro, void *arg, void *sptr, size_t ssize) { static coro_context nctx; static int once; @@ -472,7 +503,205 @@ coro_destroy (coro_context *ctx) pthread_cond_destroy (&ctx->cv); } +/*****************************************************************************/ +/* fiber backend */ +/*****************************************************************************/ +#elif CORO_FIBER + +#define WIN32_LEAN_AND_MEAN +#if _WIN32_WINNT < 0x0400 + #undef _WIN32_WINNT + #define _WIN32_WINNT 0x0400 +#endif +#include <windows.h> + +VOID CALLBACK +coro_init (PVOID arg) +{ + coro_context *ctx = (coro_context *)arg; + + ctx->coro (ctx->arg); +} + +void +coro_transfer (coro_context *prev, coro_context *next) +{ + if (!prev->fiber) + { + prev->fiber = GetCurrentFiber (); + + if (prev->fiber == 0 || prev->fiber == (void *)0x1e00) + prev->fiber = ConvertThreadToFiber (0); + } + + SwitchToFiber (next->fiber); +} + +void +coro_create (coro_context *ctx, coro_func coro, void *arg, void *sptr, size_t ssize) +{ + ctx->fiber = 0; + ctx->coro = coro; + ctx->arg = arg; + + if (!coro) + return; + + ctx->fiber = CreateFiber (ssize, coro_init, ctx); +} + +void +coro_destroy (coro_context *ctx) +{ + DeleteFiber (ctx->fiber); +} + #else -# error unsupported backend + #error unsupported backend +#endif + +/*****************************************************************************/ +/* stack management */ +/*****************************************************************************/ +#if CORO_STACKALLOC + +#include <stdlib.h> + +#ifndef _WIN32 +# include <unistd.h> +#endif + +#if CORO_USE_VALGRIND +# include <valgrind/valgrind.h> +#endif + +#if _POSIX_MAPPED_FILES +# include <sys/mman.h> +# define CORO_MMAP 1 +# ifndef MAP_ANONYMOUS +# ifdef MAP_ANON +# define MAP_ANONYMOUS MAP_ANON +# else +# undef CORO_MMAP +# endif +# endif +# include <limits.h> +#else +# undef CORO_MMAP +#endif + +#if _POSIX_MEMORY_PROTECTION +# ifndef CORO_GUARDPAGES +# define CORO_GUARDPAGES 4 +# endif +#else +# undef CORO_GUARDPAGES +#endif + +#if !CORO_MMAP +# undef CORO_GUARDPAGES +#endif + +#if !__i386 && !__x86_64 && !__powerpc && !__m68k && !__alpha && !__mips && !__sparc64 +# undef CORO_GUARDPAGES +#endif + +#ifndef CORO_GUARDPAGES +# define CORO_GUARDPAGES 0 +#endif + +#if !PAGESIZE + #if !CORO_MMAP + #define PAGESIZE 4096 + #else + static size_t + coro_pagesize (void) + { + static size_t pagesize; + + if (!pagesize) + pagesize = sysconf (_SC_PAGESIZE); + + return pagesize; + } + + #define PAGESIZE coro_pagesize () + #endif +#endif + +int +coro_stack_alloc (struct coro_stack *stack, unsigned int size) +{ + if (!size) + size = 256 * 1024; + + stack->sptr = 0; + stack->ssze = ((size_t)size * sizeof (void *) + PAGESIZE - 1) / PAGESIZE * PAGESIZE; + +#if CORO_FIBER + + stack->sptr = (void *)stack; + return 1; + +#else + + size_t ssze = stack->ssze + CORO_GUARDPAGES * PAGESIZE; + void *base; + + #if CORO_MMAP + /* mmap supposedly does allocate-on-write for us */ + base = mmap (0, ssze, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + + if (base == (void *)-1) + { + /* some systems don't let us have executable heap */ + /* we assume they won't need executable stack in that case */ + base = mmap (0, ssze, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + + if (base == (void *)-1) + return 0; + } + + #if CORO_GUARDPAGES + mprotect (base, CORO_GUARDPAGES * PAGESIZE, PROT_NONE); + #endif + + base = (void*)((char *)base + CORO_GUARDPAGES * PAGESIZE); + #else + base = malloc (ssze); + if (!base) + return 0; + #endif + + #if CORO_USE_VALGRIND + stack->valgrind_id = VALGRIND_STACK_REGISTER ((char *)base, ((char *)base) + ssze - CORO_GUARDPAGES * PAGESIZE); + #endif + + stack->sptr = base; + return 1; + +#endif +} + +void +coro_stack_free (struct coro_stack *stack) +{ +#if CORO_FIBER + /* nop */ +#else + #if CORO_USE_VALGRIND + VALGRIND_STACK_DEREGISTER (stack->valgrind_id); + #endif + + #if CORO_MMAP + if (stack->sptr) + munmap ((void*)((char *)stack->sptr - CORO_GUARDPAGES * PAGESIZE), + stack->ssze + CORO_GUARDPAGES * PAGESIZE); + #else + free (stack->sptr); + #endif +#endif +} + #endif diff --git a/third_party/coro/coro.h b/third_party/coro/coro.h index 52bb4280d35c8417cb52096c4292d1685cf560a0..779d800b4af6ce4729a366156a58c63ac54a48e4 100644 --- a/third_party/coro/coro.h +++ b/third_party/coro/coro.h @@ -1,16 +1,16 @@ /* - * Copyright (c) 2001-2011 Marc Alexander Lehmann <schmorp@schmorp.de> - * + * Copyright (c) 2001-2012 Marc Alexander Lehmann <schmorp@schmorp.de> + * * Redistribution and use in source and binary forms, with or without modifica- * tion, are permitted provided that the following conditions are met: - * + * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. - * + * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * + * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER- * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO @@ -75,6 +75,10 @@ * 2011-06-08 maybe properly implement weird windows amd64 calling conventions. * 2011-07-03 rely on __GCC_HAVE_DWARF2_CFI_ASM for cfi detection. * 2011-08-08 cygwin trashes stacks, use pthreads with double stack on cygwin. + * 2012-12-04 reduce misprediction penalty for x86/amd64 assembly switcher. + * 2012-12-05 experimental fiber backend (allocates stack twice). + * 2012-12-07 API version 3 - add coro_stack_alloc/coro_stack_free. + * 2012-12-21 valgrind stack registering was broken. */ #ifndef CORO_H @@ -84,53 +88,60 @@ extern "C" { #endif -#define CORO_VERSION 2 - -/* - * Changes since API version 1: - * replaced bogus -DCORO_LOOSE with gramatically more correct -DCORO_LOSER - */ - /* * This library consists of only three files * coro.h, coro.c and LICENSE (and optionally README) * * It implements what is known as coroutines, in a hopefully - * portable way. At the moment you have to define which kind - * of implementation flavour you want: + * portable way. + * + * All compiletime symbols must be defined both when including coro.h + * (using libcoro) as well as when compiling coro.c (the implementation). + * + * You can manually specify which flavour you want. If you don't define + * any of these, libcoro tries to choose a safe and fast default: * * -DCORO_UCONTEXT * * This flavour uses SUSv2's get/set/swap/makecontext functions that - * unfortunately only newer unices support. + * unfortunately only some unices support, and is quite slow. * * -DCORO_SJLJ * * This flavour uses SUSv2's setjmp/longjmp and sigaltstack functions to * do it's job. Coroutine creation is much slower than UCONTEXT, but - * context switching is often a bit cheaper. It should work on almost - * all unices. + * context switching is a bit cheaper. It should work on almost all unices. * * -DCORO_LINUX * + * CORO_SJLJ variant. * Old GNU/Linux systems (<= glibc-2.1) only work with this implementation * (it is very fast and therefore recommended over other methods, but * doesn't work with anything newer). * * -DCORO_LOSER * + * CORO_SJLJ variant. * Microsoft's highly proprietary platform doesn't support sigaltstack, and - * this automatically selects a suitable workaround for this platform. - * (untested) + * this selects a suitable workaround for this platform. It might not work + * with your compiler though - it has only been tested with MSVC 6. + * + * -DCORO_FIBER + * + * Slower, but probably more portable variant for the Microsoft operating + * system, using fibers. Ignores the passed stack and allocates it internally. + * Also, due to bugs in cygwin, this does not work with cygwin. * * -DCORO_IRIX * - * SGI's version of Microsoft's NT ;) + * CORO_SJLJ variant. + * For SGI's version of Microsoft's NT ;) * * -DCORO_ASM * - * Handcoded assembly, known to work only on a few architectures/ABI: - * GCC + x86/IA32 and amd64/x86_64 + GNU/Linux and a few BSDs. + * Hand coded assembly, known to work only on a few architectures/ABI: + * GCC + x86/IA32 and amd64/x86_64 + GNU/Linux and a few BSDs. Fastest choice, + * if it works. * * -DCORO_PTHREAD * @@ -139,10 +150,23 @@ extern "C" { * so avoid it at all costs. * * If you define neither of these symbols, coro.h will try to autodetect - * the model. This currently works for CORO_LOSER only. For the other - * alternatives you should check (e.g. using autoconf) and define the - * following symbols: HAVE_UCONTEXT_H / HAVE_SETJMP_H / HAVE_SIGALTSTACK. + * the best/safest model. To help with the autodetection, you should check + * (e.g. using autoconf) and define the following symbols: HAVE_UCONTEXT_H + * / HAVE_SETJMP_H / HAVE_SIGALTSTACK. + */ + +/* + * Changes when the API changes incompatibly. + * This is ONLY the API version - there is no ABI compatibility between releases. + * + * Changes in API version 2: + * replaced bogus -DCORO_LOOSE with grammatically more correct -DCORO_LOSER + * Changes in API version 3: + * introduced stack management (CORO_STACKALLOC) */ +#define CORO_VERSION 3 + +#include <stddef.h> /* * This is the type for the initialization function of a new coroutine. @@ -163,7 +187,7 @@ typedef struct coro_context coro_context; * * Allocating/deallocating the stack is your own responsibility. * - * As a special case, if coro, arg, sptr and ssize are all zero, + * As a special case, if coro, arg, sptr and ssze are all zero, * then an "empty" coro_context will be created that is suitable * as an initial source for coro_transfer. * @@ -174,11 +198,11 @@ void coro_create (coro_context *ctx, /* an uninitialised coro_context */ coro_func coro, /* the coroutine code to be executed */ void *arg, /* a single pointer passed to the coro */ void *sptr, /* start of stack area */ - long ssize); /* size of stack area */ + size_t ssze); /* size of stack area in bytes */ /* * The following prototype defines the coroutine switching function. It is - * usually implemented as a macro, so watch out. + * sometimes implemented as a macro, so watch out. * * This function is thread-safe and reentrant. */ @@ -187,11 +211,11 @@ void coro_transfer (coro_context *prev, coro_context *next); #endif /* - * The following prototype defines the coroutine destroy function. It is - * usually implemented as a macro, so watch out. It also serves - * no purpose unless you want to use the CORO_PTHREAD backend, - * where it is used to clean up the thread. You are responsible - * for freeing the stack and the context itself. + * The following prototype defines the coroutine destroy function. It + * is sometimes implemented as a macro, so watch out. It also serves no + * purpose unless you want to use the CORO_PTHREAD backend, where it is + * used to clean up the thread. You are responsible for freeing the stack + * and the context itself. * * This function is thread-safe and reentrant. */ @@ -199,23 +223,91 @@ void coro_transfer (coro_context *prev, coro_context *next); void coro_destroy (coro_context *ctx); #endif +/*****************************************************************************/ +/* optional stack management */ +/*****************************************************************************/ +/* + * You can disable all of the stack management functions by + * defining CORO_STACKALLOC to 0. Otherwise, they are enabled by default. + * + * If stack management is enabled, you can influence the implementation via these + * symbols: + * + * -DCORO_USE_VALGRIND + * + * If defined, then libcoro will include valgrind/valgrind.h and register + * and unregister stacks with valgrind. + * + * -DCORO_GUARDPAGES=n + * + * libcoro will try to use the specified number of guard pages to protect against + * stack overflow. If n is 0, then the feature will be disabled. If it isn't + * defined, then libcoro will choose a suitable default. If guardpages are not + * supported on the platform, then the feature will be silently disabled. + */ +#ifndef CORO_STACKALLOC +# define CORO_STACKALLOC 1 +#endif + +#if CORO_STACKALLOC + +/* + * The only allowed operations on these struct members is to read the + * "sptr" and "ssze" members to pass it to coro_create, to read the "sptr" + * member to see if it is false, in which case the stack isn't allocated, + * and to set the "sptr" member to 0, to indicate to coro_stack_free to + * not actually do anything. + */ + +struct coro_stack +{ + void *sptr; + size_t ssze; +#if CORO_USE_VALGRIND + int valgrind_id; +#endif +}; + +/* + * Try to allocate a stack of at least the given size and return true if + * successful, or false otherwise. + * + * The size is *NOT* specified in bytes, but in units of sizeof (void *), + * i.e. the stack is typically 4(8) times larger on 32 bit(64 bit) platforms + * then the size passed in. + * + * If size is 0, then a "suitable" stack size is chosen (usually 1-2MB). + */ +int coro_stack_alloc (struct coro_stack *stack, unsigned int size); + +/* + * Free the stack allocated by coro_stack_alloc again. It is safe to + * call this function on the coro_stack structure even if coro_stack_alloc + * failed. + */ +void coro_stack_free (struct coro_stack *stack); + +#endif + /* - * That was it. No other user-visible functions are implemented here. + * That was it. No other user-serviceable parts below here. */ /*****************************************************************************/ -#if !defined(CORO_LOSER) && !defined(CORO_UCONTEXT) \ - && !defined(CORO_SJLJ) && !defined(CORO_LINUX) \ - && !defined(CORO_IRIX) && !defined(CORO_ASM) \ - && !defined(CORO_PTHREAD) -# if defined(WINDOWS) || defined(_WIN32) +#if !defined CORO_LOSER && !defined CORO_UCONTEXT \ + && !defined CORO_SJLJ && !defined CORO_LINUX \ + && !defined CORO_IRIX && !defined CORO_ASM \ + && !defined CORO_PTHREAD && !defined CORO_FIBER +# if defined WINDOWS && (defined __i386 || (__x86_64 || defined _M_IX86 || defined _M_AMD64) +# define CORO_ASM 1 +# elif defined WINDOWS || defined _WIN32 # define CORO_LOSER 1 /* you don't win with windoze */ -# elif defined(__linux) && (defined(__x86) || defined (__amd64)) +# elif __linux && (__i386 || (__x86_64 && !__ILP32)) # define CORO_ASM 1 -# elif defined(HAVE_UCONTEXT_H) +# elif defined HAVE_UCONTEXT_H # define CORO_UCONTEXT 1 -# elif defined(HAVE_SETJMP_H) && defined(HAVE_SIGALTSTACK) +# elif defined HAVE_SETJMP_H && defined HAVE_SIGALTSTACK # define CORO_SJLJ 1 # else error unknown or unsupported architecture @@ -228,7 +320,8 @@ error unknown or unsupported architecture # include <ucontext.h> -struct coro_context { +struct coro_context +{ ucontext_t uc; }; @@ -238,7 +331,7 @@ struct coro_context { #elif CORO_SJLJ || CORO_LOSER || CORO_LINUX || CORO_IRIX # if defined(CORO_LINUX) && !defined(_GNU_SOURCE) -# define _GNU_SOURCE /* for linux libc */ +# define _GNU_SOURCE /* for glibc */ # endif # if !CORO_LOSER @@ -267,7 +360,8 @@ struct coro_context { # define coro_longjmp(env) siglongjmp ((env), 1) # endif -struct coro_context { +struct coro_context +{ coro_jmp_buf env; }; @@ -276,7 +370,8 @@ struct coro_context { #elif CORO_ASM -struct coro_context { +struct coro_context +{ void **sp; /* must be at offset 0 */ }; @@ -291,7 +386,8 @@ coro_transfer (coro_context *prev, coro_context *next); extern pthread_mutex_t coro_mutex; -struct coro_context { +struct coro_context +{ pthread_cond_t cv; pthread_t id; }; @@ -299,6 +395,19 @@ struct coro_context { void coro_transfer (coro_context *prev, coro_context *next); void coro_destroy (coro_context *ctx); +#elif CORO_FIBER + +struct coro_context +{ + void *fiber; + /* only used for initialisation */ + coro_func coro; + void *arg; +}; + +void coro_transfer (coro_context *prev, coro_context *next); +void coro_destroy (coro_context *ctx); + #endif #if __cplusplus