From f9950440a68b4adbc0e3caee7efa58f1d878eb87 Mon Sep 17 00:00:00 2001 From: Konstantin Osipov <kostja.osipov@gmail.com> Date: Fri, 16 Aug 2019 16:13:42 +0300 Subject: [PATCH] gc: randomize the next checkpoint time also after a manual box.snapshot(). Before this patch, snapshot interval was set randomly within checkpoint_interval period. However, after box.snapshot(), the next snapshot was scheduled exactly checkpoint_interval from the current time. Many orchestration scripts snapshot the entire cluster right after deployment, to take a backup. This kills randomness, since all instances begin to count the next checkpoint time from the current time. Randomize the next checkpoint time after a manual snapshot as well. Fixes gh-4432 (cherry picked from commit 6277f48ad8a7213d73e280d5046c5b5872f74e25) --- src/box/gc.c | 15 +++++++++++---- src/box/gc.h | 4 +++- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/src/box/gc.c b/src/box/gc.c index a2c963e0a3..e0df924730 100644 --- a/src/box/gc.c +++ b/src/box/gc.c @@ -404,11 +404,18 @@ gc_checkpoint(void) } /* - * Reset the schedule and wake up the checkpoint daemon - * so that it can readjust. + * Since a user invoked a snapshot manually, this may be + * because they may not be happy with the current randomized + * schedule. Randomize the schedule again and wake up the + * checkpoint daemon so that it can readjust. + * It is also a good idea to randomize the interval, since + * otherwise many instances running on the same host will + * no longer run their checkpoints randomly after + * a sweeping box.snapshot() (gh-4432). 
*/ - checkpoint_schedule_reset(&gc.checkpoint_schedule, - ev_monotonic_now(loop())); + checkpoint_schedule_cfg(&gc.checkpoint_schedule, + ev_monotonic_now(loop()), + gc.checkpoint_schedule.interval); fiber_wakeup(gc.checkpoint_fiber); if (gc_do_checkpoint() != 0) diff --git a/src/box/gc.h b/src/box/gc.h index 5790ebcc6a..827a5db8eb 100644 --- a/src/box/gc.h +++ b/src/box/gc.h @@ -240,7 +240,9 @@ void gc_add_checkpoint(const struct vclock *vclock); /** - * Make a checkpoint. + * Make a *manual* checkpoint. + * This is the entry point for box.snapshot() and the SIGUSR1 signal + * handler. * * This function runs engine/WAL callbacks to create a checkpoint * on disk, then tracks the new checkpoint in the garbage collector -- GitLab