From 2a7cf7f5c5a5ae1944ec9c34a2c6ca6c2d2998dd Mon Sep 17 00:00:00 2001 From: Vladimir Davydov <vdavydov.dev@gmail.com> Date: Sat, 7 Apr 2018 10:40:05 +0300 Subject: [PATCH] vinyl: fix crash if index is dropped while read task is in progress If a fiber waiting for a read task to complete is cancelled, it will leave the read iterator immediately, leaving the read task pending. If the index is dropped before the read task is complete, the task will attempt to dereference a deleted run upon completion: 0 0x560b4007dbbc in print_backtrace+9 1 0x560b3ff80a1d in _ZL12sig_fatal_cbiP9siginfo_tPv+1e7 2 0x7f52b09190c0 in __restore_rt+0 3 0x7f52af6ea30a in bzero+5a 4 0x560b3ffc7a99 in mempool_free+2a 5 0x560b3ffcaeb7 in vy_page_read_cb_free+47 6 0x560b400806a2 in cbus_call_done+3f 7 0x560b400805ea in cmsg_deliver+30 8 0x560b40080e4b in cbus_process+51 9 0x560b4003046b in _ZL10tx_prio_cbP7ev_loopP10ev_watcheri+2b 10 0x560b4023d86e in ev_invoke_pending+ca 11 0x560b4023e772 in ev_run+5a0 12 0x560b3ff822dc in main+5ed 13 0x7f52af6862b1 in __libc_start_main+f1 14 0x560b3ff801da in _start+2a 15 (nil) in +2a Fix this by elevating the run reference counter per each read task. Note, currently we use vy_run::refs not only as a reference counter, but also as a counter of slices created for the run - see how we compare it to vy_run::compacted_slice_count in vy_task_compact_complete(). This isn't going to work anymore, obviously. Now we need to count slices created per each run in a separate counter, vy_run::slice_count. Anyway, it was a rather dubious hack to abuse reference counter for counting slices and it's good to finally get rid of it. --- src/box/vy_run.c | 9 ++++++++- src/box/vy_run.h | 7 +++++-- src/box/vy_scheduler.c | 2 +- test/vinyl/errinj.result | 18 ++++++++++++++++++ test/vinyl/errinj.test.lua | 7 +++++++ 5 files changed, 39 insertions(+), 4 deletions(-) diff --git a/src/box/vy_run.c b/src/box/vy_run.c index 08d0761bf1..a41e9a057e 100644 --- a/src/box/vy_run.c +++ b/src/box/vy_run.c @@ -371,6 +371,7 @@ vy_slice_new(int64_t id, struct vy_run *run, slice->id = id; slice->run = run; vy_run_ref(run); + run->slice_count++; if (begin != NULL) tuple_ref(begin); slice->begin = begin; @@ -424,6 +425,8 @@ void vy_slice_delete(struct vy_slice *slice) { assert(slice->pin_count == 0); + assert(slice->run->slice_count > 0); + slice->run->slice_count--; vy_run_unref(slice->run); if (slice->begin != NULL) tuple_unref(slice->begin); @@ -910,8 +913,10 @@ static int vy_page_read_cb_free(struct cbus_call_msg *base) { struct vy_page_read_task *task = (struct vy_page_read_task *)base; + struct vy_run_env *env = task->run->env; vy_page_delete(task->page); - mempool_free(&task->run->env->read_task_pool, task); + vy_run_unref(task->run); + mempool_free(&env->read_task_pool, task); return 0; } @@ -970,6 +975,7 @@ vy_run_iterator_load_page(struct vy_run_iterator *itr, uint32_t page_no, task->run = slice->run; task->page_info = *page_info; task->page = page; + vy_run_ref(task->run); /* Post task to the reader thread. */ rc = cbus_call(&reader->reader_pipe, &reader->tx_pipe, @@ -978,6 +984,7 @@ vy_run_iterator_load_page(struct vy_run_iterator *itr, uint32_t page_no, if (!task->base.complete) return -1; /* timed out or cancelled */ + vy_run_unref(task->run); mempool_free(&env->read_task_pool, task); if (rc != 0) { diff --git a/src/box/vy_run.h b/src/box/vy_run.h index 6973ee2d30..b3e3d1c6ff 100644 --- a/src/box/vy_run.h +++ b/src/box/vy_run.h @@ -130,15 +130,18 @@ struct vy_run { /** * Run reference counter, the run is deleted once it hits 0. * A new run is created with the reference counter set to 1. - * A run is referenced by each slice created for it. + * A run is referenced by each slice created for it and each + * pending read or write task. */ int refs; + /** Number of slices created for this run. */ + int slice_count; /** * Counter used on completion of a compaction task to check if * all slices of the run have been compacted and so the run is * not used any more and should be deleted. */ - int64_t compacted_slice_count; + int compacted_slice_count; /** * Link in the list of runs that became unused * after compaction. diff --git a/src/box/vy_scheduler.c b/src/box/vy_scheduler.c index b6225ace6a..4954afe75d 100644 --- a/src/box/vy_scheduler.c +++ b/src/box/vy_scheduler.c @@ -1102,7 +1102,7 @@ vy_task_compact_complete(struct vy_scheduler *scheduler, struct vy_task *task) } for (slice = first_slice; ; slice = rlist_next_entry(slice, in_range)) { run = slice->run; - if (run->compacted_slice_count == run->refs) + if (run->compacted_slice_count == run->slice_count) rlist_add_entry(&unused_runs, run, in_unused); slice->run->compacted_slice_count = 0; if (slice == last_slice) diff --git a/test/vinyl/errinj.result b/test/vinyl/errinj.result index e7c9bc71ce..9135108665 100644 --- a/test/vinyl/errinj.result +++ b/test/vinyl/errinj.result @@ -290,9 +290,27 @@ s:select() - [9, 'test str9'] - [10, 'test str10'] ... +-- index is dropped while a read task is in progress +errinj.set("ERRINJ_VY_READ_PAGE_TIMEOUT", 0.05) +--- +- ok +... +f1 = fiber.create(test_cancel_read) +--- +... +fiber.cancel(f1) +--- +... s:drop() --- ... +fiber.sleep(0.1) +--- +... +errinj.set("ERRINJ_VY_READ_PAGE_TIMEOUT", 0); +--- +- ok +... box.cfg{vinyl_cache = vinyl_cache} --- ... diff --git a/test/vinyl/errinj.test.lua b/test/vinyl/errinj.test.lua index 7655a57cc4..9724a69b82 100644 --- a/test/vinyl/errinj.test.lua +++ b/test/vinyl/errinj.test.lua @@ -101,7 +101,14 @@ errinj.set("ERRINJ_VY_READ_PAGE_TIMEOUT", 0); errinj.set("ERRINJ_VY_READ_PAGE", false); s:select() +-- index is dropped while a read task is in progress +errinj.set("ERRINJ_VY_READ_PAGE_TIMEOUT", 0.05) +f1 = fiber.create(test_cancel_read) +fiber.cancel(f1) s:drop() +fiber.sleep(0.1) +errinj.set("ERRINJ_VY_READ_PAGE_TIMEOUT", 0); + box.cfg{vinyl_cache = vinyl_cache} -- gh-2871: check that long reads are logged -- GitLab