From 0b1730659e1cb0c75b6f65b0918c5ac81e1cae7e Mon Sep 17 00:00:00 2001
From: Vladimir Davydov <vdavydov.dev@gmail.com>
Date: Thu, 15 Jun 2017 15:44:09 +0300
Subject: [PATCH] vinyl: fix dump bandwidth calculation

We compute dump bandwidth basing on the time it takes a run writing task
to complete. While it used to work when we didn't have data compression
and indexes didn't share in-memory tuples, today the logic behind dump
bandwidth calculation is completely flawed:

 - Due to data compression, the amount of memory we dump may be much
   greater than the amount of data we write to disk, in which case dump
   bandwidth will be underestimated.

 - If a space has several indexes, dumping it may result in writing more
   data than is actually stored in-memory, because tuples of the same
   space are shared among its indexes in-memory, but stored separately
   when written to disk. In this case, dump bandwidth will be
   overestimated.

This results in quota watermark being set incorrectly and, as a result,
either stalling transactions or dumping memory non-stop.

Obviously, to resolve both issues, we need to account memory freed per
unit of time instead of data written to disk. So this patch makes
vy_scheduler_trigger_dump() remember the time when dump was started and
vy_scheduler_complete_dump() update dump bandwidth basing on the amount
of memory dumped and the time dump took.
---
 src/box/vinyl.c | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/src/box/vinyl.c b/src/box/vinyl.c
index 887ffcd974..8392abd1ef 100644
--- a/src/box/vinyl.c
+++ b/src/box/vinyl.c
@@ -317,10 +317,9 @@ vy_stat_cursor(struct vy_stat *s, ev_tstamp start, int ops)
 }
 
 static void
-vy_stat_dump(struct vy_stat *s, ev_tstamp time, size_t written,
+vy_stat_dump(struct vy_stat *s, size_t written,
 	     uint64_t dumped_statements)
 {
-	histogram_collect(s->dump_bw, written / time);
 	s->dump_total += written;
 	s->dumped_statements += dumped_statements;
 }
@@ -1494,6 +1493,8 @@ struct vy_scheduler {
 	 * dumping of all in-memory trees whose generation is less.
 	 */
 	int64_t checkpoint_generation;
+	/** Time of dump start. */
+	ev_tstamp dump_start;
 	/**
 	 * List of all in-memory trees, scheduled for dump.
 	 * Older trees are closer to the tail of the list.
@@ -2904,8 +2905,6 @@ struct vy_task {
 	struct diag diag;
 	/** Index this task is for. */
 	struct vy_index *index;
-	/** How long ->execute took, in nanoseconds. */
-	ev_tstamp exec_time;
 	/** Number of bytes written to disk by this task. */
 	size_t dump_size;
 	/** Number of statements dumped to the disk. */
@@ -3934,8 +3933,7 @@ vy_scheduler_f(va_list va)
 			else
 				tasks_done++;
 			if (task->dump_size > 0)
-				vy_stat_dump(env->stat, task->exec_time,
-					     task->dump_size,
+				vy_stat_dump(env->stat, task->dump_size,
 					     task->dumped_statements);
 			vy_task_delete(&scheduler->task_pool, task);
 			scheduler->workers_available++;
@@ -4039,9 +4037,7 @@ vy_worker_f(va_list va)
 		assert(task != NULL);
 
 		/* Execute task */
-		uint64_t start = ev_now(loop());
 		task->status = task->ops->execute(task);
-		task->exec_time = ev_now(loop()) - start;
 		if (task->status != 0) {
 			struct diag *diag = diag_get();
 			assert(!diag_is_empty(diag));
@@ -4142,6 +4138,7 @@ vy_scheduler_trigger_dump(struct vy_scheduler *scheduler)
 	 * all in-memory trees.
 	 */
 	scheduler->generation++;
+	scheduler->dump_start = ev_now(loop());
 }
 
 /** Called on memory dump completion. */
@@ -4159,8 +4156,15 @@ vy_scheduler_complete_dump(struct vy_scheduler *scheduler)
 	lsregion_gc(allocator, scheduler->generation - 1);
 	size_t mem_used_after = lsregion_used(allocator);
 	assert(mem_used_after <= mem_used_before);
-	vy_quota_release(quota, mem_used_before - mem_used_after);
+	size_t mem_dumped = mem_used_before - mem_used_after;
+	vy_quota_release(quota, mem_dumped);
 	ipc_cond_signal(&scheduler->dump_cond);
+
+	/* Account dump bandwidth. */
+	struct vy_stat *stat = scheduler->env->stat;
+	ev_tstamp dump_duration = ev_now(loop()) - scheduler->dump_start;
+	if (dump_duration > 0)
+		histogram_collect(stat->dump_bw, mem_dumped / dump_duration);
 }
 
 /** Check if memory dump is required. */
-- 
GitLab