diff --git a/src/box/column_mask.h b/src/box/column_mask.h
index 14da841f1e295a593bb2dde54ffa598bd72984a0..9470fe19c05dffd831125d560bedcd92e2b5d2e9 100644
--- a/src/box/column_mask.h
+++ b/src/box/column_mask.h
@@ -123,4 +123,31 @@ column_mask_fieldno_is_set(uint64_t column_mask, uint32_t fieldno)
 	return (column_mask & mask) != 0;
 }
 
+/**
+ * Set the bit with given index @a bitno in a given @a bitmask.
+ * Do nothing when @a bitno is greater than or equal to the
+ * bitmask size (64).
+ * @param bitmask Bitmask to update.
+ * @param bitno Bit number to set (must be 0-based).
+static inline void
+bitmask64_set_bit(uint64_t *bitmask, uint32_t bitno)
+{
+	if (bitno < 64)
+		*bitmask |= ((uint64_t)1 << bitno);
+}
+
+/**
+ * Test a bit in the bitmask corresponding to @a bitno.
+ * @param bitmask Bitmask to test.
+ * @param bitno Bit number to test (must be 0-based).
+ * @return Whether given bit is set in a given bitmask.
+ *         When bitno is greater than or equal to the bitmask
+ *         size (64), returns false.
+ */
+static inline bool
+bitmask64_is_bit_set(uint64_t bitmask, uint32_t bitno)
+{
+	return bitno < 64 && (bitmask & ((uint64_t)1 << bitno)) != 0;
+}
+
 #endif
diff --git a/src/box/sql.c b/src/box/sql.c
index 0ab3a506fde404cd2fb01683cd5c37490ca4dcd5..f1df55571b4522cabd6ae081fa7e0ce688987180 100644
--- a/src/box/sql.c
+++ b/src/box/sql.c
@@ -1245,7 +1245,10 @@ vdbe_field_ref_create(struct vdbe_field_ref *field_ref, struct tuple *tuple,
 	const char *field0 = data;
 	field_ref->field_count = mp_decode_array((const char **) &field0);
 	field_ref->slots[0] = (uint32_t)(field0 - data);
-	field_ref->rightmost_slot = 0;
+	memset(&field_ref->slots[1], 0,
+	       field_ref->field_count * sizeof(field_ref->slots[0]));
+	field_ref->slot_bitmask = 0;
+	bitmask64_set_bit(&field_ref->slot_bitmask, 0);
 }
 
 void
diff --git a/src/box/sql.h b/src/box/sql.h
index 9ccecf28c244e9957793fe08eae68e7a69c57013..7644051b4ffe4907dd0d11ab4b88d198234b7c6b 100644
--- a/src/box/sql.h
+++ b/src/box/sql.h
@@ -372,10 +372,13 @@ struct vdbe_field_ref {
 	/** Count of fields in tuple. */
 	uint32_t field_count;
 	/**
-	 * Index of the rightmost initialized slot in slots
-	 * array.
+	 * Bitmask of initialized slots. The fieldno == 0 slot
+	 * must be initialized in vdbe_field_ref constructor.
+	 * This bitmap allows to lookup for the nearest
+	 * initialized slot for a given fieldno and perform as
+	 * little extra tuple decoding as possible.
 	 */
-	uint32_t rightmost_slot;
+	uint64_t slot_bitmask;
 	/**
 	 * Array of offsets of tuple fields.
-	 * Only values <= rightmost_slot are valid.
+	 * Only initialized slots are valid.
diff --git a/src/box/sql/vdbe.c b/src/box/sql/vdbe.c
index e096e1f65aa4f915231c2efa3827c6c9cc98be4a..21b585364af8e82c39d7f72ee5bd47da6b12507e 100644
--- a/src/box/sql/vdbe.c
+++ b/src/box/sql/vdbe.c
@@ -606,27 +606,74 @@ vdbe_field_ref_fetch_field(struct vdbe_field_ref *field_ref, uint32_t fieldno)
 }
 
 /**
- * Try to get a current tuple field using its field map.
+ * Find the left closest field for a given fieldno in field_ref's
+ * slot_bitmask. The fieldno is expected to be greater than 0.
+ * @param field_ref The vdbe_field_ref instance to use.
+ * @param fieldno Number of a field to find the nearest left
+ *        neighbor of.
+ * @retval An index of the closest smaller fieldno initialized
+ *         in slot_bitmask (slot 0 is always initialized).
+ */
+static inline uint32_t
+vdbe_field_ref_closest_slotno(struct vdbe_field_ref *field_ref,
+			      uint32_t fieldno)
+{
+	uint64_t slot_bitmask = field_ref->slot_bitmask;
+	assert(slot_bitmask != 0 && fieldno > 0);
+	uint64_t le_mask = fieldno < 64 ? slot_bitmask & ((1LLU << fieldno) - 1)
+			   : slot_bitmask;
+	assert(bit_clz_u64(le_mask) < 64);
+	return 64 - bit_clz_u64(le_mask) - 1;
+}
+
+/**
+ * Get a tuple's field using field_ref's slot_bitmask, and tuple's
+ * field_map when possible. Required field must be present in
+ * tuple.
  * @param field_ref The vdbe_field_ref instance to use.
  * @param fieldno Number of a field to get.
- * @param[out] field_size Result field size.
  * @retval not NULL MessagePack field.
- * @retval NULL Can not use field_map - it does not contain
- *              offset to @a fieldno.
  */
-static const void *
-vdbe_field_ref_fast_fetch(struct vdbe_field_ref *field_ref, uint32_t fieldno,
-			  uint32_t *field_size)
+static const char *
+vdbe_field_ref_fetch_data(struct vdbe_field_ref *field_ref, uint32_t fieldno)
 {
-	const struct tuple_field *tf =
-		vdbe_field_ref_fetch_field(field_ref, fieldno);
-	if (tf == NULL || tf->offset_slot == TUPLE_OFFSET_SLOT_NIL)
-		return NULL;
-	const char *field = tuple_field(field_ref->tuple, fieldno);
-	const char *end = field;
-	mp_next(&end);
-	*field_size = end - field;
-	return field;
+	if (field_ref->slots[fieldno] != 0)
+		return field_ref->data + field_ref->slots[fieldno];
+
+	const char *field_begin;
+	const struct tuple_field *field = vdbe_field_ref_fetch_field(field_ref,
+								     fieldno);
+	if (field != NULL && field->offset_slot != TUPLE_OFFSET_SLOT_NIL) {
+		field_begin = tuple_field(field_ref->tuple, fieldno);
+	} else {
+		uint32_t prev = vdbe_field_ref_closest_slotno(field_ref,
+							      fieldno);
+		if (fieldno >= 64) {
+			/*
+			 * There could be initialized slots
+			 * that didn't fit in the bitmask.
+			 * Try to find the biggest initialized
+			 * slot.
+			 */
+			for (uint32_t it = fieldno - 1; it > prev; it--) {
+				if (field_ref->slots[it] == 0)
+					continue;
+				prev = it;
+				break;
+			}
+		}
+		field_begin = field_ref->data + field_ref->slots[prev];
+		for (prev++; prev < fieldno; prev++) {
+			mp_next(&field_begin);
+			field_ref->slots[prev] =
+				(uint32_t)(field_begin - field_ref->data);
+			bitmask64_set_bit(&field_ref->slot_bitmask, prev);
+		}
+		mp_next(&field_begin);
+	}
+	field_ref->slots[fieldno] = (uint32_t)(field_begin - field_ref->data);
+	bitmask64_set_bit(&field_ref->slot_bitmask, fieldno);
+	return field_begin;
 }
 
 static inline enum field_type
@@ -652,45 +699,13 @@ static int
 vdbe_field_ref_fetch(struct vdbe_field_ref *field_ref, uint32_t fieldno,
 		     struct Mem *dest_mem)
 {
-	uint32_t *slots = field_ref->slots;
 	if (fieldno >= field_ref->field_count) {
 		UPDATE_MAX_BLOBSIZE(dest_mem);
 		return 0;
 	}
-
-	const char *data;
-	if (field_ref->rightmost_slot <= fieldno) {
-		uint32_t field_sz;
-		data = vdbe_field_ref_fast_fetch(field_ref, fieldno,
-						 &field_sz);
-		if (data != NULL) {
-			/*
-			 * Special case for tarantool spaces: for
-			 * indexed fields a tuple field map can be
-			 * used. Else there is no sense in
-			 * tuple_field usage, because it makes
-			 * foreach field { mp_next(); } to find
-			 * a field. In such a case sql is
-			 * better - it saves offsets to all fields
-			 * visited in mp_next() cycle.
-			 */
-			uint32_t offset = (uint32_t)(data - field_ref->data);
-			slots[fieldno] = offset;
-			slots[fieldno + 1] = offset + field_sz;
-		} else {
-			uint32_t i = field_ref->rightmost_slot;
-			data = field_ref->data + slots[i];
-			do {
-				mp_next(&data);
-				slots[++i] = (uint32_t)(data - field_ref->data);
-			} while (i <= fieldno);
-			field_ref->rightmost_slot = i;
-		}
-	}
-
 	assert(sqlVdbeCheckMemInvariants(dest_mem) != 0);
+	const char *data = vdbe_field_ref_fetch_data(field_ref, fieldno);
 	uint32_t dummy;
-	data = field_ref->data + slots[fieldno];
 	if (vdbe_decode_msgpack_into_mem(data, dest_mem, &dummy) != 0)
 		return -1;
 
@@ -699,8 +714,9 @@ vdbe_field_ref_fetch(struct vdbe_field_ref *field_ref, uint32_t fieldno,
 	 * Wrap it in a blob verbatim.
 	 */
 	if (dest_mem->flags == 0) {
-		dest_mem->n = slots[fieldno + 1] - slots[fieldno];
 		dest_mem->z = (char *) data;
+		dest_mem->n = vdbe_field_ref_fetch_data(field_ref,
+							fieldno + 1) - data;
 		dest_mem->flags = MEM_Blob | MEM_Ephem | MEM_Subtype;
 		dest_mem->subtype = SQL_SUBTYPE_MSGPACK;
 	}
diff --git a/test/sql/misc.result b/test/sql/misc.result
index 2591d9d75f57f1e6b24fc3ecfe8448f508530f31..a157ddbc1cb577496d28cea16d06d2d04216f704 100644
--- a/test/sql/misc.result
+++ b/test/sql/misc.result
@@ -133,3 +133,70 @@ box.execute('select * from "s"')
 s:drop()
 ---
 ...
+--
+-- gh-4267: Full power of vdbe_field_ref
+-- Tarantool's SQL internally stores data offset for all acceded
+-- fields. It also keeps a bitmask of size 64 with all initialized
+-- slots in actual state to find the nearest left field really
+-- fast and parse tuple from that position. For fieldno >= 64
+-- bitmask is not applicable, so it scans data offsets area in
+-- a cycle.
+--
+-- The test below covers a case when this optimisation doesn't
+-- work and the second lookup require parsing tuple from
+-- beginning.
+---
+format = {}
+---
+...
+t = {}
+---
+...
+for i = 1, 70 do                                                \
+        format[i] = {name = 'FIELD'..i, type = 'unsigned'}      \
+        t[i] = i                                                \
+end
+---
+...
+s = box.schema.create_space('TEST', {format = format})
+---
+...
+pk = s:create_index('pk', {parts = {70}})
+---
+...
+s:insert(t)
+---
+- [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
+  23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42,
+  43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62,
+  63, 64, 65, 66, 67, 68, 69, 70]
+...
+box.execute('SELECT field70, field64 FROM test')
+---
+- metadata:
+  - name: FIELD70
+    type: unsigned
+  - name: FIELD64
+    type: unsigned
+  rows:
+  - [70, 64]
+...
+-- In the case below described optimization works fine.
+pk:alter({parts = {66}})
+---
+...
+box.execute('SELECT field66, field68, field70 FROM test')
+---
+- metadata:
+  - name: FIELD66
+    type: unsigned
+  - name: FIELD68
+    type: unsigned
+  - name: FIELD70
+    type: unsigned
+  rows:
+  - [66, 68, 70]
+...
+box.space.TEST:drop()
+---
+...
diff --git a/test/sql/misc.test.lua b/test/sql/misc.test.lua
index fdc19f3ac75262c327dd3123ef6d46d63c22c897..541660c61890a23c16aa9633867194ceac5018e6 100644
--- a/test/sql/misc.test.lua
+++ b/test/sql/misc.test.lua
@@ -35,3 +35,32 @@ s = box.schema.space.create('s',{format=format, temporary=true})
 i = s:create_index('i')
 box.execute('select * from "s"')
 s:drop()
+
+--
+-- gh-4267: Full power of vdbe_field_ref
+-- Tarantool's SQL internally stores data offset for all acceded
+-- fields. It also keeps a bitmask of size 64 with all initialized
+-- slots in actual state to find the nearest left field really
+-- fast and parse tuple from that position. For fieldno >= 64
+-- bitmask is not applicable, so it scans data offsets area in
+-- a cycle.
+--
+-- The test below covers a case when this optimisation doesn't
+-- work and the second lookup require parsing tuple from
+-- beginning.
+---
+format = {}
+t = {}
+for i = 1, 70 do                                                \
+        format[i] = {name = 'FIELD'..i, type = 'unsigned'}      \
+        t[i] = i                                                \
+end
+s = box.schema.create_space('TEST', {format = format})
+pk = s:create_index('pk', {parts = {70}})
+s:insert(t)
+box.execute('SELECT field70, field64 FROM test')
+
+-- In the case below described optimization works fine.
+pk:alter({parts = {66}})
+box.execute('SELECT field66, field68, field70 FROM test')
+box.space.TEST:drop()