Skip to content
Snippets Groups Projects
Commit 98a76e1d authored by Roman Tsisyk
Browse files

Fix #167: Replica can't find next xlog file if there is a gap in LSN

parent 364be3b3
No related branches found
No related tags found
No related merge requests found
......@@ -403,7 +403,7 @@ recover_remaining_wals(struct recovery_state *r)
if (r->current_wal != NULL)
goto recover_current_wal;
while (r->confirmed_lsn < wal_greatest_lsn) {
while (current_lsn <= wal_greatest_lsn) {
/*
* If a newer WAL appeared in the directory before
* current_wal was fully read, try re-reading
......@@ -421,8 +421,6 @@ recover_remaining_wals(struct recovery_state *r)
}
}
/* TODO: find a better way of finding the next xlog */
current_lsn = r->confirmed_lsn + 1;
/*
* For the last WAL, first try to open .inprogress
* file: if it doesn't exist, we can safely try an
......@@ -458,8 +456,18 @@ recover_remaining_wals(struct recovery_state *r)
say_warn("unlink broken %s WAL", filename);
if (inprogress_log_unlink(filename) != 0)
panic("can't unlink 'inprogres' WAL");
result = 0;
break;
}
/* Missing xlog or gap in LSN */
say_error("not all WALs have been successfully read");
if (!r->wal_dir->panic_if_error) {
/* Ignore missing WALs */
say_warn("ignoring missing WALs");
current_lsn++;
continue;
}
result = 0;
result = -1;
break;
}
assert(r->current_wal == NULL);
......@@ -491,6 +499,8 @@ recover_remaining_wals(struct recovery_state *r)
r->confirmed_lsn);
log_io_close(&r->current_wal);
}
current_lsn = r->confirmed_lsn + 1;
}
/*
......@@ -498,7 +508,7 @@ recover_remaining_wals(struct recovery_state *r)
* we lose some logs it is a fatal error.
*/
if (wal_greatest_lsn > r->confirmed_lsn + 1) {
say_error("not all WALs have been successfully read");
say_error("can't recover WALs");
result = -1;
}
......
......@@ -48,3 +48,34 @@ lua #box.space[0]
---
- 0
...
lua box.space[0]:insert{1, 'first tuple'}
---
- 1: {'first tuple'}
...
lua box.space[0]:insert{2, 'second tuple'}
---
- 2: {'second tuple'}
...
lua box.space[0]:insert{3, 'third tuple'}
---
- 3: {'third tuple'}
...
lua box.space[0]:insert{4, 'fourth tuple'}
---
- 4: {'fourth tuple'}
...
lua box.space[0]:select(0, 1)
---
- 1: {'first tuple'}
...
lua box.space[0]:select(0, 2)
---
- 2: {'second tuple'}
...
lua box.space[0]:select(0, 3)
---
...
lua box.space[0]:select(0, 4)
---
- 4: {'fourth tuple'}
...
......@@ -3,6 +3,7 @@
import os
from os.path import abspath
import shutil
import yaml
# cleanup vardir
server.stop()
......@@ -123,6 +124,36 @@ exec admin "lua box.space[0]:select(0, 1)"
exec admin "lua box.space[0]:select(0, 2)"
exec admin "lua #box.space[0]"
#
# gh-167: Replica can't find next xlog file if there is a gap in LSN
#
# Scenario: write tuples across several server restarts, delete the xlog
# file that holds the third tuple, then start the server again and check
# which tuples are still visible.
#
server.stop()
# NOTE(review): presumably this config makes recovery tolerate WAL errors
# instead of aborting -- confirm against box/panic_on_wal_error.cfg
server.cfgfile_source = "box/panic_on_wal_error.cfg"
server.deploy()
# Create wal#1
exec admin "lua box.space[0]:insert{1, 'first tuple'}"
exec admin "lua box.space[0]:insert{2, 'second tuple'}"
# Take the LSN reported by the server after the two inserts and add one;
# the result is used below as the name of the xlog file to remove.
# NOTE(review): yaml.load is applied to the server's own admin output here;
# consider yaml.safe_load if the input could ever be untrusted.
lsn = yaml.load(server.admin.execute("lua box.info.lsn", True))[0] + 1
# xlog path: the LSN zero-padded to 20 digits plus ".xlog", inside vardir
wal = os.path.join(server.vardir, str(lsn).zfill(20) + ".xlog")
# Restart before the next insert
# NOTE(review): presumably each restart makes the server begin a new xlog
# file, so that tuple 3 lands in the file computed above -- confirm
server.stop()
server.start()
exec admin "lua box.space[0]:insert{3, 'third tuple'}"
# Restart again before inserting the fourth tuple
server.stop()
server.start()
exec admin "lua box.space[0]:insert{4, 'fourth tuple'}"
server.stop()
# Remove xlog with {3, 'third tuple'} to simulate gap in LSNs
os.unlink(wal)
# Start the server with the xlog file removed
server.start()
# Tuples 1 and 2 are expected to be found
exec admin "lua box.space[0]:select(0, 1)"
exec admin "lua box.space[0]:select(0, 2)"
# missing tuple from removed xlog
exec admin "lua box.space[0]:select(0, 3)"
# Tuple 4 was written after the removed xlog and is expected to be found
exec admin "lua box.space[0]:select(0, 4)"
# cleanup
server.stop()
server.deploy()
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment