Howard Chu writes:
Thanks, confirmed. In a nested rebalance, doing a page merge with the neighbor on the left was discarding the rebalanced state. Fixed now in mdb.master.
Nope. 69edafe28aef03b965c3d911be9ab8e340f914e1 "ITS#7829 more for prev commit" breaks test054-syncreplication-parallel-load in RE24 (with normal pagesize), every 2nd run or so.
Running ./scripts/test054-syncreplication-parallel-load for mdb... running defines.sh Starting provider slapd on TCP/IP port 9011... Using ldapsearch to check that provider slapd is running... Using ldapadd to create the context prefix entry in the provider... Starting consumer slapd on TCP/IP port 9014... Using ldapsearch to check that consumer slapd is running... Using ldapadd to populate the provider directory... Waiting 7 seconds for syncrepl to receive changes... Stopping the provider, sleeping 10 seconds and restarting it... Using ldapsearch to check that provider slapd is running... Waiting 10 seconds to let the system catch up Using ldapmodify to modify provider directory... ldapmodify failed (80)!
slapd.1.log says:
533bbaa2 mdb_modrdn: new ndn=cn=rosco p. coltrane,ou=retired,ou=people,dc=example,dc=com 533bbaa2 => mdb_dn2id("cn=rosco p. coltrane,ou=retired,ou=people,dc=example,dc=com") 533bbaa2 <= mdb_dn2id: get failed: MDB_NOTFOUND: No matching key/data pair found (-30798) 533bbaa2 => mdb_dn2id_delete 0x2a 533bbaa2 <= mdb_dn2id_delete 0x2a: -30798 533bbaa2 <=- mdb_modrdn: dn2id del failed: MDB_NOTFOUND: No matching key/data pair found (-30798) 533bbaa2 send_ldap_result: conn=1002 op=8 p=3 533bbaa2 send_ldap_result: err=80 matched="" text="DN index delete fail"
The assert() added to dn2id.c in the patch below would catch it. The patch also passes -MDB_SET instead of MDB_SET, so that a patched liblmdb can catch the failure even before that assert is reached.
diff --git a/servers/slapd/back-mdb/dn2id.c b/servers/slapd/back-mdb/dn2id.c index ceacb17..8cb9d91 100644 --- a/servers/slapd/back-mdb/dn2id.c +++ b/servers/slapd/back-mdb/dn2id.c @@ -243,3 +243,4 @@ mdb_dn2id_delete( do { - rc = mdb_cursor_get( mc, &key, &data, MDB_SET ); + rc = mdb_cursor_get( mc, &key, &data, -MDB_SET ); + assert(!rc); if ( !rc ) {
With the following patch, mdb.c fails at the first added assert; the remaining hunks only make mdb.c handle -MDB_SET:
diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index 10a8358..71b0b93 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -5348,2 +5348,3 @@ mdb_cursor_set(MDB_cursor *mc, MDB_val *key, MDB_val *data, rc = mc->mc_dbx->md_cmp(key, &nodekey); + mdb_cassert(mc, op != (MDB_cursor_op)-MDB_SET || rc >= 0 || mc->mc_top); if (rc == 0) { @@ -5460,3 +5461,3 @@ set1: if (F_ISSET(leaf->mn_flags, F_DUPDATA)) { - if (op == MDB_SET || op == MDB_SET_KEY || op == MDB_SET_RANGE) { + if (op == MDB_SET || op == (MDB_cursor_op)-MDB_SET || op == MDB_SET_KEY || op == MDB_SET_RANGE) { rc = mdb_cursor_first(&mc->mc_xcursor->mx_cursor, data, NULL); @@ -5651,2 +5652,3 @@ mdb_cursor_get(MDB_cursor *mc, MDB_val *key, MDB_val *data, case MDB_SET_RANGE: + case (MDB_cursor_op)-MDB_SET: if (key == NULL) { @@ -5747,2 +5749,3 @@ fetchm: DPRINTF(("unhandled/unimplemented cursor operation %u", op)); + mdb_cassert(mc, 0); /* just checking that we never get here */ rc = EINVAL;
#2 mdb_assert_fail () #3 mdb_cursor_set (mc=0x7fabb4102bb0, key=0x7fabbf276260, data=0x7fabbf276250, op=4294967281, exactp=0x7fabbf2761f0) at mdb.c:5349 #4 mdb_cursor_get (mc=0x7fabb4102bb0, key=0x7fabbf276260, data=0x7fabbf276250, op=4294967281) at mdb.c:5657 #5 mdb_dn2id_delete (op=0x7fabb4000960, mc=0x7fabb4102bb0, id=42, nsubs=1) at dn2id.c:244 #6 mdb_modrdn (op=0x7fabb4000960, rs=0x7fabbf276910) at modrdn.c:476 #7 overlay_op_walk (op=0x7fabb4000960, rs=0x7fabbf276910, which=op_modrdn, oi=0x1c2c120, on=0x0) at backover.c:671 #8 over_op_func (op=0x7fabb4000960, rs, which) at backover.c:723 #9 fe_op_modrdn (op=0x7fabb4000960, rs=0x7fabbf276910) at modrdn.c:314 #10 do_modrdn (op=0x7fabb4000960, rs=0x7fabbf276910) at modrdn.c:186 #11 connection_operation (ctx=0x7fabbf276a70, arg_v=0x7fabb4000960) at connection.c:1155 #12 connection_read_thread (ctx=0x7fabbf276a70, argv) at connection.c:1291 #13 ldap_int_thread_pool_wrapper (xpool=0x1bd7370) at tpool.c:688
(gdb) frame 3 #3 mdb_cursor_set (mc=0x7fabb4102bb0, key=0x7fabbf276260, data=0x7fabbf276250, op=4294967281, exactp=0x7fabbf2761f0) at mdb.c:5349 5349 mdb_cassert(mc, op != (MDB_cursor_op)-MDB_SET || rc >= 0 || mc->mc_top); (gdb) info locals nodekey = {mv_size = 8, mv_data = 0x7fabb01092da} rc = -8 mp = 0x7fabb0108310 leaf = 0x7fabb01092d2 __FUNCTION__ = "mdb_cursor_set" (gdb) set output-radix 16 (gdb) p *mp $1 = {mp_p = {p_pgno = 0x51, p_next = 0x51}, mp_pad = 0x0, mp_flags = 0x12, mp_pb = {pb = {pb_lower = 0x44, pb_upper = 0xa2a}, pb_pages = 0xa2a0044}, mp_ptrs = {0xfc2}} (gdb) p *leaf $2 = {mn_lo = 0x2e, mn_hi = 0x0, mn_flags = 0x0, mn_ksize = 0x8, mn_data = "\020"} (gdb) p *(MDB_ID*)key->mv_data $3 = 0x8 (gdb) p *(MDB_ID*)nodekey.mv_data $4 = 0x10