Full_Name: H Law
Version: LMDB
OS: Linux
URL:
Submission from: (NULL) (42.2.241.129)
It seems that, after using a cursor delete during a cursor traversal on a dup
sort database, the cursor is in a strange state, when MDB_NEXT / MDB_NEXT_DUP
ceases to work properly while MDB_PREV / MDB_PREV_DUP still functions.
In particular, when MDB_NEXT or MDB_NEXT_DUP is called after a cursor deletion, if
a next key/value pair exists, the cursor does not advance and gets stuck,
returning the same record when MDB_NEXT or MDB_NEXT_DUP is called repeatedly. In
case there is no next record, the program hangs.
The following modified version of mtest3.c shows the issue. I am testing this
on the latest commit 20dec1f69bf4860202c764ce92b1fbbe3d11a065 of lmdb on 20 Jan,
on x86-64 Linux.
I got a similar behaviour when a slightly earlier version of lmdb was
cross-compiled with a Java wrapper for use on Android, which is why I am testing
this. The issue should therefore not be platform specific.
Thank you.
====
/* mtest3.c - memory-mapped database tester/toy */
/*
* Copyright 2011-2015 Howard Chu, Symas Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted only as authorized by the OpenLDAP
* Public License.
*
* A copy of this license is available in the file LICENSE in the
* top-level directory of the distribution or, alternatively, at
* <http://www.OpenLDAP.org/license.html>.
*/
/* Tests for sorted duplicate DBs with cursor delete */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include "lmdb.h"
/* Evaluate an LMDB call, store its result in the local `rc`, and abort unless it is MDB_SUCCESS. */
#define E(expr) CHECK((rc = (expr)) == MDB_SUCCESS, #expr)
/* Non-zero when `expr` returns `err`; any other non-zero result aborts via CHECK, success yields 0. */
#define RES(err, expr) ((rc = expr) == (err) || (CHECK(!rc, #expr), 0))
/* If `test` is false, print file, line, `msg` and mdb_strerror(rc) to stderr, then abort(). */
#define CHECK(test, msg) ((test) ? (void)0 : ((void)fprintf(stderr, \
"%s:%d: %s: %s\n", __FILE__, __LINE__, msg, mdb_strerror(rc)), abort()))
int main(int argc,char * argv[])
{
int i = 0, j = 0, rc;
MDB_env *env;
MDB_dbi dbi;
MDB_val key, data;
MDB_txn *txn;
MDB_stat mst;
MDB_cursor *cursor;
int count;
int *values;
char sval[32];
char kval[sizeof(int)];
srand(time(NULL));
memset(sval, 0, sizeof(sval));
count = 10;
values = (int *)malloc(count*sizeof(int));
for(i = 0;i<count;i++) {
values[i]= i * 10;
}
E(mdb_env_create(&env));
E(mdb_env_set_mapsize(env, 10485760));
E(mdb_env_set_maxdbs(env, 4));
E(mdb_env_open(env, "./testdb", MDB_FIXEDMAP|MDB_NOSYNC, 0664));
E(mdb_txn_begin(env, NULL, 0, &txn));
E(mdb_dbi_open(txn, "id2", MDB_CREATE|MDB_DUPSORT, &dbi));
key.mv_size = sizeof(int);
key.mv_data = kval;
data.mv_size = sizeof(sval);
data.mv_data = sval;
printf("Adding %d values\n", count);
for (i=0;i<count;i++) {
if (!(i & 0x07))
sprintf(kval, "%03x",alalues[i]);
sprintf(sval, "%03x %d foo bar", values[i], values[i]);
if (RES(MDB_KEYEXIST, mdb_put(txn, dbi, &key, &data, MDB_NODUPDATA)))
j++;
}
if (j) printf("%d duplicates skipped\n", j);
E(mdb_txn_commit(txn));
E(mdb_env_stat(env, &mst));
E(mdb_txn_begin(env, NULL, MDB_RDONLY, &txn));
E(mdb_cursor_open(txn, dbi, &cursor));
while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_NEXT_DUP)) == 0) {
printf("key: %p %.*s, data: %p %.*s\n",
key.mv_data, (int) key.mv_size, (char *) key.mv_data,
data.mv_data, (int) data.mv_size, (char *) data.mv_data);
}
CHECK(rc == MDB_NOTFOUND, "mdb_cursor_get");
mdb_cursor_close(cursor);
mdb_txn_abort(txn);
E(mdb_env_stat(env, &mst));
E(mdb_txn_begin(env, NULL, 0, &txn));
9E9E(mdb_cursor_open(txn, dbi, &cursor));
/*
* changing following to values[8] freezes program during
mdb_cursor_del
* as there is no more record following the deletion
*/
sprintf(kval, "%03x", values[0]);
key.mv_size = sizeof(int);
key.mv_data = kval;
printf("\nCursor set / first dup\n");
E(mdb_cursor_get(cursor, &key, &data, MDB_SET_KEY));
E(mdb_cursor_get(cursor, &key, &data, MDB_FIRST_DUP));
printf("key: %.*s, data: %.*s\n",
(int) key.mv_size, (char *) key.mv_data,
(int) data.mv_size, (char *) data.mv_data);
printf("Cursor next\n");
j=0;
while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_NEXT_DUP)) == 0) {
printf("key: %.*s, data: %.*s\n",
(int) key.mv_size, (char *) key.mv_data,
(int) data.mv_size, (char *) data.mv_data);
j++;
if (j == 1) {
printf("delete the above key/data\n");
if (RES(MDB_NOTFOUND, mdb_cursor_del(cursor, 0))) {
mdb_cursor_close(cursor);
mdb_txn_abort(txn);
break;
}
}
if (j > count) {
printf("Should not be there\n");
break;
}
}
if (j > 1) {
mdb_cursor_close(cursor);
E(mdb_txn_commit(txn));
}
E(mdb_txn_begin(env, NULL, 0, &txn));
E(mdb_cursor_open(txn, dbi, &cursor));
sprintf(kval, "%03x", values[0]);
key.mv_size = sizeof(int);
key.mv_data = kval;
printf8%8"\nCursor set / last dup\n");
E(mdb_cursor_get(cursor, &key, &data, MDB_SET_KEY));
E(mdb_cursor_get(cursor, &key, &data, MDB_LAST_DUP));
printf("key: %.*s, data: %.*s\n",
(int) key.mv_size, (char *) key.mv_data,
(int) data.mv_size, (char *) data.mv_data);
printf("Cursor prev\n");
j=0;
while ((rc = mdb_cursor_get(cursor, &key, &data, MDB_PREV_DUP)) == 0) {
printf("key: %.*s, data: %.*s\n",
(int) key.mv_size, (char *) key.mv_data,
(int) data.mv_size, (char *) data.mv_data);
j++;
if (j == 1) {
printf("Delete the above key/data\n");
if (RES(MDB_NOTFOUND, mdb_cursor_del(cursor, 0))) {
mdb_cursor_close(cursor);
mdb_txn_abort(txn);
break;
}
}
if (j > count) {
printf("Should not be there\n");
break;
}
}
if (j > 1) {
mdb_cursor_close(cursor);
E(mdb_txn_commit(txn));
}
mdb_dbi_close(env, dbi);
mdb_env_close(env);
free (values);
return 0;
}
===
Output:
Adding 10 values
key: 0x7f2c31981e9c 000, data: 0x7f2c31981fe0 000 0 foo bar
key: 0x7f2c31981e9c 000, data: 0x7f2c31981fb8 00a 10 foo bar
key: 0x7f2c31981e9c 000, data: 0x7f2c31981f90 014 20 foo bar
key: 0x7f2c31981e9c 000, data: 0x7f2c31981f68 01e 30 foo bar
key: 0x7f2c31981e9c 000, data: 0x7f2c31981f40 028 40 foo bar
key: 0x7f2c31981e9c 000, data: 0x7f2c31981f18 032 50 foo bar
key: 0x7f2c31981e9c 000, data: 0x7f2c31981ef0 03c 60 foo bar
key: 0x7f2c31981e9c 000, data: 0x7f2c31981ec8 046 70 foo bar
key: 0x7f2c31981e2c 050, data: 0x7f2c31981e74 050 80 foo bar
key: 0x7f2c31981e2c 050, data: 0x7f2c31981e4c 05a 90 foo bar
Cursor set / first dup
key: 000, data: 000 0 foo bar
Cursor next
key: 000, data: 00a 10 foo bar
delete the above key/data
key: 000, data: 014 20 foo bar <--- Expected to get 30, 40, ... 70 for
MDB_NEXT_DUP
key: 000, data: 014 20 foo bar
key: 000, data: 014 20 foo bar
key: 000, data: 014 20 foo bar
key: 000, data: 014 20 foo bar
key: 000, data: 014 20 foo bar
key: 000, data: 014 20 foo bar
key: 000, data: 014 20 foo bar
key: 000, data: 014 20 foo bar
key: 000, data: 014 20 foo bar <---- But got infinite loop if not break by
counter
Should not be there
Cursor set / last dup
key: 000, data: 046 70 foo bar
Cursor prev
key: 000, data: 03c 60 foo bar
Delete the above key/data
key: 000, data: 032 50 foo bar <--- works as expected in the reverse direction
key: 000, data: 028 40 foo bar
key: 000, data: 01e 30 foo bar
key: 000, data: 014 20 foo bar
key: 000, data: 000 0 foo bar
Full_Name: Tom Pressnell
Version: 2.4.43
OS: Debian 8
URL: ftp://ftp.openldap.org/incoming/
Submission from: (NULL) (149.254.186.170)
Hi,
I have been testing 2.4.43+ITS#8336 as a candidate for production usage.
Compiled from source on Debian 8 (jessie) x86_64.
I have been experiencing segmentation faults in syncprov_matchops:
Program terminated with signal SIGSEGV, Segmentation fault.
#0 0x0000000000562713 in syncprov_matchops (op=0x7ef1e8100c90,
opc=0x7ef1b8000cf0, saveit=1)
at syncprov.c:1332
1332 op2.ors_filter = ss->s_op->ors_filter->f_and->f_next;
I have been running replication testing, pushing relatively high rates of add/mod
operations at a mdb master (NOSYNC) whilst a number of replication clients are
connecting and disconnecting (simulating a lossy/faulty network) (killing of
replication client scripts / tcpkill).
Looking at ss:
$6 = {s_next = 0x7ef1b389f350, s_si = 0xe51cc0, s_base = {bv_len = 13,
bv_val = 0x7ef172fb72a0 "dc=xyz,dc=com"}, s_eid = 1, s_op = 0x7ef1b4000aa0,
s_rid = 0, s_sid = 0,
s_filterstr = {bv_len = 15, bv_val = 0x7ef1b4001248 "(objectClass=*)"},
s_flags = 17, s_inuse = 1,
s_res = 0x7ef172fa71f0, s_restail = 0x7ef172f54450, s_mutex = {__data =
{__lock = 1, __count = 0,
__owner = 1006, __nusers = 1, __kind = 0, __spins = 0, __elision = 0,
__list = {__prev = 0x0,
__next = 0x0}},
__size = "\001\000\000\000\000\000\000\000\356\003\000\000\001", '\000'
<repeats 26 times>, __align = 1}}
And at s_op->ors_filter:
(gdb) p *ss->s_op->o_request->oq_search->rs_filter
$2 = {f_choice = 161, f_un = {f_un_result = -1275063480, f_un_desc =
0x7ef1b4001348,
f_un_ava = 0x7ef1b4001348, f_un_ssa = 0x7ef1b4001348, f_un_mra =
0x7ef1b4001348,
f_un_complex = 0x7ef1b4001348}, f_next = 0x0}
(gdb) p ss->s_op->o_request->oq_search->rs_filterstr
$3 = {bv_len = 23, bv_val = 0x7ef1b40013e0 "(|(cn=4594)(cn=4594:1))"}
This is not the filter used by my syncrepl clients during this test (they all
run with objectClass=* as show in ss->s_filterstr), this is one of the filters
used by the add/mod script.
Looking at another thread (cutting down output):
[Switching to thread 2 (Thread 0x7ef1bffff700 (LWP 2479))]
#0 0x00000000004eeb59 in mdb_node_search (mc=0x7ef172ee63f0,
key=0x7ef1bfe6d3c0, exactp=0x7ef1bfe6d03c)
(gdb) bt
#5 0x0000000000553326 in mdb_id2entry (op=0x7ef1b4000aa0, mc=0x7ef172ee63f0,
id=26, e=0x7ef1bfe7d678)
at id2entry.c:153
This thread is working with the same operation ...0aa0, but performing a
standard search as i would expect given the filter value.
Somehow ss->s_op seems to have ended up pointing at what seems to be an
unrelated operation.
Looking at the code i believe the issue could trigger when an op is abandoned
early before syncprov_op_search has got hold of the si_ops lock for the psearch
sop.
I have added a standard o_abandon check and return at line 2574 of syncprov.c
while the si_ops lock is held, before sop is added to the list.
This seems to have fixed the issue in my testing, i can see this code path is
traversed (as i am logging it) a number of times over the last few days of
running the tests.
I can provide more detailed backtraces if required.
If you would like core dumps this will require extra time as i would have to
replicate the test with non company data / schemas.
Thanks
Tom
Full_Name: Mitchell Blank
Version: 2.4.43
OS: linux
URL: ftp://ftp.openldap.org/incoming/
Submission from: (NULL) (80.169.198.86)
Recently a couple alpha releases for OpenSSL 1.1.X have been posted on
www.openssl.org with the request that software be tested against them prior to
release.
I tried compiling the most recent OpenLDAP against it, but it failed. One of
the overarching changes that OpenSSL is making is that many of its datatypes are
now only visible as opaque pointers (in other words, their layout and size are
considered private to OpenSSL itself)
This caused the following compile errors in tls_o.c:
> openldap-2.4.43/libraries/libldap/tls_o.c: In function ‘tlso_ctx_ref’:
> openldap-2.4.43/libraries/libldap/tls_o.c:199:20: error: dereferencing pointer
to incomplete type
> CRYPTO_add( &c->references, 1, CRYPTO_LOCK_SSL_CTX );
> ^
> openldap-2.4.43/libraries/libldap/tls_o.c: In function ‘tlso_session_my_dn’:
> openldap-2.4.43/libraries/libldap/tls_o.c:451:21: error: dereferencing pointer
to incomplete type
der_dn->bv_val = xn->bytes->data;
> ^
> openldap-2.4.43/libraries/libldap/tls_o.c: In function
‘tlso_session_peer_dn’:
> openldap-2.4.43/libraries/libldap/tls_o.c:478:21: error: dereferencing pointer
to incomplete type
der_dn->bv_val = xn->bytes->data;
> ^
> openldap-2.4.43/libraries/libldap/tls_o.c: In function
‘tlso_session_chkhost’:
> openldap-2.4.43/libraries/libldap/tls_o.c:618:21: error: dereferencing pointer
to incomplete type
> if ( !OBJ_cmp( ne->object, obj )) {
> ^
The last one can probably be replaced with a X509_NAME_ENTRY_get_object() call.
I don't know enough about the X509_NAME API to know how to fix the ->bytes->data
ones.
For what it's worth, there were also a couple deprecated warnings:
> openldap-2.4.43/libraries/libldap/tls_o.c:179:2: warning: ‘ERR_remove_state’
is deprecated
> openldap-2.4.43/libraries/libldap/tls_o.c:1251:3: warning:
‘DH_generate_parameters’ is deprecated
Right now this isn't super urgent, but within a couple months OpenSSL 1.1.0 is
expected to be released and suddenly a lot more people will be hitting this
issue.
--On Monday, January 18, 2016 8:01 AM +0000 mh(a)ow2.org wrote:
> Hi,
>
> I'm making a follow up from this thread
> http://www.openldap.org/lists/openldap-bugs/201512/msg00003.html
>
> And more generally about overlay together with schema update, especially
> when running in OLC mode.
>
>> You need to replace your ppolicy.schema/ppolicy.ldif from 2.4.42 with
>> the one bundled in 2.4.43.
>
>
> When running in OLC mode, first note that this is not as
> straight-forward as replacing a file on a filesystem : you have to use
> ldapxxx commands to update the schema.
>
> Second, having a downtime when upgrading a "maintenance" release (as
> 2.4.42 > 2.4.43) doesn't seem appropriate to me and this use case can
> definitively lead to. Once you realise that slapd isn't going to start
> you are forced to update slapd.d ppolicy schema by hand.
a) You should be using slapcat/slapadd
b) It has always been a requirement to deploy the same schema as is shipped
with a given release. People will simply need to adapt their upgrade steps
when using olc to properly update the schema on upgrade.
--Quanah
--
Quanah Gibson-Mount
Platform Architect
Zimbra, Inc.
--------------------
Zimbra :: the leader in open source messaging and collaboration
Hi,
I'm making a follow up from this thread
http://www.openldap.org/lists/openldap-bugs/201512/msg00003.html
And more generally about overlay together with schema update, especially
when running in OLC mode.
> You need to replace your ppolicy.schema/ppolicy.ldif from 2.4.42 with the one
> bundled in 2.4.43.
When running in OLC mode, first note that this is not as
straight-forward as replacing a file on a filesystem : you have to use
ldapxxx commands to update the schema.
Second, having a downtime when upgrading a "maintenance" release (as
2.4.42 > 2.4.43) doesn't seem appropriate to me, and this use case can
definitely lead to one. Once you realise that slapd isn't going to start
you are forced to update slapd.d ppolicy schema by hand.
What do you think ? Is there anything planned to enhance this kind of
situation ?
Full_Name: Leonid Yuriev
Version: master
OS: RHEL7
URL: ftp://ftp.openldap.org/incoming/
Submission from: (NULL) (31.130.36.33)
While debugging on the server side I got a 124 exit status from ldapsearch.
I guess that is something like a timeout condition. For instance, a timeout
while waiting response from a LDAP-server.
But return of the 124 seems a bug (in the ldap-library or ldapsearch), because
such code was never defined or documented.
Adding the '-v' (verbose) to ldapsearch options gives nothing of useful info,
just only a "ldap_initialize(...)" string.
> I'm not sure if this next one is an issue or just incorrect usage on my part.
> So take with a grain of salt.
>
> After getting an EIO (deliberate out of storage space) from a mdb_txn_commit,
> the transaction would be mdb_txn_abort()ed. I then would close then env which
> would get a SIGABORT from a double-free on the env_close0 with env->txn0.
The docs say quite clearly, after txn_commit the txn must not be used again.
It has already been freed so you cannot use it with txn_abort.
--
-- Howard Chu
CTO, Symas Corp. http://www.symas.com
Director, Highland Sun http://highlandsun.com/hyc/
Chief Architect, OpenLDAP http://www.openldap.org/project/
Full_Name: Jeremiah Morrill
Version: 0.9
OS: Linux (Ubuntu14)
URL: ftp://ftp.openldap.org/incoming/
Submission from: (NULL) (70.173.183.164)
Two possible issues. Semi-related.
The first:
On a full storage partition, when creating a new database, I get a SIGBUS. I
believe it is caused by the locks successfully mmap()ing, but not really having
the storage to back it. I hacked in a "posix_fallocate" to make sure the
storage space is there and it appeared to fix it. I have no idea what the
unintended consequences of this change may be.
Here is the diff:
void
@@ -4863,6 +4868,14 @@ mdb_env_setup_locks(MDB_env *env, char *lpath, int mode,
int *excl)
void *m = mmap(NULL, rsize, PROT_READ|PROT_WRITE, MAP_SHARED,
env->me_lfd, 0);
if (m == MAP_FAILED) goto fail_errno;
+
+ rc = posix_fallocate(env->me_lfd, 0, rsize);
+
+ if (rc) {
+ munmap(m, rsize);
+ goto fail;
+ }
+
env->me_txns = m;
#endif
}
I'm not sure if this next one is an issue or just incorrect usage on my part.
So take with a grain of salt.
After getting an EIO (deliberate out of storage space) from a mdb_txn_commit,
the transaction would be mdb_txn_abort()ed. I then would close the env which
would get a SIGABORT from a double-free on the env_close0 with env->txn0.
The hack I put in there to avoid this was in the mdb_txn_end. I check if txn ==
env->me_txn0, and if it is, to set env->me_txn0 to NULL.
Here's the diff:
@@ -3244,12 +3244,17 @@ mdb_txn_end(MDB_txn *txn, unsigned mode)
}
pthread_mutex_unlock(&env->me_rpmutex);
tl[0].mid = 0;
- if (mode & MDB_END_FREE)
+ if (mode & MDB_END_FREE)
free(tl);
}
#endif
- if (mode & MDB_END_FREE)
- free(txn);
+ if (mode & MDB_END_FREE) {
+ /* avoid double free on env close */
+ if(txn == env->me_txn0){
+ env->me_txn0 = NULL;
+ }
+ free(txn);
+ }
}
I've removed the obsolete Netherlands listing and added this.
-------- Forwarded Message --------
Subject: openldap public mirror in europe
Date: Wed, 13 Jan 2016 08:22:02 +0100
From: Jakob-Tobias Winter <jakob-tobias.winter(a)1und1.de>
To: hyc(a)symas.com
CC: Michael Ströder <michael(a)stroeder.com>
Howard,
we set up a public mirror for your project based in Germany.
It is mirrored via rsync and is reachable via:
http://mirror.eu.oneandone.net/software/openldap/
Best regards
Jakob
--
Jakob-Tobias Winter
Operations Manager IT
Dedicated / Cloud / Virtual Server
IT Operations WebPlatforms Server
1&1 Internet AG
Ernst-Frey-Straße 9
DE-76135 Karlsruhe
Telefon: +49 721 91374 4416
E-Mail: jakob-tobias.winter(a)1und1.de
Web: www.1und1.de
Amtsgericht Montabaur / HRB 6484
Vorstände: Henning Ahlert, Ralph Dommermuth, Matthias Ehrlich, Robert
Hoffmann, Andreas Hofmann, Markus Huhn, Hans-Henning Kettler, Dr. Oliver
Mauss, Jan Oetjen, Martin Witt, Christian Würst
Aufsichtsratsvorsitzender: Michael Scheeren
Member of United Internet