Full_Name: Ryan Tandy
Version: 2.4.45
OS: Debian
URL:
Submission from: (NULL) (24.68.41.160)
Submitted by: ryan
This looks rather similar to ITS#8429 (the deadlock is at the same location), but
not similar enough for me to be sure it is the same bug.
cat > slapd.conf << EOF
include /path/to/core.schema
include /path/to/cosine.schema
serverid 1 ldap://:9001
serverid 2 ldap://:9002
serverid 3 ldap://:9003
database mdb
directory db
maxsize 104857600
envflags writemap
index objectClass,cn,entryCSN,entryUUID,uid eq
suffix dc=example,dc=com
rootdn cn=root,dc=example,dc=com
rootpw secret
access to * by * read
sizelimit unlimited
syncrepl rid=1 provider="ldap://:9001" searchbase="dc=example,dc=com"
  type=refreshAndPersist retry="10 +"
  bindmethod=simple binddn="cn=root,dc=example,dc=com" credentials="secret"
  syncdata=accesslog logbase="cn=accesslog"
  logfilter="(&(objectClass=auditWriteObject)(reqResult=0))"
syncrepl rid=2 provider="ldap://:9002" searchbase="dc=example,dc=com"
  type=refreshAndPersist retry="10 +"
  bindmethod=simple binddn="cn=root,dc=example,dc=com" credentials="secret"
  syncdata=accesslog logbase="cn=accesslog"
  logfilter="(&(objectClass=auditWriteObject)(reqResult=0))"
syncrepl rid=3 provider="ldap://:9003" searchbase="dc=example,dc=com"
  type=refreshAndPersist retry="10 +"
  bindmethod=simple binddn="cn=root,dc=example,dc=com" credentials="secret"
  syncdata=accesslog logbase="cn=accesslog"
  logfilter="(&(objectClass=auditWriteObject)(reqResult=0))"
mirrormode on
overlay syncprov
syncprov-checkpoint 10 1
syncprov-reloadhint TRUE
overlay accesslog
logdb cn=accesslog
logops writes
logsuccess true
logpurge 07+00:00 01+00:00
database mdb
directory accesslog
maxsize 104857600
envflags writemap
index entryCSN,objectClass,reqEnd,reqResult,reqStart eq
suffix cn=accesslog
access to * by * read
sizelimit unlimited
overlay syncprov
syncprov-nopresent TRUE
syncprov-reloadhint TRUE
EOF
cat > data.ldif << EOF
dn: dc=example,dc=com
objectClass: domain

dn: uid=u0,dc=example,dc=com
objectclass: account

dn: cn=g0,dc=example,dc=com
objectClass: groupOfNames
member:
EOF
Start up all three slapds and get them synced and settled. I also executed no-op
modifications on each node to ensure every server had CSNs from all the others.
cat > groupmod.ldif << EOF
dn: cn=g0,dc=example,dc=com
add: member
member: uid=u0,dc=example,dc=com

dn: cn=g0,dc=example,dc=com
delete: member
member: uid=u0,dc=example,dc=com
EOF
Execute the above modification on one node and watch the other two. After
repeating it a few times, one or both of the other nodes reliably hang.
If I disable syncprov-checkpoint, I cannot reproduce the hang.
Backtrace from a hung node:
Thread 6 (Thread 0x7f77093d0700 (LWP 28817)):
#0 pthread_cond_wait@@GLIBC_2.3.2 () at
../sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S:185
#1 0x0000560c5a9af3b7 in ldap_pvt_thread_cond_wait (cond=0x560c5b9696c0,
mutex=0x560c5b969698) at thr_posix.c:277
#2 0x0000560c5a9add6e in ldap_int_thread_pool_wrapper (xpool=0x560c5b969690) at
tpool.c:683
#3 0x00007f7718a4a494 in start_thread (arg=0x7f77093d0700) at
pthread_create.c:333
#4 0x00007f771878ca8f in clone () at
../sysdeps/unix/sysv/linux/x86_64/clone.S:97
Thread 5 (Thread 0x7f7709bd1700 (LWP 28816)):
#0 pthread_cond_wait@@GLIBC_2.3.2 () at
../sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S:185
#1 0x0000560c5a9af3b7 in ldap_pvt_thread_cond_wait (cond=0x560c5b9696c0,
mutex=0x560c5b969698) at thr_posix.c:277
#2 0x0000560c5a9add6e in ldap_int_thread_pool_wrapper (xpool=0x560c5b969690) at
tpool.c:683
#3 0x00007f7718a4a494 in start_thread (arg=0x7f7709bd1700) at
pthread_create.c:333
#4 0x00007f771878ca8f in clone () at
../sysdeps/unix/sysv/linux/x86_64/clone.S:97
Thread 4 (Thread 0x7f770a3d2700 (LWP 28815)):
#0 pthread_cond_wait@@GLIBC_2.3.2 () at
../sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S:185
#1 0x0000560c5a9af3b7 in ldap_pvt_thread_cond_wait (cond=0x560c5b993fc8,
mutex=0x560c5b993fa0) at thr_posix.c:277
#2 0x0000560c5a9ac942 in ldap_pvt_thread_rmutex_lock (rmutex=0x560c5b993f68,
owner=140149249615616) at rmutex.c:129
#3 0x0000560c5a98bd4c in accesslog_op_mod (op=0x7f770a3d14e0,
rs=0x7f770a3d1120) at accesslog.c:1994
#4 0x0000560c5a941763 in overlay_op_walk (op=0x7f770a3d14e0, rs=0x7f770a3d1120,
which=op_modify, oi=0x560c5b992a00,
on=0x560c5b993d20) at backover.c:661
#5 0x0000560c5a941a50 in over_op_func (op=0x7f770a3d14e0, rs=0x7f770a3d1120,
which=op_modify) at backover.c:730
#6 0x0000560c5a941b84 in over_op_modify (op=0x7f770a3d14e0, rs=0x7f770a3d1120)
at backover.c:769
#7 0x0000560c5a92ef07 in syncrepl_message_to_op (si=0x560c5b992580,
op=0x7f770a3d14e0, msg=0x7f76f4103bd0)
at syncrepl.c:2417
#8 0x0000560c5a929f7e in do_syncrep2 (op=0x7f770a3d14e0, si=0x560c5b992580) at
syncrepl.c:1014
#9 0x0000560c5a92c160 in do_syncrepl (ctx=0x7f770a3d1c10, arg=0x560c5b992980)
at syncrepl.c:1565
#10 0x0000560c5a8b11cd in connection_read_thread (ctx=0x7f770a3d1c10, argv=0xc)
at connection.c:1296
#11 0x0000560c5a9ade15 in ldap_int_thread_pool_wrapper (xpool=0x560c5b969690) at
tpool.c:696
#12 0x00007f7718a4a494 in start_thread (arg=0x7f770a3d2700) at
pthread_create.c:333
#13 0x00007f771878ca8f in clone () at
../sysdeps/unix/sysv/linux/x86_64/clone.S:97
Thread 3 (Thread 0x7f770abd3700 (LWP 28814)):
#0 pthread_cond_wait@@GLIBC_2.3.2 () at
../sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S:185
#1 0x0000560c5a9af3b7 in ldap_pvt_thread_cond_wait (cond=0x560c5b9696c0,
mutex=0x560c5b969698) at thr_posix.c:277
#2 0x0000560c5a9add6e in ldap_int_thread_pool_wrapper (xpool=0x560c5b969690) at
tpool.c:683
#3 0x00007f7718a4a494 in start_thread (arg=0x7f770abd3700) at
pthread_create.c:333
#4 0x00007f771878ca8f in clone () at
../sysdeps/unix/sysv/linux/x86_64/clone.S:97
Thread 2 (Thread 0x7f770b3d4700 (LWP 28813)):
#0 0x00007f771878d083 in epoll_wait () at
../sysdeps/unix/syscall-template.S:84
#1 0x0000560c5a8ac6b3 in slapd_daemon_task (ptr=0x560c5bd176e0) at
daemon.c:2539
#2 0x00007f7718a4a494 in start_thread (arg=0x7f770b3d4700) at
pthread_create.c:333
#3 0x00007f771878ca8f in clone () at
../sysdeps/unix/sysv/linux/x86_64/clone.S:97
Thread 1 (Thread 0x7f7719293400 (LWP 28812)):
#0 0x00007f7718a4b6cd in pthread_join (threadid=140149266401024,
thread_return=0x0) at pthread_join.c:90
#1 0x0000560c5a9af2f8 in ldap_pvt_thread_join (thread=140149266401024,
thread_return=0x0) at thr_posix.c:197
#2 0x0000560c5a8ad99c in slapd_daemon () at daemon.c:2932
#3 0x0000560c5a88c105 in main (argc=8, argv=0x7ffd119da7b8) at main.c:1017