Hi,
I finally had the opportunity to get a valgrind analysis of my problem. You can find it below. Thank you for any help.
D.
==19496== 12 bytes in 1 blocks are definitely lost in loss record 16 of 52 ==19496== at 0x4005B83: malloc (vg_replace_malloc.c:195) ==19496== by 0x4056977: ber_memalloc_x (memory.c:228) ==19496== by 0x8095D39: ch_malloc (ch_malloc.c:54) ==19496== by 0x80E0297: do_syncrepl (syncrepl.c:706) ==19496== by 0x4016A23: ldap_int_thread_pool_wrapper (tpool.c:688) ==19496== by 0x821831: start_thread (in /lib/libpthread-2.5.so) ==19496== by 0x76146D: clone (in /lib/libc-2.5.so) ==19496== ==19496== 155 (32 direct, 123 indirect) bytes in 1 blocks are definitely lost in loss record 47 of 52 ==19496== at 0x4005B83: malloc (vg_replace_malloc.c:195) ==19496== by 0x4056977: ber_memalloc_x (memory.c:228) ==19496== by 0x4057054: ber_bvarray_dup_x (memory.c:756) ==19496== by 0x80E0212: do_syncrepl (syncrepl.c:699) ==19496== by 0x4016A23: ldap_int_thread_pool_wrapper (tpool.c:688) ==19496== by 0x821831: start_thread (in /lib/libpthread-2.5.so) ==19496== by 0x76146D: clone (in /lib/libc-2.5.so) ==19496== ==19496== 270 (228 direct, 42 indirect) bytes in 3 blocks are definitely lost in loss record 49 of 52 ==19496== at 0x4005B83: malloc (vg_replace_malloc.c:195) ==19496== by 0x4056977: ber_memalloc_x (memory.c:228) ==19496== by 0x8095D39: ch_malloc (ch_malloc.c:54) ==19496== by 0x8195E91: syncprov_op_search (syncprov.c:2473) ==19496== by 0x80E3333: overlay_op_walk (backover.c:661) ==19496== by 0x80E3A09: over_op_func (backover.c:723) ==19496== by 0x807BE85: fe_op_search (search.c:402) ==19496== by 0x807C772: do_search (search.c:247) ==19496== by 0x807988B: connection_operation (connection.c:1150) ==19496== by 0x807A0FC: connection_read_thread (connection.c:1286) ==19496== by 0x4016A23: ldap_int_thread_pool_wrapper (tpool.c:688) ==19496== by 0x821831: start_thread (in /lib/libpthread-2.5.so)
Le 20/02/2012 10:32, David Coutadeur a écrit :
Hi,
Concerning the CPU: model name : Intel(R) Xeon(R) CPU L5640 @ 2.27GHz. However, OpenLDAP is used in a virtual machine, with only one dedicated core. This is a 32-bit version of OpenLDAP, on a 32-bit OS. Linux OS : Red Hat Enterprise Linux Server release 5.7 (Tikanga) kernel : 2 2.6.18-274.3.1.el5
You can get the server configuration below.
Concerning the client, you can find the source code here : http://loadtesting.sourceforge.net/index.php?lang=en
#
# Global section
#
sizelimit 15000
allow bind_v2 password-hash {SSHA} threads 8 loglevel 256
serverID 2
include /opt/openldap/etc/openldap/inc_shemas/slapd-schemas.conf
pidfile /opt/openldap/var/run/slapd.pid argsfile /opt/openldap/var/run/slapd.args
# ACL definitions (cut here)
####################################################################### # SSL definitions ####################################################################### # Definition of cert files # --------------------------------- TLSCipherSuite HIGH:MEDIUM:+SSLv2
TLSCACertificateFile /usr/local/openldap/etc/certs/ca.cert TLSCertificateFile /usr/local/openldap/etc/certs/ldap-master-2.cert TLSCertificateKeyFile /usr/local/openldap/etc/certs/ldap-master-2.key
TLSVerifyClient never
####################################################################### # BDB database definitions ####################################################################### database monitor
####################################################################### # suffix dc=example,dc=com # -------------------- ####################################################################### database bdb directory /opt/openldap/var/openldap-data # some indexes
#
# root suffix
#
suffix "dc=example,dc=com" checkpoint 512 10
#
# cache settings
#
cachesize 60000 dncachesize 60000 idlcachesize 60000 cachefree 100
rootdn "cn=Manager,dc=example,dc=com" rootpw secret
# Password policy : hash the clear passwords overlay ppolicy ppolicy_hash_cleartext
#
# REPLICATION definitions
#
#######################################################################
# I am a master (provider) # ------------------------------------------ overlay syncprov syncprov-checkpoint 100 10 syncprov-sessionlog 100
# syncrepl directive (consumer info) # ------------------------------------------ syncrepl rid=001 provider=ldap://ldap-master-1.example.com bindmethod=simple binddn="uid=ReplicationMaster,ou=Replication,ou=Special Users,dc=example,dc=com" credentials=secret searchbase="dc=example,dc=com" schemachecking=on type=refreshAndPersist retry="60 +"
mirrormode on
#
# limits
#
limits dn.exact="uid=ReplicationMaster,ou=Replication,ou=Special Users,dc=example,dc=com" size=unlimited time=unlimited limits dn.exact="uid=ReplicationHub,ou=Replication,ou=Special Users,dc=example,dc=com" size=unlimited time=unlimited limits dn.exact="uid=ReplicationLSC,ou=Replication,ou=Special Users,dc=example,dc=com" size=unlimited time=unlimited
####################################################################### # suffix o=edition # ---------------- ####################################################################### database bdb
directory /opt/openldap/var/openldap-data-edition # some indexes
#
# root suffix
#
suffix "o=edition" checkpoint 512 10
#
# cache settings
#
cachesize 50000 dncachesize 50000 idlcachesize 50000 cachefree 100
#
# rootdn
#
# Cleartext passwords, especially for the rootdn, should # be avoided. See slappasswd(8) and slapd.conf(5) for details. # Use of strong authentication encouraged. rootdn "cn=Manager,o=edition" rootpw secret
# Password policy : hash the clear passwords overlay ppolicy ppolicy_hash_cleartext
####################################################################### # REPLICATION definitions #######################################################################
# I am a master (provider) # ------------------------------------------ overlay syncprov syncprov-checkpoint 100 10 syncprov-sessionlog 100
# syncrepl directive (consumer info) # ------------------------------------------ syncrepl rid=002 provider=ldap://vspar-ldap-master-1.example.com bindmethod=simple binddn="uid=ReplicationMaster,ou=Replication,ou=Special Users,dc=example,dc=com" credentials=secret searchbase="o=edition" schemachecking=on type=refreshAndPersist retry="60 +"
mirrormode on
#
# Limits for current BDB
#
limits dn.exact="uid=ReplicationMaster,ou=Replication,ou=Special Users,dc=example,dc=com" size=unlimited time=unlimited limits dn.exact="uid=ReplicationHub,ou=Replication,ou=Special Users,dc=example,dc=com" size=unlimited time=unlimited limits dn.exact="uid=ReplicationLSC,ou=Replication,ou=Special Users,dc=example,dc=com" size=unlimited time=unlimited
Le 17/02/2012 19:00, Howard Chu a écrit :
dcoutadeur@linagora.com wrote:
Full_Name: dcoutadeur Version: 2.4.28 OS: Red Hat Enterprise Linux Server release 5.7 (Tikanga) URL: ftp://ftp.openldap.org/incoming/ Submission from: (NULL) (109.197.176.10)
Hello,
I had a segfault in the latest git version of OpenLDAP, after 10 to 15 tests, each interrupted by Ctrl+C (see the description of a test below). The segfault is also reproduced in version 2.4.28.
I think I won't be able to reproduce the bug with Valgrind.
If you expect us to try to reproduce the bug, you'll have to provide more information. Since this crash is in syncprov there's obviously at least two servers involved; what are their configurations? What hardware are they running on (in particular, how many CPU cores per server)? Since you're using a custom client, can you provide the client source code?
Thank you in advance for any help.
D.
Note: a test consists of 100 iterations of 100 threads, each thread doing a bind, an add, a modify, a delete, and a logout.
(gdb) bt full #0 sp_avl_cmp (c1=0x8b4004c8, c2=0xa37cf28) at syncprov.c:366 rc =<value optimized out> #1 0x081afe3e in avl_delete (root=0xa255648, data=0x8b4004c8, fcmp=0x81948a0<sp_avl_cmp>) at avl.c:197 p =<value optimized out> q =<value optimized out> r =<value optimized out> top =<value optimized out> side =<value optimized out> side_bf =<value optimized out> shorter =<value optimized out> nside =<value optimized out> pptr = {0x89908, 0x0, 0x0, 0x0, 0xe8043c, 0x0, 0xfdc, 0x8d994858, 0xe7b95c, 0xfdc, 0xa372570, 0x0, 0xa288350, 0xe8043c, 0xa372570, 0x8d994878, 0xe7c324, 0xfdc, 0xa372570, 0x0, 0xe7af2c, 0x8cb9136e, 0x81b3634, 0x0, 0xe8043c, 0xe, 0xa3d3a40, 0x8d9948b8, 0xe7d081, 0xa3e3618, 0x8cb91358, 0x823c27} pdir = "\000\000\000\000lI\231\215\064\066\033\bn>\000\000\244\345t\000\310\004@\213\b\346\067\n\310H\231\215"
depth = 0
#2 0x08199f7f in syncprov_op_cleanup (op=0xa37e608, rs=0x8d995108) at syncprov.c:1401 cb = 0x8cb91258 opc = 0x8cb91268 si = 0xa255610 sm = 0xa255688 snext =<value optimized out> mt = 0x8b4004c8 #3 0x08089654 in slap_cleanup_play (op=0xa37e608, rs=0x8d995108) at result.c:541 sc_next = 0x8d994dec sc = 0x8cb91258 scp = 0x8d994928 #4 0x0808a150 in send_ldap_response (op=0xa37e608, rs=0x8d995108) at result.c:733 berbuf = { buffer = "\000\000\001\000\000\001\000\000\377\377\377\377", '\000'<repeats 12 times>, "f\023\271\214\064#\271\214\000\000\000\000f\023\271\214p%7\n\000\000\000\000\314I\231\215\001\000\000\000\000\000\000\000\314mK\236x\271\347\000\001\000\000\000`+@\213D`K\236\230\063\066\n\250<6\n\000\000\000\000\000\000\000\000\005\000\000\000P7@\213`\343\070\n\000\000\000\000\n\000\000\000(\234\200\330\000\000\000\000\000\000\000\000@4"\000\000\000\000\000(\234\200\330\210J\231\215\270\214
\000\230\063\066\n`+@\213\314mK\236\r\000\000\000\001\000\000\000\021\217;O(\234\200\330\000\000\000\000`&%\n`&%\n8J\231\215b\f"\000\224mK\236\230\063\066\n(\234\200أ\347\022\b\a",
'\000'<repeats 31 times>, "D'%\n\224mK\236\000\000\000", ialign = 65536, lalign = 65536, falign = 9.18354962e-41, dalign = 5.4323095486619588e-312, palign = 0x10000<Address 0x10000 out of bounds>} ber =<value optimized out> rc = 32768 bytes = 14 __PRETTY_FUNCTION__ = "send_ldap_response" #5 0x0808af1f in slap_send_ldap_result (op=0xa37e608, rs=0x8d995108) at result.c:860 tmp = 0x0 otext = 0x0 oref = 0x0 __PRETTY_FUNCTION__ = "slap_send_ldap_result" #6 0x0812bde5 in bdb_add (op=0xa37e608, rs=0x8d995108) at add.c:511 pdn = {bv_len = 23, bv_val = 0x8b40372f "ou=people,dc=afp,dc=com"} p = 0x8fc4c0fc oe = 0x8fc4c804 ei = 0xa37d1c8 textbuf = "\000\000\000\000\320O"\n", '\000'<repeats 48 times>, "\001", '\000'<repeats 198 times> children = 0xa223b20 entry = 0xa223980 ltid = 0x0 lt2 = 0x8b402bf0 eid = 57976 opinfo = {boi_oe = {oe_next = {sle_next = 0x8d99509c}, oe_key = 0x0}, boi_txn = 0x8b402b60, boi_locks = 0x0, boi_err = 0, boi_acl_cache = 0 '\000', boi_flag = 0 '\000'} lock = {off = 133260, ndx = 772, gen = 2004, mode = DB_LOCK_READ} num_retries = 0 success = 0 postread_ctrl = 0x0 ctrls = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0} num_ctrls = 0 #7 0x080e33a1 in overlay_op_walk (op=0xa37e608, rs=0x8d995108, which=op_add, oi=0xa254ff0, on=0xa255508) at backover.c:671 rc = 32768 #8 0x080e3a0a in over_op_func (op=0xa37e608, rs=0x8d995108, which=op_add) at backover.c:723 oi = 0xa254ff0 on = 0xa255508 be = 0xa252560 db = {bd_info = 0x821d41c, bd_self = 0xa252560, be_ctrls = "\000\001\001\001\000\001\000\000\001\000\000\001\001\000\001\001\000\000\000\000\001\000\001\000\000\000\000\000\000\000\000\000\001",
be_flags = 563464, be_restrictops = 0, be_requires = 0, be_ssf_set = {sss_ssf = 0, sss_transport = 0, sss_tls = 0, sss_sasl = 0, sss_update_ssf = 0, sss_update_transport = 0, sss_update_tls = 0, sss_update_sasl = 0, sss_simple_bind = 0}, be_suffix = 0xa288350, be_nsuffix = 0xa288368, be_schemadn = { bv_len = 0, bv_val = 0x0}, be_schemandn = {bv_len = 0, bv_val = 0x0}, be_rootdn = {bv_len = 24, bv_val = 0xa287648 "cn=Manager,dc=afp,dc=com"}, be_rootndn = {bv_len = 24, bv_val = 0xa2876d0 "cn=manager,dc=afp,dc=com"}, be_rootpw = {bv_len = 38, bv_val = 0xa2876f0 "{SSHA}rEmMhg3MU5xkQX5Ng92tH4WzGMlA+nGU"}, be_max_deref_depth = 15, be_def_limit = { lms_t_soft = 3600, lms_t_hard = 0, lms_s_soft = 15000, lms_s_hard = 0, lms_s_unchecked = -1, lms_s_pr = 0, lms_s_pr_hide = 0, lms_s_pr_total = 0}, be_limits = 0xa255748, be_acl = 0x0, be_dfltaccess = ACL_READ, be_extra_anlist = 0x0, be_update_ndn = {bv_len = 0, bv_val = 0x0}, be_update_refs = 0x0, be_pending_csn_list = 0xa363388, be_pcl_mutex = {__data = {__lock = 0, __count = 0, __owner = 0, __kind = 0, __nusers = 0, {__spins = 0, __list = {__next = 0x0}}}, __size = '\000'<repeats 23 times>, __align = 0}, be_syncinfo = 0xa28aec8, be_pb = 0x0, be_cf_ocs = 0x821f840, be_private = 0xa252660, be_next = { stqe_next = 0xa288538}} cb = {sc_next = 0x0, sc_response = 0x80e30e0 <over_back_response>, sc_cleanup = 0, sc_private = 0xa254ff0} sc =<value optimized out> rc =<value optimized out> __PRETTY_FUNCTION__ = "over_op_func" #9 0x08081129 in fe_op_add (op=0xa37e608, rs=0x8d995108) at add.c:334 repl_user = 0 rc =<value optimized out> bd = 0x82234c0 textbuf = 
"\000\000\000\000\000\000\000\000\060[\231\215\000\000\000\000\035\000\000\000\020\070@\213\001\000\000\000xN\231\215\270\026@\213(I"\n\002\000\000\000\250N\231\215\255\214\v\b\270\026@\213\224N\231\215\001\000\000\000\000\000\000\000x9@\213\000\000\000\000\n\000\000\000\001\000\000\000\340\067@\213\n\000\000\000\060\070@\213\320\026@\213(I"\n\270\026@\213\370N\231\215oc\t\b\002\000\000\000X(
\n\370N\231\215\321_\t\bh\234!\n\240\066@\213'<\202\000\000\000\000\000\f\000\000\000W.@\213n>\000\000\244\345t\000\320O"\n\320O"\n\370N\231\215\035\205q\000
."\b\314h\032\216\030O\231\215\245\063\b\b ."\b\240\066@\213\000\000\000\000\270\026@\213\244i\032\216\000\000\000\000HO\231\215\267\r\b\b\320O"\n\320O"\n\001\000\000\000HO\231\215\020\000\000\000\340h\032\216\377\377\377\377"
__PRETTY_FUNCTION__ = "fe_op_add"
#10 0x08081a13 in do_add (op=0xa37e608, rs=0x8d995108) at add.c:194 ber =<value optimized out> last = 0x8b402e71 "" dn = {bv_len = 38, bv_val = 0x8b402d98 "uid=dcoutadeur,ou=People,dc=afp,dc=com"} len = 28 tag =<value optimized out> modlist = 0x8b4015f0 modtail = 0x8b403694 tmp = {sml_mod = {sm_desc = 0x80ce5ca, sm_values = 0x8b4036a0, sm_nvalues = 0x0, sm_numvals = 2375635128, sm_op = 0, sm_flags = 0, sm_type = {bv_len = 12, bv_val = 0x8b402e57 "userPassword"}}, sml_next = 0x823c27} textbuf = "\025\000\000\000\310\031@\213\b\026@\213\006\340(\000\220[\231\215\000\000\000\000\000\000\000\000\020\000@\213\025\000\000\000\310\031@\213\310+@\213\005\070/\000\200O@\213T\245(\000\000\000\000\000\020\000@\213\364\237\067\000\220[\231\215\000\000\000\000\233\071@\213@:=\n\370O\231\215T\213\202\000b\213\202\000;\334\347\000"\000\000\000\233\071@\213\b\000\000\000\201\354(\000\fP\231\215<\004\350\000\270P\231\215\312\315\347\000\370\326\070\n\233\071@\213\b\000\000\000\001\200\255\373\b\347\067\n@\000\000\000\243P\231\215@\000@\213\026\347\067\n@\000@\213\b\347\067\n@\261\067\000,\000\000\000\020\000@\213",
'\000'<repeats 20 times>, "5\000\000\000@\000@\213\000\000\000\000\340\021@\213\000\000\000\000\000\000\000\000\260+@\213\000\000\000\000\001\000\000\000\004\000\020\000\350Q\231\215\310P\231\215"
rc =<value optimized out> freevals =<value optimized out> oex = {oe = {oe_next = {sle_next = 0x0}, oe_key = 0x8081330},
oe_db = 0x0} #11 0x0807988c in connection_operation (ctx=0x8d9951e8, arg_v=0xa37e608) at connection.c:1150 rc =<value optimized out> cancel =<value optimized out> rs = {sr_type = REP_RESULT, sr_tag = 105, sr_msgid = 2, sr_err = 0, sr_matched = 0x0, sr_text = 0x0, sr_ref = 0x0, sr_ctrls = 0x0, sr_un = {sru_search = {r_entry = 0x0, r_attr_flags = 0, r_operational_attrs = 0x0, r_attrs = 0x0, r_nentries = 0, r_v2ref = 0x0}, sru_sasl = {r_sasldata = 0x0}, sru_extended = {r_rspoid = 0x0, r_rspdata = 0x0}}, sr_flags = 0} tag = 104 opidx = SLAP_OP_ADD conn = 0xb7f3bc10 memctx = 0xa372570 memctx_null = 0x0 __PRETTY_FUNCTION__ = "connection_operation" #12 0x0807a0fd in connection_read_thread (ctx=0x8d9951e8, argv=0x22) at connection.c:1286 s =<value optimized out> #13 0x00717a24 in ldap_int_thread_pool_wrapper (xpool=0xa2265c8) at tpool.c:688 task = 0xa382e10 work_list =<value optimized out> ctx = {ltu_id = 2375637904, ltu_key = {{ltk_key = 0x80ce400, ltk_data = 0xa372570, ltk_free = 0x80ce430<slap_sl_mem_destroy>}, {ltk_key = 0xa363398, ltk_data = 0xa371a88, ltk_free = 0x812e4c0<bdb_reader_free>}, {ltk_key = 0x8078320, ltk_data = 0xa37de68, ltk_free = 0x80783f0<conn_counter_destroy>}, {ltk_key = 0x808dde0, ltk_data = 0x0, ltk_free = 0x808dbf0<slap_op_q_destroy>}, {ltk_key = 0x0, ltk_data = 0x0, ltk_free = 0}<repeats 28 times>}} kctx =<value optimized out> keyslot = 241 hash = 5278961 __PRETTY_FUNCTION__ = "ldap_int_thread_pool_wrapper" #14 0x00821832 in start_thread () from /lib/libpthread.so.0 No symbol table info available. #15 0x002f746e in clone () from /lib/libc.so.6 No symbol table info available.