Hi there,
We upgraded to openldap 2.4.16 about 10 days ago. Since then we are experiencing relatively frequent core dumps (average uptime is around 2-3 days). Investigating the root cause we came across ITS#6034 which reminded us much of problem we are facing, but should be fixed in 2.4.16.
Setup: Solaris 10 Openldap 2.4.16 (64bit) Berkeley DB 4.7
1 Master 5 Replicas with deltasyncrepl
Cores only happened to the master so far, replicas are running stable so far.
I couldn't find a reference to this ITS in the 2.4.16 changelog (probably listed as other ITS or fixed earlier). Also if the fix made it's way into 2.4.16, I am wondering why we still observe this cores.
If you need further information or have any suggestions please let me know. Please find some debugging information below.
Kind regards, David
(dbx) threads t@1 a l@1 ?() LWP suspended in __lwp_wait() t@2 a l@2 slapd_daemon_task() LWP suspended in __pollsys() t@3 a l@3 ldap_int_thread_pool_wrapper() sleep on 0x1004dd428 in __lwp_park() t@4 a l@4 ldap_int_thread_pool_wrapper() LWP suspended in ___lwp_cond_wait() t@5 a l@5 ldap_int_thread_pool_wrapper() LWP suspended in ___lwp_cond_wait() t@6 a l@6 ldap_int_thread_pool_wrapper() LWP suspended in ___lwp_cond_wait() t@7 a l@7 ldap_int_thread_pool_wrapper() sleep on 0x1004dd670 in __lwp_park() t@8 a l@8 ldap_int_thread_pool_wrapper() LWP suspended in ___lwp_cond_wait() t@9 a l@9 ldap_int_thread_pool_wrapper() sleep on 0x1004a9908 in __lwp_park() o t@10 a l@10 ldap_int_thread_pool_wrapper() signal SIGABRT in __lwp_kill() t@11 a l@11 ldap_int_thread_pool_wrapper() sleep on 0x1004a9908 in __lwp_park() t@12 a l@12 ldap_int_thread_pool_wrapper() LWP suspended in ___lwp_cond_wait()
t@13 a l@13 ldap_int_thread_pool_wrapper() LWP suspended in
slap_access_allowed() t@14 a l@14 ldap_int_thread_pool_wrapper() LWP suspended in ___lwp_cond_wait() t@15 a l@15 ldap_int_thread_pool_wrapper() sleep on 0x1004dd1e0 in __lwp_park() t@16 a l@16 ldap_int_thread_pool_wrapper() LWP suspended in ___lwp_cond_wait() t@17 a l@17 ldap_int_thread_pool_wrapper() sleep on 0x1004a9908 in __lwp_park() t@18 a l@18 ldap_int_thread_pool_wrapper() LWP suspended in __lock_getlocker_int() (dbx) lwp l@10 Current function is connection_state_closing 686 assert( c->c_struct_state == SLAP_C_USED ); t@10 (l@10) stopped in __lwp_kill at 0xffffffff7decea00 0xffffffff7decea00: __lwp_kill+0x0008: bcc,a,pt %icc,__lwp_kill+0x18 ! 0xffffffff7decea10 (dbx) where [1] __lwp_kill(0x0, 0x6, 0x10004b128, 0x19d480, 0x0, 0x0), at 0xffffffff7decea00 [2] raise(0x6, 0x0, 0x100445d58, 0xffffffffffffffff, 0xffffffff7dfe6000, 0x0), at 0xffffffff7de6b4f4 [3] abort(0x1, 0x1b8, 0x10004b128, 0x19d480, 0x0, 0x0), at 0xffffffff7de48c8c [4] __assert(0x100271f48, 0x100271f70, 0x2ae, 0x0, 0x19d138, 0x100069f70), at 0xffffffff7de48f30 =>[5] connection_state_closing(c = 0x1004dce30), line 686 in "connection.c" [6] send_ldap_ber(op = 0xffffffff733ff768, ber = 0xffffffff733ff028), line 148 in "result.c" [7] slap_send_search_entry(op = 0xffffffff733ff768, rs = 0xffffffff733ff2d0), line 1247 in "result.c" [8] syncprov_sendresp(op = 0xffffffff733ff768, opc = 0xffffffff733ff4a0, so = 0x113c0a930, e = 0xffffffff733ff500, mode = 3), line 848 in "syncprov.c" [9] syncprov_qplay(op = 0xffffffff733ff768, so = 0x113c0a930), line 913 in "syncprov.c" [10] syncprov_qtask(ctx = 0xffffffff733ffc20, arg = 0x113c0a930), line 971 in "syncprov.c" [11] ldap_int_thread_pool_wrapper(xpool = 0x10046a250), line 663 in "tpool.c" (dbx) list 680,690 680 int connection_state_closing( Connection *c ) 681 { 682 /* c_mutex must be locked by caller */ 683 684 int state; 685 assert( c != NULL ); 686 assert( c->c_struct_state == SLAP_C_USED ); 687 688 state = c->c_conn_state; 689 690 assert( state != SLAP_C_INVALID ); (dbx) print c c = 0x1004dce30 (dbx) print *c *c = { c_struct_state = 1 c_conn_state = 0 c_conn_idx = 26 c_sd = -1 c_close_reason = 0x100271eb0 "?" c_mutex = { __pthread_mutex_flags = { __pthread_mutex_flag1 = 4U __pthread_mutex_flag2 = '\0' __pthread_mutex_ceiling = '\0' __pthread_mutex_type = 0 __pthread_mutex_magic = 19800U } __pthread_mutex_lock = { __pthread_mutex_lock64 = { __pthread_mutex_pad = "" } __pthread_mutex_lock32 = { __pthread_ownerpid = 0 __pthread_lockword = 0 } __pthread_mutex_owner64 = 0 } __pthread_mutex_data = 0 } c_sb = 0x118772d90 c_starttime = 0 c_activitytime = 0 c_connid = 18446744073709551615U c_peer_domain = { bv_len = 0 bv_val = (nil) } c_peer_name = { bv_len = 0 bv_val = (nil) } c_listener = (nil) c_sasl_bind_mech = { bv_len = 0 bv_val = (nil) } c_sasl_dn = { bv_len = 0 bv_val = (nil) } c_sasl_authz_dn = { bv_len = 0 bv_val = (nil) } c_authz_backend = (nil) c_authz_cookie = (nil) c_authz = { sai_method = 128U sai_mech = { bv_len = 0 bv_val = (nil) } sai_dn = { bv_len = 0 bv_val = (nil) } sai_ndn = { bv_len = 0 bv_val = (nil) } sai_ssf = 0 sai_transport_ssf = 0 sai_tls_ssf = 0 sai_sasl_ssf = 0 } c_protocol = 0 c_ops = { stqh_first = (nil) stqh_last = 0x1004dcf38 } c_pending_ops = { stqh_first = (nil) stqh_last = 0x1004dcf48 } c_write1_mutex = { __pthread_mutex_flags = { __pthread_mutex_flag1 = 4U __pthread_mutex_flag2 = '\0' __pthread_mutex_ceiling = '\0' __pthread_mutex_type = 0 __pthread_mutex_magic = 19800U } __pthread_mutex_lock = { __pthread_mutex_lock64 = { __pthread_mutex_pad = "" } __pthread_mutex_lock32 = { __pthread_ownerpid = 0 __pthread_lockword = 4278190080U } __pthread_mutex_owner64 = 4278190080U } __pthread_mutex_data = 18446744071451976704U } c_write1_cv = { __pthread_cond_flags = { __pthread_cond_flag = "" __pthread_cond_type = 0 __pthread_cond_magic = 17238U } __pthread_cond_data = 0 } c_write2_mutex = { __pthread_mutex_flags = { __pthread_mutex_flag1 = 4U __pthread_mutex_flag2 = '\0' __pthread_mutex_ceiling = '\0' __pthread_mutex_type = 0 __pthread_mutex_magic = 19800U } __pthread_mutex_lock = { __pthread_mutex_lock64 = { __pthread_mutex_pad = "" } __pthread_mutex_lock32 = { __pthread_ownerpid = 0 __pthread_lockword = 0 } __pthread_mutex_owner64 = 0 } __pthread_mutex_data = 0 } c_write2_cv = { __pthread_cond_flags = { __pthread_cond_flag = "" __pthread_cond_type = 0 __pthread_cond_magic = 17238U } __pthread_cond_data = 0 } c_currentber = (nil) c_writers = 0 c_sasl_bind_in_progress = '\0' c_writewaiter = '\0' c_sasl_layers = '\0' c_sasl_done = '\0' c_sasl_authctx = (nil) c_sasl_sockctx = (nil) c_sasl_extra = (nil) c_sasl_bindop = (nil) c_pagedresults_state = { ps_be = (nil) ps_size = 0 ps_count = 0 ps_cookie = 0 ps_cookieval = { bv_len = 0 bv_val = (nil) } } c_n_ops_received = 3 c_n_ops_executing = 0 c_n_ops_pending = 0 c_n_ops_completed = 3 c_n_get = 4 c_n_read = 3 c_n_write = 1 c_extensions = (nil) c_clientfunc = (nil) c_clientarg = (nil) c_send_ldap_result = 0x10006b900 = &slap_send_ldap_result(Operation *op, SlapReply *rs) c_send_search_entry = 0x10006cda0 = &slap_send_search_entry(Operation *op, SlapReply *rs) c_send_search_reference = 0x10006fa80 = &slap_send_search_reference(Operation *op, SlapReply *rs) c_send_ldap_extended = 0x10006c5f8 = &slap_send_ldap_extended(Operation *op, SlapReply *rs) c_send_ldap_intermediate = 0x10006ca28 = &slap_send_ldap_intermediate(Operation *op, SlapReply *rs) }