We very occasionally see a problem that goes like this:
1) Send an LDAP search request to a Windows DC.
2) Call ldap_int_select() from wait4msg() in result.c.
3) After ldap_int_select() returns successfully, (eventually) call down into the
sockbuf.c function sb_stream_read(), which calls read(…).
4) Very occasionally, the read() hangs forever if it takes multiple calls to
get the entire response.
I was not able to get a network trace to see why read()
hangs forever (this is on AIX 5.3, by the way), but it does have to do with a
large response that comes back in several TCP segments.
I solved the issue by adding an additional select() call in
sb_stream_read()
Here is what I did in code base 2.2.6
static ber_slen_t
sb_stream_read( Sockbuf_IO_Desc *sbiod, void *buf, ber_len_t
len )
…
#elif defined( HAVE_NCSA )
/*
* NCSA Telnet TCP/IP stack (under DOS)
*/
return
nread( sbiod->sbiod_sb->sb_fd, buf, len );
#else
{
	/*
	 * The new check: wait until the socket is readable (bounded by the
	 * sockbuf timeout, if one is set) so that the read() below cannot
	 * block forever.  ber_int_sb_read_wait() returns
	 * -1=error : 0=timeout : 1=proceed.
	 */
	int rc = ber_int_sb_read_wait( sbiod->sbiod_sb );

	if ( rc > 0 ) {
		/* Return read()'s result directly so it is not narrowed to int. */
		return read( sbiod->sbiod_sb->sb_fd, buf, len );
	}

	if ( rc == 0 ) {
		/*
		 * Timed out.  A return value of 0 from a read function means
		 * end-of-file, so report the timeout as an error with a
		 * well-defined errno instead of a stale one.
		 */
		errno = ETIMEDOUT;
		return -1;
	}

	/* rc < 0: the wait itself failed; errno was set by the wait. */
	return rc;
}
#endif
}
New Function
/**
 * If there is a timeout - wait for data, to avoid
 * permanent blocking on a read.
 *
 * When no timeout is configured (both sb_timeout fields are zero) the
 * select() is skipped and the caller is told to proceed immediately.
 *
 * @param sb Socket buf with socket id (sb_fd) and timeout (sb_timeout)
 * @return -1=error (errno is set) : 0=timeout : 1=proceed
 */
static int
ber_int_sb_read_wait(Sockbuf *sb)
{
	struct timeval tm = sb->sb_timeout;
	int sock = sb->sb_fd;
	int saved_errno = 0;
	int rc;
	fd_set rfds;

	/* If no timeout was specified skip the select */
	if (tm.tv_sec == 0 && tm.tv_usec == 0) {
		return 1;
	}

	/*
	 * FD_SET() on a descriptor outside [0, FD_SETSIZE) is undefined
	 * behavior, so reject such descriptors up front.
	 */
	if (sock < 0 || sock >= FD_SETSIZE) {
		errno = EBADF;
		return -1;
	}

	ber_log_printf(LDAP_DEBUG_PACKETS, sb->sb_debug,
		"ber_int_sb_read_wait start: timeout is %d %d\n",
		(int)tm.tv_sec, (int)tm.tv_usec);

	/*
	 * Retry when a signal interrupts the wait; that is not an I/O error.
	 * The descriptor set is rebuilt on each pass because its contents are
	 * unspecified after a failed select().
	 * NOTE(review): whether tm holds the remaining time after an
	 * interrupted select() is platform-dependent, so the total wait may
	 * exceed the configured timeout when signals arrive.
	 */
	do {
		FD_ZERO(&rfds);
		FD_SET(sock, &rfds);
		rc = select(sock + 1, &rfds, NULL, NULL, &tm);
	} while (rc < 0 && errno == EINTR);

	/* Capture errno before logging, which may overwrite it. */
	if (rc < 0) {
		saved_errno = errno;
	}

	ber_log_printf(LDAP_DEBUG_PACKETS, sb->sb_debug,
		"ber_int_sb_read_wait end: sock=%d rc=%d %s\n",
		sock, rc, (rc >= 0) ? "" : STRERROR(saved_errno));

	if (rc < 0) {
		errno = saved_errno;
		return -1;
	}

	/* Only one descriptor is watched: >0 means it is readable. */
	return (rc > 0) ? 1 : 0;
}
In open.c – ldap_int_open_connection(), I added some
code to propagate the timeout down to ber_int_sb_read_wait():
{
	/*
	 * Hand the network timeout down to the sockbuf layer so that it
	 * can be used for the select calls made there.
	 */
	struct timeval *net_timeout = NULL;

	if (ldap_get_option(ld, LDAP_OPT_NETWORK_TIMEOUT,
			(void *)&net_timeout) == LDAP_OPT_SUCCESS
		&& net_timeout != NULL)
	{
		ber_sockbuf_ctrl(conn->lconn_sb, LBER_SB_OPT_SET_TIMEOUT, net_timeout);
		/* The value handed back by ldap_get_option() is released here. */
		LDAP_FREE(net_timeout);
	}
}
Regards
Dave Daugherty
Centrify Corp.