Hi list,
I have a problem with my openldap 2.4.22 deployment.
Today I upgraded my architecture from a 2-way OpenLDAP 2.4.20 setup to a 3-way OpenLDAP 2.4.22 setup, but I still have the same slowness problem some time after my systems come up.
One of the servers in particular is affected, but I'm aware that our load-balancing system heavily prefers this one.
The symptom is that every search, even the same simple search executed repeatedly, causes a CPU burst and takes an enormous time to be served, sometimes 1m30s, even though after a cold start the same query is served in less than a second.

My user database is accessed for Linux, AIX, DB2 and Oracle authentication. The same searches are fired repeatedly against my OpenLDAP instances.
I suspect I have a problem in the way I configured the memory to be allocated.

The data in my user_db is quite static, but the accesslog is written to very heavily. Retaining 3 days of history, my db dir grows to about 11-12 GB.

I'm using 4 different disks (and SCSI controllers) for userdb, logdb, transaction log and OS.

Information about my environment follows. I can send more if necessary.

My user database is quite small:

1.5MB slapcat ldif


database        hdb
suffix          "dc=lancse,dc=csebo.it"
rootdn          "cn=Manager,dc=lancse,dc=csebo.it"
rootpw          {SSHA}mySecret
directory       /srv/ldap/db_utenti
index           cn              eq
index           ou              eq
index           objectClass     eq
index           uid             eq
index           gidNumber       eq
index           uidNumber       eq
#index           member          pres,eq
index           memberUid       eq
index           entryCSN        eq
index           entryUUID       eq
index           userPassword    pres
index           uniqueMember    eq
limits          dn="cn=Manager,dc=lancse,dc=csebo.it" size=unlimited

# cachesize: numero di entry del db da tenere in cache.
# Ad oggi (2010-04-20) nel database degli utenti ne abbiamo
cachesize       2000000

# dncachesize: indica il numero di dn da tenere in memoria.
# Il valore di 0 indica nessun limite
#dncachesize     2000000
dncachesize     0

# idlcachesize: index-data-lookup cache. Indica il numero di slot dell'indice tenuti in memoria
idlcachesize    2000000

# cachefree: indica quante entry alla volta liberare quando viene saturata
cachefree       10

# checkpoint ogni 1MB di scritte eseguite e/o 30 minuti di tempo
checkpoint      1024 30


[root@ldap01 db_utenti]# ls -lhtr *.bdb
-rw------- 1 ldap ldap  44K Apr 26 18:09 uid.bdb
-rw------- 1 ldap ldap  40K Apr 26 18:09 gidNumber.bdb
-rw------- 1 ldap ldap 8.0K Apr 27 09:07 uniqueMember.bdb
-rw------- 1 ldap ldap  28K Apr 27 12:32 uidNumber.bdb
-rw------- 1 ldap ldap  40K Apr 27 12:32 ou.bdb
-rw------- 1 ldap ldap  32K Apr 28 11:04 memberUid.bdb
-rw------- 1 ldap ldap  36K Apr 28 16:14 userPassword.bdb
-rw------- 1 ldap ldap 164K Apr 28 16:14 objectClass.bdb
-rw------- 1 ldap ldap  72K Apr 28 16:14 entryUUID.bdb
-rw------- 1 ldap ldap 260K Apr 28 16:14 dn2id.bdb
-rw------- 1 ldap ldap  32K Apr 28 16:14 cn.bdb
-rw------- 1 ldap ldap  64K Apr 28 17:34 entryCSN.bdb
-rw------- 1 ldap ldap 2.0M Apr 28 17:39 id2entry.bdb

The DB_CONFIG is this:

# Uso 256MB di cache splittato in 2 file
set_cachesize 0 268435456 2

# Transaction Log settings
set_lg_regionmax 262144
set_lg_bsize 2097152
# IMPOSTO LA DIR DEI TLOGS IN UN FileSystem DEDICATO
set_lg_dir logs

# PARAMETRI DI TUNING DEI LOCK
set_lk_max_objects 3000
set_lk_max_locks 3000
set_lk_max_lockers 3000

# IMPOSTO LA RIMOZIONE AUTOMATICA DEI LOG
set_flags DB_LOG_AUTOREMOVE

-------------------------------------------------------------------------------------

This is the accesslog database configuration, followed by its db dir:

database        hdb
suffix          "cn=log01,dc=csebo.it"
rootdn          "cn=Manager,cn=log01,dc=csebo.it"
rootpw          mySecret
directory       /srv/ldap/db_log
index           entryUUID    eq
index           reqStart     eq
index           reqEnd     eq
index           objectClass  eq
index           reqType      eq
index           reqDN        eq
index           reqAuthzID   eq
index           reqEntries   eq
index           reqAttr      eq
index           reqResult    eq
index           reqFilter    pres,eq
#index          reqEntries    eq
#index          reqSession    eq
#index          reqResult    eq
#index          reqScope    eq
#index          reqDerefAliases    eq
#index          reqAttrsOnly    eq
#index          reqFilter    eq,approx
#index          reqTimeLimit    eq,approx
#index          reqSizeLimit    eq,approx
limits          dn="cn=Manager,cn=log01,dc=csebo.it" size=unlimited

# cachesize: numero di entry del db da tenere in cache.
cachesize       150000

# dncachesize: indica il numero di dn da tenere in memoria.
# Il valore di 0 indica nessun limite
dncachesize     300000

# idlcachesize: index-data-lookup cache. Indica il numero di slot dell'indice tenuti in memoria
#    Per i backend bdb e' suggerito essere uguale a "cachesize"
#    Per i backend hdb e' suggerito essere uguale ad almeno 3 volte "cachesize"
idlcachesize    450000

# cachefree: indica quante entry alla volta liberare quando viene saturata
cachefree       10

# checkpoint ogni 4MB di scritte eseguite e/o 30 minuti di tempo
checkpoint      4096 30



[root@ldap01 db_log]# ls -lhtr
total 7.8G
drwx------ 2 ldap ldap  16K Feb 26 11:54 lost+found
lrwxrwxrwx 1 ldap ldap   26 Feb 26 12:11 logs -> ../transactionlogs/db_logs
-rw-r--r-- 1 ldap ldap 1.3K Apr 28 17:40 DB_CONFIG
-rw------- 1 ldap ldap 2.3M Apr 28 17:40 __db.004
-rw------- 1 ldap ldap 321M Apr 28 17:40 __db.003
-rw------- 1 ldap ldap  16M Apr 28 17:40 __db.002
-rw------- 1 ldap ldap  24K Apr 28 17:40 __db.001
-rw-r--r-- 1 ldap ldap 2.0K Apr 28 17:40 alock
-rw------- 1 ldap ldap  32K Apr 28 17:40 __db.006
-rw------- 1 ldap ldap 2.5M Apr 28 17:40 __db.005
-rw------- 1 ldap ldap 1.3M Apr 28 18:27 reqEntries.bdb
-rw------- 1 ldap ldap  21M Apr 28 18:27 reqFilter.bdb
-rw------- 1 ldap ldap  12M Apr 28 18:27 reqAttr.bdb
-rw------- 1 ldap ldap 1.7M Apr 28 18:27 reqType.bdb
-rw------- 1 ldap ldap 106M Apr 28 18:27 reqStart.bdb
-rw------- 1 ldap ldap 1.2M Apr 28 18:27 reqResult.bdb
-rw------- 1 ldap ldap 105M Apr 28 18:27 reqEnd.bdb
-rw------- 1 ldap ldap 6.3M Apr 28 18:27 reqDN.bdb
-rw------- 1 ldap ldap 5.7M Apr 28 18:27 reqAuthzID.bdb
-rw------- 1 ldap ldap 4.6M Apr 28 18:27 objectClass.bdb
-rw------- 1 ldap ldap 6.0G Apr 28 18:27 id2entry.bdb
-rw------- 1 ldap ldap 163M Apr 28 18:27 entryUUID.bdb
-rw------- 1 ldap ldap 1.1G Apr 28 18:27 dn2id.bdb

This is the DB_CONFIG file
# one 0.25 GB cache
set_cachesize 0 268435456 1

# Transaction Log settings
set_lg_regionmax 262144
set_lg_bsize 2097152
# IMPOSTO LA DIR DEI TLOGS IN UN FileSystem DEDICATO
set_lg_dir logs

# Dimensione dei log --> 60MB
set_lg_max 62914560
#set_lg_max 41943040

# PARAMETRI DI TUNING DEI LOCK
set_lk_max_objects 3000
set_lk_max_locks 3000
set_lk_max_lockers 3000


My system has 15 GB of RAM. This is an example of my RAM situation when the problem arises:

[root@ldap01 db_log]# free -m
             total       used       free     shared    buffers     cached
Mem:         14799       8009       6790          0        924       3160
-/+ buffers/cache:       3924      10875
Swap:         6143          8       6135

top -H
  PID USER      PR  NI  VIRT  RES  SHR S %CPU %MEM    TIME+  SWAP DATA COMMAND
 9008 ldap      25   0 4806m 3.8g 346m S  0.0 26.5   0:10.69 880m 4.0g slapd2.4
 9151 ldap      15   0 4806m 3.8g 346m S  0.0 26.5   0:11.23 880m 4.0g slapd2.4
 9152 ldap      15   0 4806m 3.8g 346m S  0.0 26.5   2:37.42 880m 4.0g slapd2.4
 9153 ldap      15   0 4806m 3.8g 346m S  0.0 26.5   3:20.29 880m 4.0g slapd2.4
 9154 ldap      15   0 4806m 3.8g 346m S  0.3 26.5   3:02.49 880m 4.0g slapd2.4
 9155 ldap      16   0 4806m 3.8g 346m S  0.0 26.5   2:23.49 880m 4.0g slapd2.4
 9156 ldap      15   0 4806m 3.8g 346m S  0.0 26.5   2:14.86 880m 4.0g slapd2.4
 9157 ldap      15   0 4806m 3.8g 346m S  0.0 26.5   2:44.86 880m 4.0g slapd2.4
 9158 ldap      16   0 4806m 3.8g 346m S  0.0 26.5   2:03.71 880m 4.0g slapd2.4
 9163 ldap      16   0 4806m 3.8g 346m S  0.0 26.5   2:06.05 880m 4.0g slapd2.4
 9164 ldap      15   0 4806m 3.8g 346m S  0.0 26.5   2:46.38 880m 4.0g slapd2.4
 9165 ldap      15   0 4806m 3.8g 346m S  0.0 26.5   2:25.31 880m 4.0g slapd2.4
 9170 ldap      15   0 4806m 3.8g 346m S  0.0 26.5   2:12.04 880m 4.0g slapd2.4
 9171 ldap      15   0 4806m 3.8g 346m S  0.0 26.5   2:17.15 880m 4.0g slapd2.4
 9172 ldap      18   0 4806m 3.8g 346m R 98.1 26.5   2:06.54 880m 4.0g slapd2.4
 9173 ldap      15   0 4806m 3.8g 346m S  0.0 26.5   2:39.32 880m 4.0g slapd2.4
 9174 ldap      15   0 4806m 3.8g 346m S  0.0 26.5   2:25.64 880m 4.0g slapd2.4
 9175 ldap      15   0 4806m 3.8g 346m S  0.0 26.5   2:33.29 880m 4.0g slapd2.4

With dstat I can see that I *don't* have any disk reads, only writes due to the accesslog:
---procs--- ------memory-usage----- ---paging-- -dsk/total- ---system-- ----total-cpu-usage---- ------memory-usage-----
run blk new| used  buff  cach  free|  in   out | read  writ| int   csw |usr sys idl wai hiq siq| used  buff  cach  free
  2   0   2|3932M  926M 3178M 6764M|   0     0 |   0   559k| 246   445 | 24   0  75   0   0   0|3932M  926M 3178M 6764M
  1   0   0|3932M  926M 3178M 6764M|   0     0 |   0   464k| 200   333 | 24   0  76   0   0   0|3932M  926M 3178M 6764M
  1   0   2|3933M  926M 3178M 6764M|   0     0 |   0   408k| 213   309 | 24   1  75   0   0   0|3933M  926M 3178M 6764M
  3   0   0|3932M  926M 3178M 6764M|   0     0 |   0   420k| 205   280 | 23   0  75   1   0   0|3932M  926M 3178M 6764M
  3   0   0|3932M  926M 3178M 6764M|   0     0 |   0   376k| 210   349 | 23   0  77   0   0   0|3932M  926M 3178M 6764M
  1   0   0|3932M  926M 3178M 6764M|   0     0 |   0   900k| 233   373 | 24   0  75   0   0   0|3932M  926M 3178M 6764M
  1   0   0|3932M  926M 3178M 6764M|   0     0 |   0   356k| 208   446 | 22   1  77   0   0   0|3932M  926M 3178M 6764M
  1   0   0|3932M  926M 3178M 6764M|   0     0 |   0   440k| 213   389 | 22   0  76   1   0   0|3932M  926M 3178M 6764M
  1   0   0|3932M  926M 3178M 6764M|   0     0 |   0   403k| 200   306 | 23   1  75   1   0   0|3932M  926M 3178M 6764M
  3   0   4|3932M  926M 3178M 6764M|   0     0 |   0   648k| 196   365 | 24   1  74   1   0   0|3932M  926M 3178M 6764M
  2   0   0|3932M  926M 3178M 6764M|   0     0 |   0   572k| 246   287 | 22   2  75   0   0   0|3932M  926M 3178M 6764M
  1   0   2|3932M  926M 3179M 6763M|   0     0 |   0   552k| 197   256 | 25   1  73   1   0   0|3932M  926M 3179M 6763M
  2   0   0|3932M  926M 3179M 6763M|   0     0 |   0   368k| 210   259 | 23   1  75   0   0   0|3932M  926M 3179M 6763M
  2   0   0|3932M  926M 3179M 6763M|   0     0 |   0  4228k| 203   272 | 24   1  73   2   0   0|3932M  926M 3179M 6763M
  2   0   0|3932M  926M 3179M 6763M|   0     0 |   0   440k| 218   346 | 23   1  76   0   0   0|3932M  926M 3179M 6763M
  4   0   0|3932M  926M 3179M 6763M|   0     0 |   0   499k| 215   359 | 24   0  75   1   0   0|3932M  926M 3179M 6763M
  2   0   0|3932M  926M 3179M 6763M|   0     0 |   0   348k| 215   313 | 25   0  74   1   0   0|3932M  926M 3179M 6763M
  1   0   0|3932M  926M 3179M 6763M|   0     0 |   0   360k| 166   245 | 24   0  75   1   0   0|3932M  926M 3179M 6763M
  2   0   7|3933M  926M 3179M 6762M|   0     0 |   0   432k| 225   393 | 25   3  71   0   0   0|3933M  926M 3179M 6762M
  2   0   0|3933M  926M 3179M 6762M|   0     0 |   0   508k| 206   353 | 24   1  75   0   0   0|3933M  926M 3179M 6762M
  1   0   2|3933M  926M 3179M 6762M|   0     0 |   0   716k| 254   342 | 25   1  75   0   0   0|3933M  926M 3179M 6762M
  2   0   0|3933M  926M 3179M 6762M|   0     0 |   0   436k| 252   361 | 24   0  75   0   0   0|3933M  926M 3179M 6762M
  1   0   0|3933M  926M 3179M 6762M|   0     0 |   0   456k| 226   359 | 24   0  75   1   0   0|3933M  926M 3179M 6762M


I also have the password_policy overlay with forwarding of operational attributes, and the auditlog overlay.
Thanks in advance
Marco


--
_________________________________________
Non è forte chi non cade, ma chi cadendo ha la forza di rialzarsi.
Jim Morrison