Re: Improved handling for large number of databases / Access newly opened database from another transaction

27 May 2016


      Here are the test results (run on an iMac with a 2.7 GHz Intel Core i5)
lmdb improved
[1000000] iterations (begin, cursor_open, cursor_close, abort) with [10] databases in [0.41516] seconds
[1000000] iterations (begin, cursor_open, cursor_close, abort) with [100] databases in [0.35304] seconds
[1000000] iterations (begin, cursor_open, cursor_close, abort) with [1000] databases in [0.49425] seconds
[1000000] iterations (begin, cursor_open, cursor_close, abort) with [10000] databases in [2.23236] seconds
[1000000] iterations (begin, cursor_open, cursor_close, abort) with [100000] databases in [15.28527] seconds
lmdb original
[1000000] iterations (begin, cursor_open, cursor_close, abort) with [10] databases in [0.35039] seconds
[1000000] iterations (begin, cursor_open, cursor_close, abort) with [100] databases in [0.65547] seconds
[1000000] iterations (begin, cursor_open, cursor_close, abort) with [1000] databases in [5.48897] seconds
[1000000] iterations (begin, cursor_open, cursor_close, abort) with [10000] databases in [67.13091] seconds
[1000000] iterations (begin, cursor_open, cursor_close, abort) with [100000] databases in [781.53778] seconds
As expected, with a small number of databases the original lmdb is slightly faster, but the improved handling quickly outperforms the original implementation as the number of databases grows.
Test code:
#include "lmdb.h"
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <unistd.h>
#include <sys/time.h>
#include <mach/clock.h>
#include <mach/mach.h>
/* Directory of the LMDB environment used by the benchmark. */
static const char *env_name = "/Developer/tmp/testdb";

/* Map size handed to mdb_env_set_mapsize(): 100 MiB. */
#define MAX_MAP_SIZ     (1024 * 1024 * 100)

/* One million iterations per benchmark run. */
#define NUM_ITERATIONS  (1000 * 1000)
/* Original (misspelled) name, kept so existing references keep compiling. */
#define NUM_ITERATIPONS NUM_ITERATIONS

/*
 * Error-checking helpers. All of them assign to a variable named `rc`, which
 * must be declared (as int) in the calling scope.
 *
 * E(expr)        - evaluate expr, abort with a diagnostic unless it returns
 *                  MDB_SUCCESS.
 * RES(err, expr) - evaluate expr; yields 1 if the result equals err, 0 if the
 *                  result is 0, and aborts with a diagnostic otherwise.
 * CHECK(test, msg) - if test is false, print file, line, msg and
 *                  mdb_strerror(rc) to stderr, then abort().
 */
#define E(expr) CHECK((rc = (expr)) == MDB_SUCCESS, #expr)
#define RES(err, expr) ((rc = (expr)) == (err) || (CHECK(!rc, #expr), 0))
#define CHECK(test, msg) ((test) ? (void)0 : ((void)fprintf(stderr, \
    "%s:%d: %s: %s\n", __FILE__, __LINE__, msg, mdb_strerror(rc)), abort()))

/* Shared benchmark state, filled in by setup(). */
static MDB_env *env;        /* the open environment */
static MDB_dbi main_dbi;    /* handle of the unnamed (main) database */
static MDB_dbi numDbs = 0;  /* number of named databases opened */
static MDB_dbi *dbi;        /* heap array of numDbs named-database handles */
/*
 * Create and open the LMDB environment at env_name, then open `dbNum` named
 * databases (created if missing) plus the unnamed main database, all inside
 * a single write transaction that is committed before returning.
 *
 * dbNum: number of named databases to open; also used as the environment's
 *        maxdbs limit. Database names are the index formatted as "%03x".
 *
 * Side effects: sets the file-scope variables env, main_dbi, numDbs and dbi
 * (a freshly allocated array of dbNum handles).
 *
 * Any LMDB error or allocation failure prints a diagnostic to stderr and
 * aborts the process.
 */
void setup(unsigned int dbNum) {
    int rc;

    E(mdb_env_create(&env));
    E(mdb_env_set_maxreaders(env, 1));
    E(mdb_env_set_maxdbs(env, dbNum));
    E(mdb_env_set_mapsize(env, MAX_MAP_SIZ));
    E(mdb_env_open(env, env_name, 0, 0664));

    numDbs = dbNum;
    dbi = malloc(sizeof *dbi * numDbs);
    /* malloc(0) may legitimately return NULL, so only treat NULL as fatal
     * when a non-empty array was requested. */
    if (dbi == NULL && numDbs != 0) {
        fprintf(stderr, "%s:%d: malloc: out of memory\n", __FILE__, __LINE__);
        abort();
    }

    MDB_txn *txn;
    E(mdb_txn_begin(env, NULL, 0, &txn));
    E(mdb_dbi_open(txn, NULL, 0, &main_dbi));
    for (unsigned int i = 0; i < numDbs; i++) {
        char name[16];

        /* At most 8 hex digits plus the terminator; bounded anyway. */
        snprintf(name, sizeof name, "%03x", i);
        E(mdb_dbi_open(txn, name, MDB_CREATE, &dbi[i]));
    }
    E(mdb_txn_commit(txn));
}
void cleanup() {
    mdb_env_close(env);
char name[1024];
sprintf(name, "%s/data.mdb", env_name);
    unlink(name);
    sprintf(name, "%s/lock.mdb", env_name);
    unlink(name);
}
struct timespec get_time() {
    struct timespec ts;
clock_serv_t cclock;
    mach_timespec_t mts;
    host_get_clock_service(mach_host_self(), CALENDAR_CLOCK, &cclock);
    clock_get_time(cclock, &mts);
    mach_port_deallocate(mach_task_self(), cclock);
    ts.tv_sec = mts.tv_sec;
    ts.tv_nsec = mts.tv_nsec;
return ts;
}
/*
 * Benchmark loop: `num_iterations` times, begin a read-only transaction,
 * open a cursor on the first named database (dbi[0]), close the cursor and
 * abort the transaction. Prints the iteration count, the number of open
 * databases (numDbs) and the elapsed time in seconds to stdout.
 *
 * Requires setup() to have run with at least one database. Any LMDB error
 * prints a diagnostic and aborts.
 */
void test(unsigned int num_iterations) {
    int rc;
    MDB_txn *txn;
    MDB_cursor *cursor;

    struct timespec ts = get_time();
    for (unsigned int i = 0; i < num_iterations; i++) {
        E(mdb_txn_begin(env, NULL, MDB_RDONLY, &txn));
        E(mdb_cursor_open(txn, dbi[0], &cursor));
        mdb_cursor_close(cursor);
        mdb_txn_abort(txn);
    }
    struct timespec te = get_time();

    /* Subtract the fields before converting to double: the difference of two
     * large epoch-based doubles would lose sub-microsecond precision. */
    double elapsed = (double)(te.tv_sec - ts.tv_sec)
                   + 1.0e-9 * (double)(te.tv_nsec - ts.tv_nsec);

    /* Both counters are unsigned, so they are printed with %u. */
    printf("[%u] iterations (begin, cursor_open, cursor_close, abort) with [%u] databases in [%.5f] seconds\n\n",
           num_iterations,
           (unsigned int)numDbs,
           elapsed);
}
/*
 * Run the benchmark once for each database count (10, 100, 1000, 10000 and
 * 100000), with one million iterations per run. Each run gets a fresh
 * environment via setup() and is torn down again with cleanup().
 */
int main(int argc,char * argv[]) {
    static const unsigned int db_counts[] = { 10, 100, 1000, 10000, 100000 };
    const size_t run_count = sizeof db_counts / sizeof db_counts[0];

    for (size_t run = 0; run < run_count; run++) {
        setup(db_counts[run]);
        test(1000 * 1000);
        cleanup();
    }

    return 0;
}
On 27/05/16 07:37, "Howard Chu" hyc@symas.com wrote:
...
Jürg Bircher wrote:
...
Hello
Improved handling for large number of databases
...
If interested let me know how to contribute.
Looks interesting, yes. I assume you have profiled the code before and after 
the suggested changes, please provide your profiling results.
Please read the Developer Guidelines.
 http://www.openldap.org/devel/contributing.html
...
Access newly opened database from another transaction
Sounds like an oddball case. Applications should open all their DBIs from a 
single thread and not start any other threads/transactions until all setup is 
completed.
...
Hope it is useful!
Thanks.
-- 
  -- Howard Chu
  CTO, Symas Corp.           http://www.symas.com
  Director, Highland Sun     http://highlandsun.com/hyc/
  Chief Architect, OpenLDAP  http://www.openldap.org/project/

2025

2024

2023

2022

2021

2020

2019

2018

2017

2016

2015

2014

2013

2012

2011

2010

2009

2008

2007

Re: Improved handling for large number of databases / Access newly opened database from another transaction

Improved handling for large number of databases

Access newly opened database from another transaction