Here are the test results (run on an iMac with a 2.7 GHz Intel Core i5):
lmdb improved
[1000000] iterations (begin, cursor_open, cursor_close, abort) with [10] databases in [0.41516] seconds
[1000000] iterations (begin, cursor_open, cursor_close, abort) with [100] databases in [0.35304] seconds
[1000000] iterations (begin, cursor_open, cursor_close, abort) with [1000] databases in [0.49425] seconds
[1000000] iterations (begin, cursor_open, cursor_close, abort) with [10000] databases in [2.23236] seconds
[1000000] iterations (begin, cursor_open, cursor_close, abort) with [100000] databases in [15.28527] seconds
lmdb original
[1000000] iterations (begin, cursor_open, cursor_close, abort) with [10] databases in [0.35039] seconds
[1000000] iterations (begin, cursor_open, cursor_close, abort) with [100] databases in [0.65547] seconds
[1000000] iterations (begin, cursor_open, cursor_close, abort) with [1000] databases in [5.48897] seconds
[1000000] iterations (begin, cursor_open, cursor_close, abort) with [10000] databases in [67.13091] seconds
[1000000] iterations (begin, cursor_open, cursor_close, abort) with [100000] databases in [781.53778] seconds
As expected, with a small number of databases the original lmdb is slightly faster, but the improved handling quickly outperforms the original implementation as the number of databases grows.
Test code:
#include "lmdb.h" #include <stdio.h> #include <stdlib.h> #include <time.h> #include <unistd.h> #include <sys/time.h> #include <mach/clock.h> #include <mach/mach.h>
/* Directory that holds the LMDB environment files (data.mdb / lock.mdb). */
static const char *env_name = "/Developer/tmp/testdb";

/* Memory-map size passed to mdb_env_set_mapsize(): 100 MiB. */
#define MAX_MAP_SIZ (1024 * 1024 * 100)

/* Number of benchmark iterations: one million. */
#define NUM_ITERATIONS (1000 * 1000)
/* Original, misspelled name; kept as an alias so existing uses still compile. */
#define NUM_ITERATIPONS NUM_ITERATIONS

/*
 * Error-checking helpers. All three read or write a local `int rc` that
 * must be declared in the calling function.
 *
 * E(expr)        - run expr, store its result in rc, abort unless MDB_SUCCESS.
 * RES(err, expr) - run expr, store its result in rc; true if rc == err,
 *                  false if rc == 0, abort on any other result.
 * CHECK(test, msg) - if test is false, print file:line, msg and
 *                  mdb_strerror(rc) to stderr, then abort().
 */
#define E(expr) CHECK((rc = (expr)) == MDB_SUCCESS, #expr)
#define RES(err, expr) ((rc = (expr)) == (err) || (CHECK(!rc, #expr), 0))
#define CHECK(test, msg) ((test) ? (void)0 : ((void)fprintf(stderr, \
	"%s:%d: %s: %s\n", __FILE__, __LINE__, msg, mdb_strerror(rc)), abort()))

/* The environment currently under test (created by setup()). */
static MDB_env *env;
/* Handle of the unnamed (main) database. */
static MDB_dbi main_dbi;

/* Number of entries in dbi[]. */
static MDB_dbi numDbs = 0;
/* Heap-allocated array of named-database handles. */
static MDB_dbi *dbi;
/*
 * Create and open the LMDB environment at env_name and open dbNum named
 * databases in it (creating them if necessary).
 *
 * dbNum: number of named databases to open; also used as the
 *        environment's maxdbs limit.
 *
 * On return, env is open, main_dbi refers to the unnamed database,
 * numDbs == dbNum and dbi[0..numDbs-1] hold the named-database handles.
 * Any LMDB error or allocation failure prints a diagnostic and aborts.
 */
void setup(unsigned int dbNum)
{
	int rc; /* written by the E() macro */

	E(mdb_env_create(&env));
	E(mdb_env_set_maxreaders(env, 1));
	E(mdb_env_set_maxdbs(env, dbNum));
	E(mdb_env_set_mapsize(env, MAX_MAP_SIZ));
	E(mdb_env_open(env, env_name, 0, 0664));

	/*
	 * Drop the handle table of any previous run so repeated setup() calls
	 * do not leak it; free(NULL) is a no-op on the first call.
	 */
	free(dbi);
	numDbs = dbNum;
	dbi = calloc(numDbs, sizeof *dbi);
	if (dbi == NULL && numDbs != 0) {
		fprintf(stderr, "%s:%d: out of memory allocating %u database handles\n",
			__FILE__, __LINE__, (unsigned int)numDbs);
		abort();
	}

	/* Open every database inside a single write transaction. */
	MDB_txn *txn;
	E(mdb_txn_begin(env, NULL, 0, &txn));
	E(mdb_dbi_open(txn, NULL, 0, &main_dbi));

	for (unsigned int i = 0; i < numDbs; i++) {
		/* Database names are the index in hex, zero-padded to 3 digits. */
		char name[16];

		snprintf(name, sizeof name, "%03x", i);
		E(mdb_dbi_open(txn, name, MDB_CREATE, &dbi[i]));
	}

	E(mdb_txn_commit(txn));
}
void cleanup() { mdb_env_close(env);
char name[1024];
sprintf(name, "%s/data.mdb", env_name); unlink(name); sprintf(name, "%s/lock.mdb", env_name); unlink(name); }
struct timespec get_time() { struct timespec ts;
clock_serv_t cclock; mach_timespec_t mts; host_get_clock_service(mach_host_self(), CALENDAR_CLOCK, &cclock); clock_get_time(cclock, &mts); mach_port_deallocate(mach_task_self(), cclock); ts.tv_sec = mts.tv_sec; ts.tv_nsec = mts.tv_nsec;
return ts; }
/*
 * Benchmark: repeat (begin read-only txn, open cursor on dbi[0], close
 * cursor, abort txn) num_iterations times and print the elapsed time
 * together with the iteration count and the current number of databases.
 *
 * num_iterations: how many begin/open/close/abort cycles to run.
 *
 * Requires setup() to have been called. Any LMDB error aborts.
 */
void test(unsigned int num_iterations)
{
	int rc; /* written by the E() macro */
	MDB_txn *txn;
	MDB_cursor *cursor;

	struct timespec ts = get_time();

	for (unsigned int i = 0; i < num_iterations; i++) {
		E(mdb_txn_begin(env, NULL, MDB_RDONLY, &txn));
		E(mdb_cursor_open(txn, dbi[0], &cursor));

		mdb_cursor_close(cursor);
		mdb_txn_abort(txn);
	}

	struct timespec te = get_time();

	/*
	 * Subtract the fields before converting to double: the absolute
	 * second counts are large, and subtracting two big doubles would
	 * throw away sub-microsecond precision.
	 */
	double elapsed = (double)(te.tv_sec - ts.tv_sec)
		+ 1.0e-9 * (double)(te.tv_nsec - ts.tv_nsec);

	/* Both counters are unsigned, so they are printed with %u. */
	printf("[%u] iterations (begin, cursor_open, cursor_close, abort) with [%u] databases in [%.5f] seconds\n\n",
		num_iterations, (unsigned int)numDbs, elapsed);
}
/*
 * Run the benchmark once for each database count (10, 100, 1000, 10000,
 * 100000), with one million iterations per run. Every run builds a fresh
 * environment with setup() and removes it again with cleanup().
 */
int main(int argc, char *argv[])
{
	static const unsigned int db_counts[] = { 10, 100, 1000, 10000, 100000 };
	const size_t run_count = sizeof db_counts / sizeof db_counts[0];

	/* Command-line arguments are not used. */
	(void)argc;
	(void)argv;

	for (size_t run = 0; run < run_count; run++) {
		setup(db_counts[run]);
		test(1000 * 1000);
		cleanup();
	}

	return 0;
}
On 27/05/16 07:37, "Howard Chu" <hyc@symas.com> wrote:
Jürg Bircher wrote:
Hello
Improved handling for large number of databases
If interested let me know how to contribute.
Looks interesting, yes. I assume you have profiled the code before and after the suggested changes, please provide your profiling results.
Please read the Developer Guidelines. http://www.openldap.org/devel/contributing.html
Access newly opened database from another transaction
Sounds like an oddball case. Applications should open all their DBIs from a single thread and not start any other threads/transactions until all setup is completed.
Hope it is useful!
Thanks.
-- -- Howard Chu CTO, Symas Corp. http://www.symas.com Director, Highland Sun http://highlandsun.com/hyc/ Chief Architect, OpenLDAP http://www.openldap.org/project/