This is a multi-part message in MIME format. --------------000000040604020102000407 Content-Type: text/plain; charset=windows-1251; format=flowed Content-Transfer-Encoding: 7bit
The attached files are derived from OpenLDAP Software. All of the modifications to OpenLDAP Software represented in the following patch(es) were developed by Peter-Service LLC, Moscow, Russia. Peter-Service LLC has not assigned rights and/or interest in this work to any party. I, Leonid Yuriev, am authorized by Peter-Service LLC, my employer, to release this work under the following terms.
Peter-Service LLC hereby places the following modifications to OpenLDAP Software (and only these modifications) into the public domain. Hence, these modifications may be freely used and/or redistributed for any purpose with or without attribution and/or other notice.
--------------000000040604020102000407 Content-Type: text/x-patch; name="0001-lmdb-ITS-7974-oomkiller-feature.patch" Content-Transfer-Encoding: 7bit Content-Disposition: attachment; filename="0001-lmdb-ITS-7974-oomkiller-feature.patch"
From 85fce95eaa0e71ee43625ccc202c173f7d4acb4a Mon Sep 17 00:00:00 2001
From: Leo Yuriev leo@yuriev.ru Date: Tue, 21 Oct 2014 19:25:32 +0400 Subject: [PATCH 1/2] lmdb: ITS#7974 oomkiller feature.
--- libraries/liblmdb/lmdb.h | 34 +++++++++++++++++ libraries/liblmdb/mdb.c | 95 ++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 126 insertions(+), 3 deletions(-)
diff --git a/libraries/liblmdb/lmdb.h b/libraries/liblmdb/lmdb.h index bdbb0b9..a3ca62e 100644 --- a/libraries/liblmdb/lmdb.h +++ b/libraries/liblmdb/lmdb.h @@ -1537,6 +1537,40 @@ int mdb_reader_list(MDB_env *env, MDB_msg_func *func, void *ctx); * @return 0 on success, non-zero on failure. */ int mdb_reader_check(MDB_env *env, int *dead); + + /** @brief A callback function for killing laggard readers, + * called in case of an MDB_MAP_FULL error. + * + * @param[in] env An environment handle returned by #mdb_env_create(). + * @param[in] pid pid of the reader process. + * @param[in] thread_id thread_id of the reader thread. + * @param[in] txn Transaction number on which the reader stalled. + * @return -1 on failure (reader is not killed), + * 0 on a race condition (no such reader), + * 1 on success (reader was killed), + * >1 on success (reader was definitely killed). + */ +typedef int (MDB_oomkiller_func)(MDB_env *env, int pid, void* thread_id, size_t txn); + + /** @brief Set the oomkiller callback. + * + * The callback is called only in the out-of-pages case, to kill + * laggard readers so that freeDB pages can be reclaimed. + * + * @param[in] env An environment handle returned by #mdb_env_create(). + * @param[in] oomkiller A #MDB_oomkiller_func function or NULL to disable. + */ +void mdb_env_set_oomkiller(MDB_env *env, MDB_oomkiller_func *oomkiller); + + /** @brief Get the current oomkiller callback. + * + * The callback is called only in the out-of-pages case, to kill + * laggard readers so that freeDB pages can be reclaimed. + * + * @param[in] env An environment handle returned by #mdb_env_create(). + * @return A #MDB_oomkiller_func function or NULL if disabled. + */ +MDB_oomkiller_func* mdb_env_get_oomkiller(MDB_env *env); /** @} */
#ifdef __cplusplus diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index 6cc3433..e60d83d 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -1145,6 +1145,7 @@ struct MDB_env { #endif void *me_userctx; /**< User-settable context */ MDB_assert_func *me_assert_func; /**< Callback for assertion failures */ + MDB_oomkiller_func *me_oomkiller; /**< Callback for killing laggard readers */ };
/** Nested transaction */ @@ -1900,6 +1901,77 @@ mdb_find_oldest(MDB_txn *txn) return oldest; }
+static txnid_t +mdb_laggard_reader(MDB_env *env, int *laggard) +{ + txnid_t tail = 0; + if (laggard) + *laggard = -1; + if (env->me_txns->mti_txnid > 1) { + int i; + MDB_reader *r = env->me_txns->mti_readers; + + tail = env->me_txns->mti_txnid - 1; + for (i = env->me_txns->mti_numreaders; --i >= 0; ) { + if (r[i].mr_pid) { + txnid_t mr = r[i].mr_txnid; + if (tail > mr) { + tail = mr; + if (laggard) + *laggard = i; + } + } + } + } + + return tail; +} + +static int +mdb_oomkill_laggard(MDB_env *env) +{ + int dead, idx; + txnid_t tail = mdb_laggard_reader(env, &idx); + if (idx < 0) + return 0; + + for(;;) { + MDB_reader *r; + MDB_THR_T tid; + pid_t pid; + int rc; + + if (mdb_reader_check(env, &dead)) + break; + + if (dead && tail < mdb_laggard_reader(env, NULL)) + return 1; + + if (!env->me_oomkiller) + break; + + r = &env->me_txns->mti_readers[ idx ]; + pid = r->mr_pid; + tid = r->mr_tid; + if (r->mr_txnid != tail || pid <= 0) + continue; + + rc = env->me_oomkiller(env, pid, (void*) tid, tail); + if (rc < 0) + break; + + if (rc) { + r->mr_txnid = (txnid_t)-1; + if (rc > 1) { + r->mr_tid = 0; + r->mr_pid = 0; + } + } + } + + return tail < mdb_laggard_reader(env, NULL); +} + /** Add a page to the txn's dirty list */ static void mdb_page_dirty(MDB_txn *txn, MDB_page *mp) @@ -1978,6 +2050,7 @@ mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp) goto fail; }
+oomkill_retry:; for (op = MDB_FIRST;; op = MDB_NEXT) { MDB_val key, data; MDB_node *leaf; @@ -2073,9 +2146,11 @@ mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp) i = 0; pgno = txn->mt_next_pgno; if (pgno + num >= env->me_maxpg) { - DPUTS("DB size maxed out"); - rc = MDB_MAP_FULL; - goto fail; + DPUTS("DB size maxed out"); + if (mdb_oomkill_laggard(env)) + goto oomkill_retry; + rc = MDB_MAP_FULL; + goto fail; }
search_done: @@ -9403,4 +9478,18 @@ mdb_reader_check(MDB_env *env, int *dead) *dead = count; return MDB_SUCCESS; } + +void +mdb_env_set_oomkiller(MDB_env *env, MDB_oomkiller_func *oomkiller) +{ + if (env) + env->me_oomkiller = oomkiller; +} + +MDB_oomkiller_func* +mdb_env_get_oomkiller(MDB_env *env) +{ + return env ? env->me_oomkiller : NULL; +} + /** @} */