--Apple-Mail-4--750948447 Content-Transfer-Encoding: 7bit Content-Type: text/plain; charset=US-ASCII; delsp=yes; format=flowed
On Aug 23, 2007, at 2:56 PM, Howard Chu wrote: [ ... re attribute value matching ... ]
The only way to do this right, without dying on an O(n^2) algorithm, is to sort both arrays first and then walk through them in order as the original code did. If you want to submit such a patch, please do. I don't have a lot of time to focus on this at the moment.
I did look at this, and the attached patch is what I came up with, relative to HEAD 1.356. I have tested it, but unfortunately the results are confounded by other problems - a crash (syncprov thread queue entry so->s_qtask wasn't in slapd_rq, and some pointer values in the former structure were 0x30303030), and in the end the replica was missing a lot of attribute deletions. Wouldn't think either was related to attr_cmp(), but I am out of time to look at these problems and keep trying to demonstrate a 100% successful syncrepl replication.
I didn't sort the arrays. I tried that (quicksort algorithm), and it was very expensive. If there's reason to hope that the old and new values will be mostly in the same order, then it is much cheaper in this case to start with a simple pairwise walk through, as in the older code, and then clean up with a full O(n^2) match on what's left. The implementation is also simpler.
Actual performance depends on the data, but with the data I used for tests of the algorithms, I found that this approach outperforms the original 2.4.4 implementation, in terms of bvmatch counts. That's because the 2.4.4 needed to look ahead through the rest of the value array in a couple of places in the loop. Performance will be worst where many values are changed, i.e., many adds and deletes - many adds and few deletes, or vice versa, will be significantly faster. I think this is good for most real world situations.
I restored variable names from 2.4.4 because they returned to their original usage, likewise with the sml_values handling, and dedicated a variable to the modtail logic at the end of attr_cmp().
I haven't looked into it, but happened to notice just this afternoon a comment in dn_callback(): "We assume that attributes are saved in the same order in the remote and local databases. So ..."
Donn Cave, donn@u.washington.edu
--Apple-Mail-4--750948447 Content-Transfer-Encoding: 7bit Content-Type: application/octet-stream; x-unix-mode=0644; name=syncrepl.c Content-Disposition: attachment; filename=syncrepl.c
--- servers/slapd/syncrepl.c.orig 2007-09-18 15:10:52.000000000 -0700 +++ servers/slapd/syncrepl.c 2007-09-18 15:57:33.000000000 -0700 @@ -2673,17 +2673,98 @@ return rc; }
+/* Remove null elements. */ +static int +nonulls( struct berval **a, int na ) +{ + int i, n; + for ( i = n = 0; i < na; ++i ) { + if ( a[i] ) { + if ( i > n ) + a[n] = a[i]; + ++n; + } + } + return n; +} + +/* +** Secondary difference: remove any remaining matching elements, +** by brute force M*N comparison. +*/ +static void +ixpurge( struct berval **iva, int niva, int *nivap, + struct berval **ivb, int nivb, int *nivbp ) +{ + int ia, ib; + for ( ia = 0; ia < niva; ++ia ) { + for ( ib = 0; ib < nivb; ++ib ) { + if ( ivb[ib] && bvmatch( iva[ia], ivb[ib] ) ) { + iva[ia] = 0; + ivb[ib] = 0; + break; + } + } + } + *nivap = nonulls( iva, niva ); + *nivbp = nonulls( ivb, nivb ); +} + +/* +** Compare two lists of berval strings that are supposed to be in +** similar order, and return list of indices to old (delete) and +** new (add) elements of the respective lists. If of significantly +** different sizes, the smaller should be first for optimal performance. +*/ +static void +diff1( + struct berval *bva, + int na, + struct berval *bvb, + int nb, + struct berval **dva, + int *ndvap, + struct berval **dvb, + int *ndvbp +) +{ + int ia, ib, ib0, ndva, ndvb, ndvar, ndvbr; + ndva = ndvb = 0; + for ( ia = ib0 = 0; ia < na; ++ia ) { + /* Find a/b match, starting from last b anchor. */ + for ( ib = ib0; ib < nb && !bvmatch( &bva[ia], &bvb[ib] ); ++ib ) + ; + if ( ib < nb ) { + /* Found. Record non-matching b's, reset anchor. */ + for ( ; ib0 < ib; ++ib0 ) + dvb[ndvb++] = &bvb[ib0]; + ++ib0; + } else { + /* Not found, record non-matching a. */ + dva[ndva++] = &bva[ia]; + } + } + /* Record remaining unmatched b's. */ + for ( ; ib0 < nb; ++ib0 ) + dvb[ndvb++] = &bvb[ib0]; + /* Clean up spurious non-matches, in case order assumptions invalid. */ + ixpurge( dva, ndva, &ndvar, dvb, ndvb, &ndvbr ); + *ndvap = ndvar; + *ndvbp = ndvbr; +} + static void attr_cmp( Operation *op, Attribute *old, Attribute *new, Modifications ***mret, Modifications ***mcur ) { - int i, j; + int tails; + int i; Modifications *mod, **modtail;
modtail = *mret;
if ( old ) { - int n, o, nn, no; + int n, o, a, d; struct berval **adds, **dels; /* count old and new */ for ( o=0; old->a_vals[o].bv_val; o++ ) ; @@ -2692,105 +2773,83 @@ /* there MUST be both old and new values */ assert( o != 0 ); assert( n != 0 ); - j = 0;
adds = op->o_tmpalloc( sizeof(struct berval *) * n, op->o_tmpmemctx ); dels = op->o_tmpalloc( sizeof(struct berval *) * o, op->o_tmpmemctx );
- for ( i=0; i<o; i++ ) dels[i] = &old->a_vals[i]; - for ( i=0; i<n; i++ ) adds[i] = &new->a_vals[i]; - - nn = n; no = o; - - for ( i=0; i<o; i++ ) { - for ( j=0; j<n; j++ ) { - if ( !adds[j] ) - continue; - if ( bvmatch( dels[i], adds[j] ) ) { - no--; - nn--; - adds[j] = NULL; - dels[i] = NULL; - break; - } - } - } + if ( o > n ) + diff1( new->a_vals, n, old->a_vals, o, adds, &a, dels, &d ); + else + diff1( old->a_vals, o, new->a_vals, n, dels, &d, adds, &a ); + tails = 0;
- i = j; /* all old values were deleted, just use the replace op */ - if ( no == o ) { - i = j-1; - } else if ( no ) { + if ( d == o ) { + tails = 1; + } else if ( d ) { /* delete some values */ mod = ch_malloc( sizeof( Modifications ) ); mod->sml_op = LDAP_MOD_DELETE; mod->sml_flags = 0; mod->sml_desc = old->a_desc; mod->sml_type = mod->sml_desc->ad_cname; - mod->sml_values = ch_malloc( ( no + 1 ) * sizeof(struct berval) ); + mod->sml_values = ch_malloc( ( d + 1 ) * sizeof(struct berval) ); if ( old->a_vals != old->a_nvals ) { - mod->sml_nvalues = ch_malloc( ( no + 1 ) * sizeof(struct berval) ); + mod->sml_nvalues = ch_malloc( ( d + 1 ) * sizeof(struct berval) ); } else { mod->sml_nvalues = NULL; } - j = 0; - for ( i = 0; i < o; i++ ) { - if ( !dels[i] ) continue; - ber_dupbv( &mod->sml_values[j], &old->a_vals[i] ); + for ( i = 0; i < d; i++ ) { + ber_dupbv( &mod->sml_values[i], dels[i] ); if ( mod->sml_nvalues ) { - ber_dupbv( &mod->sml_nvalues[j], &old->a_nvals[i] ); + ber_dupbv( &mod->sml_nvalues[i], dels[i] ); } - j++; } - BER_BVZERO( &mod->sml_values[j] ); + BER_BVZERO( &mod->sml_values[i] ); if ( mod->sml_nvalues ) { - BER_BVZERO( &mod->sml_nvalues[j] ); + BER_BVZERO( &mod->sml_nvalues[i] ); } *modtail = mod; modtail = &mod->sml_next; - i = j; + tails = 0; } op->o_tmpfree( dels, op->o_tmpmemctx ); /* some values were added */ - if ( nn && no < o ) { + if ( a && d < o ) { mod = ch_malloc( sizeof( Modifications ) ); mod->sml_op = LDAP_MOD_ADD; mod->sml_flags = 0; mod->sml_desc = old->a_desc; mod->sml_type = mod->sml_desc->ad_cname; - mod->sml_values = ch_malloc( ( nn + 1 ) * sizeof(struct berval) ); + mod->sml_values = ch_malloc( ( a + 1 ) * sizeof(struct berval) ); if ( old->a_vals != old->a_nvals ) { - mod->sml_nvalues = ch_malloc( ( nn + 1 ) * sizeof(struct berval) ); + mod->sml_nvalues = ch_malloc( ( a + 1 ) * sizeof(struct berval) ); } else { mod->sml_nvalues = NULL; } - j = 0; - for ( i = 0; i < n; i++ ) { - if ( !adds[i] ) continue; - ber_dupbv( &mod->sml_values[j], &new->a_vals[i] ); + for ( i = 0; i < a; i++ ) { + ber_dupbv( &mod->sml_values[i], adds[i] ); if ( mod->sml_nvalues ) { - ber_dupbv( &mod->sml_nvalues[j], &new->a_nvals[i] ); + ber_dupbv( &mod->sml_nvalues[i], adds[i] ); } - j++; } - BER_BVZERO( &mod->sml_values[j] ); + BER_BVZERO( &mod->sml_values[i] ); if ( mod->sml_nvalues ) { - BER_BVZERO( &mod->sml_nvalues[j] ); + BER_BVZERO( &mod->sml_nvalues[i] ); } *modtail = mod; modtail = &mod->sml_next; - i = j; + tails = 0; } op->o_tmpfree( adds, op->o_tmpmemctx ); } else { /* new attr, just use the new mod */ - i = 0; - j = 1; + tails = 1; } /* advance to next element */ mod = **mcur; if ( mod ) { - if ( i != j ) { + if ( tails ) { **mcur = mod->sml_next; *modtail = mod; modtail = &mod->sml_next;
--Apple-Mail-4--750948447--