From 540ea316e77eb38f09a74b07365964c2a1161d8e Mon Sep 17 00:00:00 2001
From: Yannick Lecaillez <yannick.lecaillez@forgerock.com>
Date: Tue, 31 Mar 2015 16:02:26 +0000
Subject: [PATCH] OPENDJ-1199: Reduce memory/disk usage of JE backend (variable length encoding for EntryIDSet)
---
opendj-server-legacy/src/main/java/org/opends/server/backends/pluggable/EntryIDSet.java | 284 ++++++++++++++++++++++++++++++++++++--------------------
1 files changed, 184 insertions(+), 100 deletions(-)
diff --git a/opendj-server-legacy/src/main/java/org/opends/server/backends/pluggable/EntryIDSet.java b/opendj-server-legacy/src/main/java/org/opends/server/backends/pluggable/EntryIDSet.java
index bcd968d..a65b061 100644
--- a/opendj-server-legacy/src/main/java/org/opends/server/backends/pluggable/EntryIDSet.java
+++ b/opendj-server-legacy/src/main/java/org/opends/server/backends/pluggable/EntryIDSet.java
@@ -48,6 +48,9 @@
@SuppressWarnings("javadoc")
final class EntryIDSet implements Iterable<EntryID>
{
+ public static final EntryIDSetCodec CODEC_V1 = new EntryIDSetCodecV1();
+ public static final EntryIDSetCodec CODEC_V2 = new EntryIDSetCodecV2();
+
private static final ByteSequence NO_KEY = ByteString.valueOf("<none>");
private static final long[] EMPTY_LONG_ARRAY = new long[0];
private static final long[] NO_ENTRY_IDS_RANGE = new long[] { 0, 0 };
@@ -61,8 +64,6 @@
boolean isDefined();
- ByteString toByteString();
-
long[] getRange();
long[] getIDs();
@@ -84,6 +85,20 @@
}
/**
+ * Define serialization contract for EntryIDSet
+ */
+ interface EntryIDSetCodec {
+
+ static final int INT_SIZE = 4;
+
+ static final int LONG_SIZE = 8;
+
+ ByteString encode(EntryIDSet idSet);
+
+ EntryIDSet decode(ByteSequence key, ByteString value);
+ }
+
+ /**
* Concrete implements representing a set of EntryIDs, sorted in ascending order.
*/
private static final class DefinedImpl implements EntryIDSetImplementor
@@ -117,17 +132,6 @@
}
@Override
- public ByteString toByteString()
- {
- final ByteStringBuilder builder = new ByteStringBuilder(8 * entryIDs.length);
- for (long value : entryIDs)
- {
- builder.append(value);
- }
- return builder.toByteString();
- }
-
- @Override
public boolean add(EntryID entryID)
{
long id = entryID.longValue();
@@ -198,7 +202,7 @@
if (entryIDs.length == 0)
{
- entryIDs = Arrays.copyOf(anotherEntryIDSet.getIDs(), anotherEntryIDSet.getIDs().length);
+ entryIDs = anotherEntryIDSet.getIDs();
return;
}
@@ -347,13 +351,6 @@
}
@Override
- public ByteString toByteString()
- {
- // Set top bit.
- return ByteString.valueOf(undefinedSize | Long.MIN_VALUE);
- }
-
- @Override
public boolean add(EntryID entryID)
{
if (maintainUndefinedSize())
@@ -466,6 +463,172 @@
}
}
+ /**
+ * Legacy EntryIDSet codec implementation
+ */
+ private static final class EntryIDSetCodecV1 implements EntryIDSetCodec
+ {
+ @Override
+ public ByteString encode(EntryIDSet idSet)
+ {
+ return ByteString.wrap(append(new ByteStringBuilder(getEstimatedSize(idSet)), idSet).trimToSize()
+ .getBackingArray());
+ }
+
+ @Override
+ public EntryIDSet decode(ByteSequence key, ByteString value)
+ {
+ checkNotNull(key, "key must not be null");
+ checkNotNull(value, "value must not be null");
+
+ if (value.isEmpty())
+ {
+ // Entry limit has exceeded and there is no encoded undefined set size.
+ return newDefinedSet();
+ }
+ else if ((value.byteAt(0) & 0x80) == 0x80)
+ {
+ // Entry limit has exceeded and there is an encoded undefined set size.
+ return newUndefinedSetWithSize(key, decodeUndefinedSize(value));
+ }
+ else
+ {
+ // Seems like entry limit has not been exceeded and the bytes is a list of entry IDs.
+ return newDefinedSet(decodeRaw(value.asReader(), value.length() / LONG_SIZE));
+ }
+ }
+
+ private int getEstimatedSize(EntryIDSet idSet)
+ {
+ if (idSet.isDefined())
+ {
+ return idSet.getIDs().length * LONG_SIZE;
+ }
+ else
+ {
+ return LONG_SIZE;
+ }
+ }
+
+ private long[] decodeRaw(ByteSequenceReader reader, int nbEntriesToDecode)
+ {
+ checkNotNull(reader, "builder must not be null");
+ Reject.ifFalse(nbEntriesToDecode >= 0, "nbEntriesToDecode must be >= 0");
+
+ final long ids[] = new long[nbEntriesToDecode];
+ for(int i = 0 ; i < nbEntriesToDecode ; i++) {
+ ids[i] = reader.getLong();
+ }
+ return ids;
+ }
+
+ private ByteStringBuilder append(ByteStringBuilder builder, EntryIDSet idSet)
+ {
+ checkNotNull(idSet, "idSet must not be null");
+ checkNotNull(builder, "builder must not be null");
+
+ if (idSet.isDefined())
+ {
+ for (long value : idSet.getIDs())
+ {
+ builder.append(value);
+ }
+ return builder;
+ }
+ else
+ {
+ // Set top bit.
+ return builder.append(idSet.size() | Long.MIN_VALUE);
+ }
+ }
+
+ private static long decodeUndefinedSize(ByteSequence bytes)
+ {
+ // remove top bit
+ return bytes.length() == LONG_SIZE ? bytes.asReader().getLong() & Long.MAX_VALUE : Long.MAX_VALUE;
+ }
+ }
+
+ /**
+ * Compacted EntryIDSet codec implementation. Idea is to take advantages of
+ * org.forgerock.opendj.ldap.ByteStringBuilder#appendCompact() able to write small values of long in fewer bytes.
+ * Rather than storing the full list of IDs, we store only the difference of the Nth ID with the N-1th one in the hope
+ * that the result will be small enough to be compacted by appendCompact().
+ */
+ private static final class EntryIDSetCodecV2 implements EntryIDSetCodec
+ {
+ private static final byte UNDEFINED_SET = (byte) 0xFF;
+
+ @Override
+ public ByteString encode(EntryIDSet idSet)
+ {
+ checkNotNull(idSet, "idSet must not be null");
+ ByteStringBuilder builder = new ByteStringBuilder(getEstimatedSize(idSet));
+ return append(builder, idSet).toByteString();
+ }
+
+ @Override
+ public EntryIDSet decode(ByteSequence key, ByteString value)
+ {
+ checkNotNull(key, "key must not be null");
+ checkNotNull(value, "value must not be null");
+
+ final ByteSequenceReader reader = value.asReader();
+ if ( reader.get() == UNDEFINED_SET) {
+ return newUndefinedSetWithSize(key, reader.getLong());
+ } else {
+ reader.rewind();
+ return newDefinedSet(decodeRaw(reader, (int) reader.getCompactUnsigned()));
+ }
+ }
+
+ private ByteStringBuilder append(ByteStringBuilder builder, EntryIDSet idSet)
+ {
+ checkNotNull(idSet, "idSet must not be null");
+ checkNotNull(builder, "builder must not be null");
+
+ if (idSet.isDefined())
+ {
+ builder.appendCompactUnsigned(idSet.size());
+ long basis = 0;
+ for (long value : idSet.getIDs())
+ {
+ builder.appendCompactUnsigned(value - basis);
+ basis = value;
+ }
+ }
+ else
+ {
+ builder.append(UNDEFINED_SET);
+ builder.append(idSet.size());
+ }
+ return builder;
+ }
+
+ private int getEstimatedSize(EntryIDSet idSet)
+ {
+ checkNotNull(idSet, "idSet must not be null");
+ return idSet.getIDs().length * LONG_SIZE + INT_SIZE;
+ }
+
+ private long[] decodeRaw(ByteSequenceReader reader, int nbEntriesToDecode)
+ {
+ checkNotNull(reader, "reader must not be null");
+ Reject.ifFalse(nbEntriesToDecode >= 0, "nbEntriesToDecode must be >= 0");
+
+ if ( nbEntriesToDecode == 0 ) {
+ return EMPTY_LONG_ARRAY;
+ } else {
+ final long ids[] = new long[nbEntriesToDecode];
+ ids[0] = reader.getCompactUnsigned();
+ for(int i = 1 ; i < nbEntriesToDecode ; i++) {
+ ids[i] = ids[i-1] + reader.getCompactUnsigned();
+ }
+ return ids;
+ }
+ }
+ }
+
static EntryIDSet newUndefinedSet()
{
return new EntryIDSet(new UndefinedImpl(NO_KEY, Long.MAX_VALUE));
@@ -495,38 +658,6 @@
return new EntryIDSet(new DefinedImpl(ids));
}
- /**
- * Creates a new entry ID set from the raw database value.
- *
- * @param key
- * The database key that contains this value.
- * @param value
- * The database value, or null if there are no entry IDs.
- * @throws NullPointerException
- * if either key or value is null
- */
- static EntryIDSet newSetFromBytes(ByteSequence key, ByteString value)
- {
- checkNotNull(key, "key must not be null");
- checkNotNull(value, "value must not be null");
-
- if (value.isEmpty())
- {
- // Entry limit has exceeded and there is no encoded undefined set size.
- return newUndefinedSetWithKey(key);
- }
- else if ((value.byteAt(0) & 0x80) == 0x80)
- {
- // Entry limit has exceeded and there is an encoded undefined set size.
- return newUndefinedSetWithSize(key, decodeUndefinedSize(value));
- }
- else
- {
- // Seems like entry limit has not been exceeded and the bytes is a list of entry IDs.
- return newDefinedSet(decodeEntryIDSet(value));
- }
- }
-
private static long[] intersection(long[] set1, long[] set2)
{
long[] target = new long[Math.min(set1.length, set2.length)];
@@ -567,10 +698,6 @@
{
checkNotNull(sets, "sets must not be null");
- // FIXME: Benchmarks shown that its possible to have a 5x performance gain if we sort the non overlapping sets. To
- // do that, we can use compareForOverlap(). In case sets are unordered and non overlapping, this optimization allow
- // to skip the final sort() applied on the resulting set.
-
int count = 0;
boolean containsUndefinedSet = false;
@@ -629,39 +756,6 @@
}
}
- /**
- * Decodes and returns the entryID list out of the provided byte sequence.
- *
- * @param bytes
- * the encoded entryID list
- * @return a long array representing the entryID list
- */
- static long[] decodeEntryIDSet(ByteSequence bytes)
- {
- final ByteSequenceReader reader = bytes.asReader();
- final int count = bytes.length() / 8;
- final long[] entryIDSet = new long[count];
- for (int i = 0; i < count; i++)
- {
- entryIDSet[i] = reader.getLong();
- }
- return entryIDSet;
- }
-
- /**
- * Decodes and returns the undefined size out of the provided byte string.
- *
- * @param bytes
- * the encoded undefined size
- * @return the undefined size
- */
- static long decodeUndefinedSize(ByteString bytes)
- {
- return bytes.length() == 8
- ? bytes.toLong() & Long.MAX_VALUE
- : Long.MAX_VALUE; // remove top bit
- }
-
private EntryIDSetImplementor concreteImpl;
private EntryIDSet(EntryIDSetImplementor concreteImpl)
@@ -711,16 +805,6 @@
}
/**
- * Get a database representation of this object.
- *
- * @return A database representation of this object as a byte array.
- */
- public ByteString toByteString()
- {
- return concreteImpl.toByteString();
- }
-
- /**
* Insert an ID into this set.
*
* @param entryID
--
Gitblit v1.10.0