From 540ea316e77eb38f09a74b07365964c2a1161d8e Mon Sep 17 00:00:00 2001
From: Yannick Lecaillez <yannick.lecaillez@forgerock.com>
Date: Tue, 31 Mar 2015 16:02:26 +0000
Subject: [PATCH] OPENDJ-1199: Reduce memory/disk usage of JE backend (variable length encoding for EntryIDSet)

---
 opendj-server-legacy/src/main/java/org/opends/server/backends/pluggable/EntryIDSet.java |  284 ++++++++++++++++++++++++++++++++++++--------------------
 1 files changed, 184 insertions(+), 100 deletions(-)

diff --git a/opendj-server-legacy/src/main/java/org/opends/server/backends/pluggable/EntryIDSet.java b/opendj-server-legacy/src/main/java/org/opends/server/backends/pluggable/EntryIDSet.java
index bcd968d..a65b061 100644
--- a/opendj-server-legacy/src/main/java/org/opends/server/backends/pluggable/EntryIDSet.java
+++ b/opendj-server-legacy/src/main/java/org/opends/server/backends/pluggable/EntryIDSet.java
@@ -48,6 +48,9 @@
 @SuppressWarnings("javadoc")
 final class EntryIDSet implements Iterable<EntryID>
 {
+  public static final EntryIDSetCodec CODEC_V1 = new EntryIDSetCodecV1();
+  public static final EntryIDSetCodec CODEC_V2 = new EntryIDSetCodecV2();
+
   private static final ByteSequence NO_KEY = ByteString.valueOf("<none>");
   private static final long[] EMPTY_LONG_ARRAY = new long[0];
   private static final long[] NO_ENTRY_IDS_RANGE = new long[] { 0, 0 };
@@ -61,8 +64,6 @@
 
     boolean isDefined();
 
-    ByteString toByteString();
-
     long[] getRange();
 
     long[] getIDs();
@@ -84,6 +85,20 @@
   }
 
   /**
+   * Define serialization contract for EntryIDSet
+   */
+  interface EntryIDSetCodec {
+
+    static final int INT_SIZE = 4;
+
+    static final int LONG_SIZE = 8;
+
+    ByteString encode(EntryIDSet idSet);
+
+    EntryIDSet decode(ByteSequence key, ByteString value);
+  }
+
+  /**
    * Concrete implements representing a set of EntryIDs, sorted in ascending order.
    */
   private static final class DefinedImpl implements EntryIDSetImplementor
@@ -117,17 +132,6 @@
     }
 
     @Override
-    public ByteString toByteString()
-    {
-      final ByteStringBuilder builder = new ByteStringBuilder(8 * entryIDs.length);
-      for (long value : entryIDs)
-      {
-        builder.append(value);
-      }
-      return builder.toByteString();
-    }
-
-    @Override
     public boolean add(EntryID entryID)
     {
       long id = entryID.longValue();
@@ -198,7 +202,7 @@
 
       if (entryIDs.length == 0)
       {
-        entryIDs = Arrays.copyOf(anotherEntryIDSet.getIDs(), anotherEntryIDSet.getIDs().length);
+        entryIDs = anotherEntryIDSet.getIDs();
         return;
       }
 
@@ -347,13 +351,6 @@
     }
 
     @Override
-    public ByteString toByteString()
-    {
-      // Set top bit.
-      return ByteString.valueOf(undefinedSize | Long.MIN_VALUE);
-    }
-
-    @Override
     public boolean add(EntryID entryID)
     {
       if (maintainUndefinedSize())
@@ -466,6 +463,172 @@
     }
   }
 
+  /**
+   * Legacy EntryIDSet codec implementation
+   */
+  private static final class EntryIDSetCodecV1 implements EntryIDSetCodec
+  {
+    @Override
+    public ByteString encode(EntryIDSet idSet)
+    {
+      return ByteString.wrap(append(new ByteStringBuilder(getEstimatedSize(idSet)), idSet).trimToSize()
+          .getBackingArray());
+    }
+
+    @Override
+    public EntryIDSet decode(ByteSequence key, ByteString value)
+    {
+      checkNotNull(key, "key must not be null");
+      checkNotNull(value, "value must not be null");
+
+      if (value.isEmpty())
+      {
+        // Entry limit has exceeded and there is no encoded undefined set size.
+        return newDefinedSet();
+      }
+      else if ((value.byteAt(0) & 0x80) == 0x80)
+      {
+        // Entry limit has exceeded and there is an encoded undefined set size.
+        return newUndefinedSetWithSize(key, decodeUndefinedSize(value));
+      }
+      else
+      {
+        // Seems like entry limit has not been exceeded and the bytes is a list of entry IDs.
+        return newDefinedSet(decodeRaw(value.asReader(), value.length() / LONG_SIZE));
+      }
+    }
+
+    private int getEstimatedSize(EntryIDSet idSet)
+    {
+      if (idSet.isDefined())
+      {
+        return idSet.getIDs().length * LONG_SIZE;
+      }
+      else
+      {
+        return LONG_SIZE;
+      }
+    }
+
+    private long[] decodeRaw(ByteSequenceReader reader, int nbEntriesToDecode)
+    {
+      checkNotNull(reader, "builder must not be null");
+      Reject.ifFalse(nbEntriesToDecode >= 0, "nbEntriesToDecode must be >= 0");
+
+      final long ids[] = new long[nbEntriesToDecode];
+      for(int i = 0 ; i < nbEntriesToDecode ; i++) {
+        ids[i] = reader.getLong();
+      }
+      return ids;
+    }
+
+    private ByteStringBuilder append(ByteStringBuilder builder, EntryIDSet idSet)
+    {
+      checkNotNull(idSet, "idSet must not be null");
+      checkNotNull(builder, "builder must not be null");
+
+      if (idSet.isDefined())
+      {
+        for (long value : idSet.getIDs())
+        {
+          builder.append(value);
+        }
+        return builder;
+      }
+      else
+      {
+        // Set top bit.
+        return builder.append(idSet.size() | Long.MIN_VALUE);
+      }
+    }
+
+    private static long decodeUndefinedSize(ByteSequence bytes)
+    {
+      // remove top bit
+      return bytes.length() == LONG_SIZE ? bytes.asReader().getLong() & Long.MAX_VALUE : Long.MAX_VALUE;
+    }
+  }
+
+  /**
+   * Compacted EntryIDSet codec implementation. Idea is to take advantages of
+   * org.forgerock.opendj.ldap.ByteStringBuilder#appendCompact() able to write small values of long in fewer bytes.
+   * Rather than storing the full list of IDs, we store only the difference of the Nth ID with the N-1th one in the hope
+   * that the result will be small enough to be compacted by appendCompact().
+   */
+  private static final class EntryIDSetCodecV2 implements EntryIDSetCodec
+  {
+    private static final byte UNDEFINED_SET = (byte) 0xFF;
+
+    @Override
+    public ByteString encode(EntryIDSet idSet)
+    {
+      checkNotNull(idSet, "idSet must not be null");
+      ByteStringBuilder builder = new ByteStringBuilder(getEstimatedSize(idSet));
+      return append(builder, idSet).toByteString();
+    }
+
+    @Override
+    public EntryIDSet decode(ByteSequence key, ByteString value)
+    {
+      checkNotNull(key, "key must not be null");
+      checkNotNull(value, "value must not be null");
+
+      final ByteSequenceReader reader = value.asReader();
+      if ( reader.get() == UNDEFINED_SET) {
+          return newUndefinedSetWithSize(key, reader.getLong());
+      } else {
+          reader.rewind();
+          return newDefinedSet(decodeRaw(reader, (int) reader.getCompactUnsigned()));
+      }
+    }
+
+    private ByteStringBuilder append(ByteStringBuilder builder, EntryIDSet idSet)
+    {
+      checkNotNull(idSet, "idSet must not be null");
+      checkNotNull(builder, "builder must not be null");
+
+      if (idSet.isDefined())
+      {
+        builder.appendCompactUnsigned(idSet.size());
+        long basis = 0;
+        for (long value : idSet.getIDs())
+        {
+          builder.appendCompactUnsigned(value - basis);
+          basis = value;
+        }
+      }
+      else
+      {
+        builder.append(UNDEFINED_SET);
+        builder.append(idSet.size());
+      }
+      return builder;
+    }
+
+    private int getEstimatedSize(EntryIDSet idSet)
+    {
+      checkNotNull(idSet, "idSet must not be null");
+      return idSet.getIDs().length * LONG_SIZE + INT_SIZE;
+    }
+
+    private long[] decodeRaw(ByteSequenceReader reader, int nbEntriesToDecode)
+    {
+      checkNotNull(reader, "reader must not be null");
+      Reject.ifFalse(nbEntriesToDecode >= 0, "nbEntriesToDecode must be >= 0");
+
+      if ( nbEntriesToDecode == 0 ) {
+        return EMPTY_LONG_ARRAY;
+      } else {
+        final long ids[] = new long[nbEntriesToDecode];
+        ids[0] = reader.getCompactUnsigned();
+        for(int i = 1 ; i < nbEntriesToDecode ; i++) {
+          ids[i] = ids[i-1] + reader.getCompactUnsigned();
+        }
+        return ids;
+      }
+    }
+  }
+
   static EntryIDSet newUndefinedSet()
   {
     return new EntryIDSet(new UndefinedImpl(NO_KEY, Long.MAX_VALUE));
@@ -495,38 +658,6 @@
     return new EntryIDSet(new DefinedImpl(ids));
   }
 
-  /**
-   * Creates a new entry ID set from the raw database value.
-   *
-   * @param key
-   *          The database key that contains this value.
-   * @param value
-   *          The database value, or null if there are no entry IDs.
-   * @throws NullPointerException
-   *           if either key or value is null
-   */
-  static EntryIDSet newSetFromBytes(ByteSequence key, ByteString value)
-  {
-    checkNotNull(key, "key must not be null");
-    checkNotNull(value, "value must not be null");
-
-    if (value.isEmpty())
-    {
-      // Entry limit has exceeded and there is no encoded undefined set size.
-      return newUndefinedSetWithKey(key);
-    }
-    else if ((value.byteAt(0) & 0x80) == 0x80)
-    {
-      // Entry limit has exceeded and there is an encoded undefined set size.
-      return newUndefinedSetWithSize(key, decodeUndefinedSize(value));
-    }
-    else
-    {
-      // Seems like entry limit has not been exceeded and the bytes is a list of entry IDs.
-      return newDefinedSet(decodeEntryIDSet(value));
-    }
-  }
-
   private static long[] intersection(long[] set1, long[] set2)
   {
     long[] target = new long[Math.min(set1.length, set2.length)];
@@ -567,10 +698,6 @@
   {
     checkNotNull(sets, "sets must not be null");
 
-    // FIXME: Benchmarks shown that its possible to have a 5x performance gain if we sort the non overlapping sets. To
-    // do that, we can use compareForOverlap(). In case sets are unordered and non overlapping, this optimization allow
-    // to skip the final sort() applied on the resulting set.
-
     int count = 0;
 
     boolean containsUndefinedSet = false;
@@ -629,39 +756,6 @@
     }
   }
 
-  /**
-   * Decodes and returns the entryID list out of the provided byte sequence.
-   *
-   * @param bytes
-   *          the encoded entryID list
-   * @return a long array representing the entryID list
-   */
-  static long[] decodeEntryIDSet(ByteSequence bytes)
-  {
-    final ByteSequenceReader reader = bytes.asReader();
-    final int count = bytes.length() / 8;
-    final long[] entryIDSet = new long[count];
-    for (int i = 0; i < count; i++)
-    {
-      entryIDSet[i] = reader.getLong();
-    }
-    return entryIDSet;
-  }
-
-  /**
-   * Decodes and returns the undefined size out of the provided byte string.
-   *
-   * @param bytes
-   *          the encoded undefined size
-   * @return the undefined size
-   */
-  static long decodeUndefinedSize(ByteString bytes)
-  {
-    return bytes.length() == 8
-        ? bytes.toLong() & Long.MAX_VALUE
-        : Long.MAX_VALUE; // remove top bit
-  }
-
   private EntryIDSetImplementor concreteImpl;
 
   private EntryIDSet(EntryIDSetImplementor concreteImpl)
@@ -711,16 +805,6 @@
   }
 
   /**
-   * Get a database representation of this object.
-   *
-   * @return A database representation of this object as a byte array.
-   */
-  public ByteString toByteString()
-  {
-    return concreteImpl.toByteString();
-  }
-
-  /**
    * Insert an ID into this set.
    *
    * @param entryID

--
Gitblit v1.10.0