From 50b5f3c4c4d80598ea5761b987b183e3202cc47f Mon Sep 17 00:00:00 2001
From: dugan <dugan@localhost>
Date: Mon, 07 Apr 2008 13:51:03 +0000
Subject: [PATCH] Import performance commits:
---
opendj-sdk/opends/src/server/org/opends/server/backends/jeb/importLDIF/IntegerImportIDSet.java | 120 ++++++++++++++++++-----
opendj-sdk/opends/src/server/org/opends/server/backends/jeb/importLDIF/LongImportIDSet.java | 33 +++++-
opendj-sdk/opends/src/server/org/opends/server/backends/jeb/importLDIF/WorkThread.java | 3
opendj-sdk/opends/src/server/org/opends/server/backends/jeb/importLDIF/ImportIDSet.java | 15 ++
opendj-sdk/opends/src/server/org/opends/server/backends/jeb/Index.java | 4
opendj-sdk/opends/src/server/org/opends/server/backends/jeb/importLDIF/BufferManager.java | 106 +++++++++++++++++---
6 files changed, 225 insertions(+), 56 deletions(-)
diff --git a/opendj-sdk/opends/src/server/org/opends/server/backends/jeb/Index.java b/opendj-sdk/opends/src/server/org/opends/server/backends/jeb/Index.java
index 748faeb..829bfec 100644
--- a/opendj-sdk/opends/src/server/org/opends/server/backends/jeb/Index.java
+++ b/opendj-sdk/opends/src/server/org/opends/server/backends/jeb/Index.java
@@ -280,8 +280,8 @@
status = read(txn, key, data, LockMode.RMW);
if(status == OperationStatus.SUCCESS) {
ImportIDSet newImportIDSet = new IntegerImportIDSet();
- if (newImportIDSet.merge(data.getData(), importIdSet, indexEntryLimit))
- {
+ if (newImportIDSet.merge(data.getData(), importIdSet,
+ indexEntryLimit, maintainCount)) {
entryLimitExceededCount++;
}
data.setData(newImportIDSet.toDatabase());
diff --git a/opendj-sdk/opends/src/server/org/opends/server/backends/jeb/importLDIF/BufferManager.java b/opendj-sdk/opends/src/server/org/opends/server/backends/jeb/importLDIF/BufferManager.java
index 3af10c5..439ef44 100644
--- a/opendj-sdk/opends/src/server/org/opends/server/backends/jeb/importLDIF/BufferManager.java
+++ b/opendj-sdk/opends/src/server/org/opends/server/backends/jeb/importLDIF/BufferManager.java
@@ -73,7 +73,7 @@
//Overhead values determined from using JHAT. They appear to be the same
//for both 32 and 64 bit machines. Close enough.
private final static int TREEMAP_ENTRY_OVERHEAD = 29;
- private final static int KEY_ELEMENT_OVERHEAD = 28;
+ private final static int KEY_ELEMENT_OVERHEAD = 32;
/**
@@ -98,13 +98,53 @@
* @throws DatabaseException If a problem happened during a flushAll cycle.
*/
void insert(Index index, Entry entry,
- EntryID entryID, Transaction txn)
- throws DatabaseException {
- int entryLimit = index.getIndexEntryLimit();
- Set<byte[]> keySet = new HashSet<byte[]>();
- index.indexer.indexEntry(txn, entry, keySet);
+ EntryID entryID, Transaction txn)
+ throws DatabaseException {
+
+ Set<byte[]> keySet = new HashSet<byte[]>();
+ index.indexer.indexEntry(txn, entry, keySet);
synchronized(elementMap) {
- for(byte[] key : keySet) {
+ insertKeySet(keySet, index, entryID);
+ //If over the memory limit and import hasn't completed
+ //flush some keys from the cache to make room.
+ if(memoryUsage > memoryLimit) {
+ flushUntilUnderLimit();
+ }
+ }
+ }
+
+ /**
+ * Special case for id2children and id2subtree.
+ * Insert an entry ID into the buffer using both of the specified indexes and
+ * the entry to build the key sets.
+ * @param id2children The id2children index.
+ * @param id2subtree The id2subtree index.
+ * @param entry The entry used to build the keyset.
+ * @param entryID The entry ID to insert into the key set.
+ * @param txn A transaction.
+ * @throws DatabaseException If a problem happens formatting the keyset.
+ */
+ void insert(Index id2children, Index id2subtree, Entry entry,
+ EntryID entryID, Transaction txn) throws DatabaseException {
+ Set<byte[]> childKeySet = new HashSet<byte[]>();
+ id2children.indexer.indexEntry(txn, entry, childKeySet);
+ Set<byte[]> subKeySet = new HashSet<byte[]>();
+ id2subtree.indexer.indexEntry(txn, entry, subKeySet);
+ synchronized(elementMap) {
+ insertKeySet(childKeySet, id2children, entryID);
+ insertKeySet(subKeySet, id2subtree, entryID);
+ }
+ }
+
+ /**
+ * Insert a keySet into the element map using the provided index and entry ID.
+ * @param keySet The key set to add to the map.
+ * @param index The index that eventually will contain the entry IDs.
+ * @param entryID The entry ID to add to the entry ID set.
+ */
+ private void insertKeySet(Set<byte[]> keySet, Index index, EntryID entryID) {
+ int entryLimit = index.getIndexEntryLimit();
+ for(byte[] key : keySet) {
KeyHashElement elem = new KeyHashElement(key, index, entryID);
total++;
if(!elementMap.containsKey(elem)) {
@@ -112,7 +152,7 @@
memoryUsage += TREEMAP_ENTRY_OVERHEAD + elem.getMemorySize();
} else {
KeyHashElement curElem = elementMap.get(elem);
- if(curElem.isDefined()) {
+ if(curElem.isDefined() || index.getMaintainCount()) {
int oldSize = curElem.getMemorySize();
curElem.addEntryID(entryID, entryLimit);
int newSize = curElem.getMemorySize();
@@ -121,13 +161,7 @@
hit++;
}
}
- }
- //If over the memory limit and import hasn't completed
- //flush some keys from the cache to make room.
- if(memoryUsage > memoryLimit) {
- flushUntilUnderLimit();
- }
- }
+ }
}
/**
@@ -215,6 +249,9 @@
//The set of IDs related to the key.
private ImportIDSet importIDSet;
+ //Precomputed hash code of the key, used to speed up lookup.
+ private int keyHashCode;
+
/**
* Create instance of an element for the specified key and index, the add
* the specified entry ID to the ID set.
@@ -232,6 +269,7 @@
//Used if there when there are conflicts if two or more indexes have
//the same key.
this.indexHashCode = System.identityHashCode(index);
+ this.keyHashCode = Arrays.hashCode(key);
}
/**
@@ -241,7 +279,7 @@
* @param entryLimit The entry limit
*/
void addEntryID(EntryID entryID, int entryLimit) {
- importIDSet.addEntryID(entryID, entryLimit);
+ importIDSet.addEntryID(entryID, entryLimit, index.getMaintainCount());
}
/**
@@ -263,6 +301,15 @@
}
/**
+ * Return value of the key hash code.
+ *
+ * @return The key hash code value.
+ */
+ int getKeyHashCode() {
+ return keyHashCode;
+ }
+
+ /**
* Return the ID set.
* @return The import ID set.
*/
@@ -280,7 +327,8 @@
}
/**
- * Compare the bytes of two keys.
+ * Compare the bytes of two keys. This is slow; only use it if the hash codes
+ * of the two keys are equal.
*
* @param a Key a.
* @param b Key b.
@@ -309,6 +357,27 @@
}
/**
+ * Compare two element keys. First check the precomputed hashCode. If
+ * the hashCodes are equal, do a second byte-by-byte comparison in case
+ * there was a collision.
+ *
+ * @param elem The element to compare.
+ * @return 0 if the keys are equal, -1 if this element's key orders before the
+ * key of elem, 1 if it orders after.
+ */
+ private int compare(KeyHashElement elem) {
+ if(keyHashCode == elem.getKeyHashCode()) {
+ return compare(key, elem.key);
+ } else {
+ if(keyHashCode < elem.getKeyHashCode()) {
+ return -1;
+ } else {
+ return 1;
+ }
+ }
+ }
+
+ /**
* Compare the specified object to the current object. If the keys are
* equal, then the indexHashCode value is used as a tie-breaker.
*
@@ -321,7 +390,8 @@
throw new NullPointerException();
}
KeyHashElement inElem = (KeyHashElement) o;
- int keyCompare = compare(key, inElem.key);
+ // Compare the precomputed key hash codes first; bytes only if they match.
+ int keyCompare = compare(inElem);
if(keyCompare == 0) {
if(indexHashCode == inElem.indexHashCode) {
return 0;
diff --git a/opendj-sdk/opends/src/server/org/opends/server/backends/jeb/importLDIF/ImportIDSet.java b/opendj-sdk/opends/src/server/org/opends/server/backends/jeb/importLDIF/ImportIDSet.java
index ebdaddc..12376ca 100644
--- a/opendj-sdk/opends/src/server/org/opends/server/backends/jeb/importLDIF/ImportIDSet.java
+++ b/opendj-sdk/opends/src/server/org/opends/server/backends/jeb/importLDIF/ImportIDSet.java
@@ -39,8 +39,10 @@
*
* @param entryID The entry ID to add.
* @param entryLimit The entry limit.
+ * @param maintainCount Maintain count of IDs if in undefined mode.
*/
- public void addEntryID(EntryID entryID, int entryLimit);
+ public void
+ addEntryID(EntryID entryID, int entryLimit, boolean maintainCount);
/**
* Return if a set is defined or not.
@@ -76,13 +78,22 @@
* @param dbBytes The byte array read from DB.
* @param bufImportIDSet The import ID set to merge.
* @param entryLimit The entry limit.
+ * @param maintainCount Maintain count of IDs if in undefined mode.
* @return <CODE>True</CODE> if the merged set is undefined.
*/
public boolean merge(byte[] dbBytes, ImportIDSet bufImportIDSet,
- int entryLimit);
+ int entryLimit, boolean maintainCount);
/**
* Set the import ID set to the undefined state.
*/
public void setUndefined();
+
+
+ /**
+ * Return the undefined size.
+ *
+ * @return The undefined count.
+ */
+ public long getUndefinedSize();
}
diff --git a/opendj-sdk/opends/src/server/org/opends/server/backends/jeb/importLDIF/IntegerImportIDSet.java b/opendj-sdk/opends/src/server/org/opends/server/backends/jeb/importLDIF/IntegerImportIDSet.java
index 37b7db9..8ddc315 100644
--- a/opendj-sdk/opends/src/server/org/opends/server/backends/jeb/importLDIF/IntegerImportIDSet.java
+++ b/opendj-sdk/opends/src/server/org/opends/server/backends/jeb/importLDIF/IntegerImportIDSet.java
@@ -36,7 +36,7 @@
public class IntegerImportIDSet implements ImportIDSet {
//Gleamed from JHAT. The same for 32/64 bit.
- private final static int THIS_OVERHEAD = 17;
+ private final static int THIS_OVERHEAD = 25;
/**
* The internal array where elements are stored.
@@ -53,6 +53,10 @@
//Boolean to keep track if the instance is defined or not.
private boolean isDefined=true;
+
+ //Size recorded for the set while it is undefined.
+ private long undefinedSize = 0;
+
/**
* Create an empty import set.
*/
@@ -78,6 +82,12 @@
return isDefined;
}
+ /**
+ * {@inheritDoc}
+ */
+ public long getUndefinedSize() {
+ return undefinedSize;
+ }
/**
* {@inheritDoc}
@@ -103,22 +113,62 @@
/**
* {@inheritDoc}
*/
- public boolean merge(byte[] dBbytes, ImportIDSet importIdSet, int limit) {
+ public void addEntryID(EntryID entryID, int limit, boolean maintainCount) {
+ if(!isDefined()) {
+ if(maintainCount) {
+ undefinedSize++;
+ }
+ return;
+ }
+ if(isDefined() && ((count + 1) > limit)) {
+ isDefined = false;
+ array = null;
+ if(maintainCount) {
+ undefinedSize = count + 1;
+ } else {
+ undefinedSize = Long.MAX_VALUE;
+ }
+ count = 0;
+ } else {
+ add((int)entryID.longValue());
+ }
+ }
+
+ /**
+ * More complicated version of merge below that keeps track of the undefined
+ * sizes when in undefined mode or moving to undefined mode.
+ *
+ * @param dBbytes The bytes read from jeb.
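+ * @param importIdSet The import ID set to merge.
+ * @param limit The entry limit.
+ * @return <CODE>True</CODE> if the limit-exceeded count should be incremented.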
+ */
+ private boolean
+ mergeCount(byte[] dBbytes, ImportIDSet importIdSet, int limit) {
boolean incrLimitCount=false;
boolean dbUndefined = ((dBbytes[0] & 0x80) == 0x80);
- if(dbUndefined) {
- isDefined=false;
+ if(dbUndefined && (!importIdSet.isDefined())) {
+ undefinedSize = JebFormat.entryIDUndefinedSizeFromDatabase(dBbytes) +
+ importIdSet.getUndefinedSize();
+ isDefined=false;
+ } else if(dbUndefined && (importIdSet.isDefined())) {
+ undefinedSize = JebFormat.entryIDUndefinedSizeFromDatabase(dBbytes) +
+ importIdSet.size();
+ importIdSet.setUndefined();
+ isDefined=false;
} else if(!importIdSet.isDefined()) {
- isDefined=false;
- incrLimitCount=true;
+ int dbSize = JebFormat.entryIDListFromDatabase(dBbytes).length;
+ undefinedSize= dbSize + importIdSet.getUndefinedSize();
+ isDefined=false;
+ incrLimitCount = true;
} else {
array = JebFormat.intArrayFromDatabaseBytes(dBbytes);
if(array.length + importIdSet.size() > limit) {
- isDefined=false;
- incrLimitCount=true;
- count = 0;
- importIdSet.setUndefined();
+ undefinedSize = array.length + importIdSet.size();
+ importIdSet.setUndefined();
+ isDefined=false;
+ incrLimitCount=true;
} else {
count = array.length;
addAll((IntegerImportIDSet) importIdSet);
@@ -128,6 +178,38 @@
}
/**
+ * {@inheritDoc}
+ */
+ public boolean merge(byte[] dBbytes, ImportIDSet importIdSet,
+ int limit, boolean maintainCount) {
+ boolean incrLimitCount=false;
+ if(maintainCount) {
+ incrLimitCount = mergeCount(dBbytes, importIdSet, limit);
+ } else {
+ boolean dbUndefined = ((dBbytes[0] & 0x80) == 0x80);
+ if(dbUndefined) {
+ isDefined=false;
+ importIdSet.setUndefined();
+ } else if(!importIdSet.isDefined()) {
+ isDefined=false;
+ incrLimitCount=true;
+ } else {
+ array = JebFormat.intArrayFromDatabaseBytes(dBbytes);
+ if(array.length + importIdSet.size() > limit) {
+ isDefined=false;
+ incrLimitCount=true;
+ count = 0;
+ importIdSet.setUndefined();
+ } else {
+ count = array.length;
+ addAll((IntegerImportIDSet) importIdSet);
+ }
+ }
+ }
+ return incrLimitCount;
+ }
+
+ /**
* Add all of the specified import ID set to the import set.
*
* @param that The import ID set to add.
@@ -222,22 +304,6 @@
/**
- * {@inheritDoc}
- */
- public void addEntryID(EntryID entryID, int limit) {
- if(!isDefined()) {
- return;
- }
- if(isDefined() && ((count + 1) > limit)) {
- isDefined = false;
- array = null;
- count = 0;
- } else {
- add((int)entryID.longValue());
- }
- }
-
- /**
* Add the specified integer to the import set.
*
* @param v The integer value to add.
@@ -330,7 +396,7 @@
if(isDefined) {
return encode(null);
} else {
- return JebFormat.entryIDUndefinedSizeToDatabase(Long.MAX_VALUE);
+ return JebFormat.entryIDUndefinedSizeToDatabase(undefinedSize);
}
}
diff --git a/opendj-sdk/opends/src/server/org/opends/server/backends/jeb/importLDIF/LongImportIDSet.java b/opendj-sdk/opends/src/server/org/opends/server/backends/jeb/importLDIF/LongImportIDSet.java
index d06bccb..cc4269b 100644
--- a/opendj-sdk/opends/src/server/org/opends/server/backends/jeb/importLDIF/LongImportIDSet.java
+++ b/opendj-sdk/opends/src/server/org/opends/server/backends/jeb/importLDIF/LongImportIDSet.java
@@ -58,6 +58,11 @@
//Boolean to keep track if the instance is defined or not.
boolean isDefined=true;
+
+ //Size recorded for the set while it is undefined.
+ private long undefinedSize = 0;
+
+
static {
if(RuntimeInformation.is64Bit()) {
LONGS_OVERHEAD = LONGS_OVERHEAD_64;
@@ -99,6 +104,12 @@
isDefined = false;
}
+ /**
+ * {@inheritDoc}
+ */
+ public long getUndefinedSize() {
+ return undefinedSize;
+ }
/**
* {@inheritDoc}
@@ -115,7 +126,8 @@
/**
* {@inheritDoc}
*/
- public boolean merge(byte[] DBbytes, ImportIDSet importIdSet, int limit) {
+ public boolean merge(byte[] DBbytes, ImportIDSet importIdSet,
+ int limit, boolean maintainCount) {
boolean incrLimitCount=false;
boolean dbUndefined = ((DBbytes[0] & 0x80) == 0x80);
@@ -142,13 +154,21 @@
/**
* {@inheritDoc}
*/
- public void addEntryID(EntryID entryID, int limit) {
+ public void addEntryID(EntryID entryID, int limit, boolean maintainCount) {
if(!isDefined()) {
- return;
+ if(maintainCount) {
+ undefinedSize++;
+ }
+ return;
}
if(isDefined() && ((count + 1) > limit)) {
isDefined = false;
array = null;
+ if(maintainCount) {
+ undefinedSize = count + 1;
+ } else {
+ undefinedSize = Long.MAX_VALUE;
+ }
count = 0;
} else {
add(entryID.longValue());
@@ -160,8 +180,11 @@
* {@inheritDoc}
*/
public byte[] toDatabase() {
- if (isDefined()) return encode(null);
- else return JebFormat.entryIDUndefinedSizeToDatabase(Long.MAX_VALUE);
+ if (isDefined()) {
+ return encode(null);
+ } else {
+ return JebFormat.entryIDUndefinedSizeToDatabase(undefinedSize);
+ }
}
/**
diff --git a/opendj-sdk/opends/src/server/org/opends/server/backends/jeb/importLDIF/WorkThread.java b/opendj-sdk/opends/src/server/org/opends/server/backends/jeb/importLDIF/WorkThread.java
index 10f6fed..4d092a0 100644
--- a/opendj-sdk/opends/src/server/org/opends/server/backends/jeb/importLDIF/WorkThread.java
+++ b/opendj-sdk/opends/src/server/org/opends/server/backends/jeb/importLDIF/WorkThread.java
@@ -272,8 +272,7 @@
}
Index id2children = context.getEntryContainer().getID2Children();
Index id2subtree = context.getEntryContainer().getID2Subtree();
- insert(id2children, entry, entryID, txn);
- insert(id2subtree, entry, entryID, txn);
+ bufferMgr.insert(id2children, id2subtree, entry, entryID, txn);
}
/**
--
Gitblit v1.10.0