From 7a34cefa2a5bbdf339f1a50b856e3d7441006b8d Mon Sep 17 00:00:00 2001
From: Matthew Swift <matthew.swift@forgerock.com>
Date: Wed, 08 Jun 2011 14:33:10 +0000
Subject: [PATCH] Fix OPENDJ-184: Transient errors when accessing cn=changelog DraftCN DB result in complete shutdown of the replication service

---
 opends/src/server/org/opends/server/replication/server/ReplicationDbEnv.java |   50 ++++++++++++++++++++++++++++++--------------------
 1 files changed, 30 insertions(+), 20 deletions(-)

diff --git a/opends/src/server/org/opends/server/replication/server/ReplicationDbEnv.java b/opends/src/server/org/opends/server/replication/server/ReplicationDbEnv.java
index cb34f92..7c28c16 100644
--- a/opends/src/server/org/opends/server/replication/server/ReplicationDbEnv.java
+++ b/opends/src/server/org/opends/server/replication/server/ReplicationDbEnv.java
@@ -40,16 +40,8 @@
 import java.io.File;
 import java.io.UnsupportedEncodingException;
 
-import com.sleepycat.je.Cursor;
-import com.sleepycat.je.Database;
-import com.sleepycat.je.DatabaseConfig;
-import com.sleepycat.je.DatabaseEntry;
-import com.sleepycat.je.DatabaseException;
-import com.sleepycat.je.Environment;
-import com.sleepycat.je.EnvironmentConfig;
-import com.sleepycat.je.LockMode;
-import com.sleepycat.je.OperationStatus;
-import com.sleepycat.je.Transaction;
+import com.sleepycat.je.*;
+
 import java.util.concurrent.TimeUnit;
 
 /**
@@ -92,10 +84,24 @@
      */
     envConfig.setAllowCreate(true);
     envConfig.setTransactional(true);
-    envConfig.setConfigParam("je.cleaner.expunge", "true");
     envConfig.setConfigParam("je.cleaner.threads", "2");
     envConfig.setConfigParam("je.checkpointer.highPriority", "true");
 
+    // If the JVM is reasonably large then we can safely default to
+    // bigger read buffers. This will result in more scalable checkpointer
+    // and cleaner performance.
+    if (Runtime.getRuntime().maxMemory() > 256 * 1024 * 1024)
+    {
+      envConfig.setConfigParam("je.cleaner.lookAheadCacheSize", String
+          .valueOf(2 * 1024 * 1024));
+
+      envConfig.setConfigParam("je.log.iteratorReadSize", String
+          .valueOf(2 * 1024 * 1024));
+
+      envConfig.setConfigParam("je.log.faultReadSize", String
+          .valueOf(4 * 1024));
+    }
+
     // Tests have shown that since the parsing of the Replication log is always
     // done sequentially, it is not necessary to use a large DB cache.
     // Use 5M so that the replication can be used with 64M total for the JVM.
@@ -103,9 +109,14 @@
 
     // Since records are always added at the end of the Replication log and
     // deleted at the beginning of the Replication log, this should never
-    // cause any deadlock. It is therefore safe to increase the TXN timeout
-    // to 10 seconds.
-    envConfig.setTxnTimeout(10, TimeUnit.SECONDS);
+    // cause any deadlock. It is therefore safe to disable the timeouts (0).
+    envConfig.setTxnTimeout(0, TimeUnit.SECONDS);
+    envConfig.setLockTimeout(0, TimeUnit.SECONDS);
+
+    // Since replication provides durability, we can reduce the DB durability
+    // level to one that only protects against application / JVM crashes.
+    envConfig.setDurability(Durability.COMMIT_WRITE_NO_SYNC);
+
     dbEnvironment = new Environment(new File(path), envConfig);
 
     /*
@@ -120,7 +131,6 @@
 
     stateDb = dbEnvironment.openDatabase(null, "changelogstate", dbConfig);
     start();
-
   }
 
   /**
@@ -316,7 +326,7 @@
               TRACER.debugInfo("getOrAddDb() Created in the state Db record " +
                 " serverId/Domain=<"+stringId+">");
             stateDb.put(txn, key, data);
-            txn.commitWriteNoSync();
+            txn.commit(Durability.COMMIT_WRITE_NO_SYNC);
           } catch (DatabaseException dbe)
           {
             // Abort the txn and propagate the Exception to the caller
@@ -347,7 +357,7 @@
                   "Created in the state Db record Tag/Domain/GenId key=" +
                   stringId + " value=" + dataStringId);
             stateDb.put(txn, key, data);
-            txn.commitWriteNoSync();
+            txn.commit(Durability.COMMIT_WRITE_NO_SYNC);
           } catch (DatabaseException dbe)
           {
             // Abort the txn and propagate the Exception to the caller
@@ -432,7 +442,7 @@
           try
           {
             stateDb.delete(txn, key);
-            txn.commitWriteNoSync();
+            txn.commit(Durability.COMMIT_WRITE_NO_SYNC);
             if (debugEnabled())
               TRACER.debugInfo(
                 "In " + this.replicationServer.getMonitorInstanceName() +
@@ -495,7 +505,7 @@
           try {
             data.setData(byteId);
             stateDb.delete(txn, key);
-            txn.commitWriteNoSync();
+            txn.commit(Durability.COMMIT_WRITE_NO_SYNC);
             if (debugEnabled())
               TRACER.debugInfo(
                   " In " + this.replicationServer.getMonitorInstanceName() +
@@ -532,7 +542,7 @@
       {
         txn = dbEnvironment.beginTransaction(null, null);
         dbEnvironment.truncateDatabase(txn, databaseName, false);
-        txn.commitWriteNoSync();
+        txn.commit(Durability.COMMIT_WRITE_NO_SYNC);
         txn = null;
       }
       catch (DatabaseException e)

--
Gitblit v1.10.0