From 7a34cefa2a5bbdf339f1a50b856e3d7441006b8d Mon Sep 17 00:00:00 2001
From: Matthew Swift <matthew.swift@forgerock.com>
Date: Wed, 08 Jun 2011 14:33:10 +0000
Subject: [PATCH] Fix OPENDJ-184: Transient errors when accessing cn=changelog DraftCN DB result in complete shutdown of the replication service
---
opends/src/server/org/opends/server/replication/server/DbHandler.java | 120 ++++++++++++++++-------------------------------------------
1 files changed, 33 insertions(+), 87 deletions(-)
diff --git a/opends/src/server/org/opends/server/replication/server/DbHandler.java b/opends/src/server/org/opends/server/replication/server/DbHandler.java
index cfb83e4..b147246 100644
--- a/opends/src/server/org/opends/server/replication/server/DbHandler.java
+++ b/opends/src/server/org/opends/server/replication/server/DbHandler.java
@@ -51,7 +51,6 @@
import org.opends.server.replication.server.ReplicationDB.ReplServerDBCursor;
import com.sleepycat.je.DatabaseException;
-import com.sleepycat.je.LockConflictException;
/**
* This class is used for managing the replicationServer database for each
@@ -111,9 +110,6 @@
private final Object flushLock = new Object();
private ReplicationServer replicationServer;
- // The maximum number of retries in case of DatabaseDeadlock Exception.
- private static final int DEADLOCK_RETRIES = 10;
-
private long latestTrimDate = 0;
/**
@@ -122,7 +118,7 @@
* are older than this age are removed.
*
*/
- private long trimage;
+ private long trimAge;
/**
* Creates a new dbHandler associated to a given LDAP server.
@@ -143,7 +139,7 @@
this.replicationServer = replicationServer;
serverId = id;
this.baseDn = baseDn;
- trimage = replicationServer.getTrimage();
+ trimAge = replicationServer.getTrimAge();
queueMaxSize = queueSize;
queueLowmark = queueSize * 1 / 5;
queueHimark = queueSize * 4 / 5;
@@ -291,43 +287,6 @@
}
/**
- * Return the number of changes between 2 provided change numbers.
- * @param from The lower (older) change number.
- * @param to The upper (newer) change number.
- * @return The computed number of changes.
- */
- public int traverseAndCount(ChangeNumber from, ChangeNumber to)
- {
- int count = 0;
- flush();
- ReplServerDBCursor cursor = null;
- try
- {
- try
- {
- cursor = db.openReadCursor(from);
- }
- catch(Exception e)
- {
- return 0;
- }
- ChangeNumber curr = null;
- while ((curr = cursor.nextChangeNumber())!=null)
- {
- if (curr.newerOrEquals(to))
- break;
- count++;
- }
- }
- finally
- {
- if (cursor != null)
- cursor.abort();
- }
- return count;
- }
-
- /**
* Removes the provided number of messages from the beginning of the msgQueue.
*
* @param number the number of changes to be removed.
@@ -456,80 +415,67 @@
*/
private void trim() throws DatabaseException, Exception
{
- if (trimage == 0)
+ if (trimAge == 0)
+ {
return;
- int size = 0;
- boolean finished = false;
- boolean done = false;
+ }
- latestTrimDate = TimeThread.getTime() - trimage;
+ latestTrimDate = TimeThread.getTime() - trimAge;
ChangeNumber trimDate = new ChangeNumber(latestTrimDate, 0, 0);
// Find the last changeNumber before the trimDate, in the Database.
- ChangeNumber lastBeforeTrimDate = db.getPreviousChangeNumber(trimDate);
+ ChangeNumber lastBeforeTrimDate = db
+ .getPreviousChangeNumber(trimDate);
if (lastBeforeTrimDate != null)
{
// If we found it, we want to stop trimming when reaching it.
trimDate = lastBeforeTrimDate;
}
- // In case of deadlock detection by the Database, this thread can
- // by aborted by a DeadlockException. This is a transient error and
- // the transaction should be attempted again.
- // We will try DEADLOCK_RETRIES times before failing.
- int tries = 0;
- while ((tries++ < DEADLOCK_RETRIES) && (!done))
+
+ for (int i = 0; i < 100; i++)
{
synchronized (flushLock)
{
- /* the trim is done by group in order to save some CPU and IO bandwidth
+ /*
+ * the trim is done by group in order to save some CPU and IO bandwidth
* start the transaction then do a bunch of remove then commit
*/
- ReplServerDBCursor cursor = db.openDeleteCursor();
-
+ final ReplServerDBCursor cursor = db.openDeleteCursor();
try
{
- while ((size < 5000 ) && (!finished))
+ for (int j = 0; j < 50; j++)
{
ChangeNumber changeNumber = cursor.nextChangeNumber();
- if (changeNumber != null)
+ if (changeNumber == null)
{
- if ((!changeNumber.equals(lastChange))
- && (changeNumber.older(trimDate)))
- {
- size++;
- cursor.delete();
- }
- else
- {
- firstChange = changeNumber;
- finished = true;
- }
+ cursor.close();
+ done = true;
+ return;
+ }
+
+ if ((!changeNumber.equals(lastChange))
+ && (changeNumber.older(trimDate)))
+ {
+ cursor.delete();
}
else
- finished = true;
+ {
+ firstChange = changeNumber;
+ cursor.close();
+ done = true;
+ return;
+ }
}
cursor.close();
- done = true;
- }
- catch (LockConflictException e)
- {
- cursor.abort();
- if (tries == DEADLOCK_RETRIES)
- {
- // could not handle the Deadlock after DEADLOCK_RETRIES tries.
- // shutdown the ReplicationServer.
- shutdown = true;
- throw (e);
- }
}
catch (Exception e)
{
// mark shutdown for this db so that we don't try again to
// stop it from cursor.close() or methods called by cursor.close()
- shutdown = true;
cursor.abort();
- throw (e);
+ shutdown = true;
+ throw e;
}
}
}
@@ -644,7 +590,7 @@
*/
public void setPurgeDelay(long delay)
{
- trimage = delay;
+ trimAge = delay;
}
/**
--
Gitblit v1.10.0