From 296090552c8bb9ad83cfa819fbf0bd9d0ba8fac2 Mon Sep 17 00:00:00 2001
From: Matthew Swift <matthew.swift@forgerock.com>
Date: Mon, 18 Apr 2011 15:55:45 +0000
Subject: [PATCH] Fix OpenDJ-117: IllegalMonitorStateException during server shutdown
---
opends/src/server/org/opends/server/replication/server/ReplicationServerDomain.java | 261 +++++++++++++++++++++++++++++-----------------------
1 files changed, 146 insertions(+), 115 deletions(-)
diff --git a/opends/src/server/org/opends/server/replication/server/ReplicationServerDomain.java b/opends/src/server/org/opends/server/replication/server/ReplicationServerDomain.java
index d147ea4..64ac53c 100644
--- a/opends/src/server/org/opends/server/replication/server/ReplicationServerDomain.java
+++ b/opends/src/server/org/opends/server/replication/server/ReplicationServerDomain.java
@@ -1064,22 +1064,25 @@
if (!handler.engageShutdown())
// Only do this once (prevent other thread to enter here again)
{
- try
+ if (!shutdown)
{
try
{
-
// Acquire lock on domain (see more details in comment of start()
// method of ServerHandler)
- if (!shutdown)
- {
- lock();
- }
- } catch (InterruptedException ex)
- {
- // Try doing job anyway...
+ lock();
}
+ catch (InterruptedException ex)
+ {
+ // We can't deal with this here, so re-interrupt thread so that it is
+ // caught during subsequent IO.
+ Thread.currentThread().interrupt();
+ return;
+ }
+ }
+ try
+ {
// Stop useless monitoring publisher if no more RS or DS in domain
if ( (directoryServers.size() + replicationServers.size() )== 1)
{
@@ -1179,15 +1182,20 @@
{
try
{
- try
- {
- // Acquire lock on domain (see more details in comment of start()
- // method of ServerHandler)
- lock();
- } catch (InterruptedException ex)
- {
- // Try doing job anyway...
- }
+ // Acquire lock on domain (see more details in comment of start() method
+ // of ServerHandler)
+ lock();
+ }
+ catch (InterruptedException ex)
+ {
+ // We can't deal with this here, so re-interrupt thread so that it is
+ // caught during subsequent IO.
+ Thread.currentThread().interrupt();
+ return;
+ }
+
+ try
+ {
if (otherHandlers.contains(handler))
{
unRegisterHandler(handler);
@@ -1779,28 +1787,35 @@
return returnMsg;
}
+
+
/**
* Creates a new monitor message including monitoring information for the
- * topology directly connected to this RS. This includes information for:
- * - local RS
- * - all direct DSs
- * - all direct RSs
- * @param sender The sender of this message.
- * @param destination The destination of this message.
- * @return The newly created and filled MonitorMsg. Null if a problem occurred
- * during message creation.
+ * topology directly connected to this RS. This includes information for: -
+ * local RS - all direct DSs - all direct RSs
+ *
+ * @param sender
+ * The sender of this message.
+ * @param destination
+ * The destination of this message.
+ * @return The newly created and filled MonitorMsg. Null if the current thread
+ * was interrupted while attempting to get the domain lock.
*/
public MonitorMsg createLocalTopologyMonitorMsg(int sender, int destination)
{
- MonitorMsg monitorMsg = null;
-
- try {
-
+ try
+ {
// Lock domain as we need to go through connected servers list
lock();
+ }
+ catch (InterruptedException e)
+ {
+ return null;
+ }
- monitorMsg = new MonitorMsg(sender, destination);
-
+ try
+ {
+ MonitorMsg monitorMsg = new MonitorMsg(sender, destination);
// Populate for each connected LDAP Server
// from the states stored in the serverHandler.
@@ -1808,35 +1823,27 @@
// - the older missing change
for (DataServerHandler lsh : this.directoryServers.values())
{
- monitorMsg.setServerState(
- lsh.getServerId(),
- lsh.getServerState(),
- lsh.getApproxFirstMissingDate(),
- true);
+ monitorMsg.setServerState(lsh.getServerId(),
+ lsh.getServerState(), lsh.getApproxFirstMissingDate(),
+ true);
}
// Same for the connected RS
for (ReplicationServerHandler rsh : this.replicationServers.values())
{
- monitorMsg.setServerState(
- rsh.getServerId(),
- rsh.getServerState(),
- rsh.getApproxFirstMissingDate(),
- false);
+ monitorMsg.setServerState(rsh.getServerId(),
+ rsh.getServerState(), rsh.getApproxFirstMissingDate(),
+ false);
}
// Populate the RS state in the msg from the DbState
monitorMsg.setReplServerDbState(this.getDbServerState());
- } catch(InterruptedException e)
- {
- // At lock, too bad...
- } finally
- {
- if (hasLock())
- release();
+ return monitorMsg;
}
-
- return monitorMsg;
+ finally
+ {
+ release();
+ }
}
/**
@@ -2126,18 +2133,23 @@
"In " + this +
" Receiving ResetGenerationIdMsg from " + senderHandler.getServerId()+
" for baseDn " + baseDn + ":\n" + genIdMsg);
+
try
{
- try
- {
- // Acquire lock on domain (see more details in comment of start() method
- // of ServerHandler)
- lock();
- } catch (InterruptedException ex)
- {
- // Try doing job anyway...
- }
+ // Acquire lock on domain (see more details in comment of start() method
+ // of ServerHandler)
+ lock();
+ }
+ catch (InterruptedException ex)
+ {
+ // We can't deal with this here, so re-interrupt thread so that it is
+ // caught during subsequent IO.
+ Thread.currentThread().interrupt();
+ return;
+ }
+ try
+ {
long newGenId = genIdMsg.getGenerationId();
if (newGenId != this.generationId)
@@ -2233,16 +2245,20 @@
try
{
- try
- {
- // Acquire lock on domain (see more details in comment of start() method
- // of ServerHandler)
- lock();
- } catch (InterruptedException ex)
- {
- // Try doing job anyway...
- }
+ // Acquire lock on domain (see more details in comment of start() method
+ // of ServerHandler)
+ lock();
+ }
+ catch (InterruptedException ex)
+ {
+ // We can't deal with this here, so re-interrupt thread so that it is
+ // caught during subsequent IO.
+ Thread.currentThread().interrupt();
+ return;
+ }
+ try
+ {
ServerStatus newStatus = senderHandler.processNewStatus(csMsg);
if (newStatus == ServerStatus.INVALID_STATUS)
{
@@ -2258,7 +2274,6 @@
Message message = NOTE_DIRECTORY_SERVER_CHANGED_STATUS.get(
senderHandler.getServerId(), baseDn, newStatus.toString());
logError(message);
-
}
catch(Exception e)
{
@@ -2278,17 +2293,16 @@
* @param event The event to be used for new status computation
* @return True if we have been interrupted (must stop), false otherwise
*/
- public boolean changeStatusFromStatusAnalyzer(DataServerHandler serverHandler,
- StatusMachineEvent event)
+ public boolean changeStatusFromStatusAnalyzer(
+ DataServerHandler serverHandler, StatusMachineEvent event)
{
try
{
- try
- {
// Acquire lock on domain (see more details in comment of start() method
// of ServerHandler)
lock();
- } catch (InterruptedException ex)
+ }
+ catch (InterruptedException ex)
{
// We have been interrupted for dying, from stopStatusAnalyzer
// to prevent deadlock in this situation:
@@ -2299,43 +2313,50 @@
// waiting for analyzer thread death, a deadlock occurs. So we force
// interruption of the status analyzer thread death after 2 seconds if
// it has not finished (see StatusAnalyzer.waitForShutdown). This allows
- // to have the analyzer thread taking the domain lock only when the status
- // of a DS has to be changed. See more comments in run method of
+ // to have the analyzer thread taking the domain lock only when the
+ // status of a DS has to be changed. See more comments in run method of
// StatusAnalyzer.
if (debugEnabled())
- TRACER.debugInfo(
- "Status analyzer for domain " + baseDn + " has been interrupted when"
- + " trying to acquire domain lock for changing the status of DS " +
- serverHandler.getServerId());
+ TRACER
+ .debugInfo("Status analyzer for domain "
+ + baseDn
+ + " has been interrupted when"
+ + " trying to acquire domain lock for changing the status"
+ + " of DS "
+ + serverHandler.getServerId());
return true;
}
- ServerStatus newStatus = ServerStatus.INVALID_STATUS;
- ServerStatus oldStatus = serverHandler.getStatus();
try
{
- newStatus = serverHandler.changeStatusFromStatusAnalyzer(event);
- } catch (IOException e)
- {
- logError(ERR_EXCEPTION_CHANGING_STATUS_FROM_STATUS_ANALYZER.get(baseDn.
- toString(),
- Integer.toString(serverHandler.getServerId()),
- e.getMessage()));
- }
+ ServerStatus newStatus = ServerStatus.INVALID_STATUS;
+ ServerStatus oldStatus = serverHandler.getStatus();
+ try
+ {
+ newStatus = serverHandler
+ .changeStatusFromStatusAnalyzer(event);
+ }
+ catch (IOException e)
+ {
+ logError(ERR_EXCEPTION_CHANGING_STATUS_FROM_STATUS_ANALYZER
+ .get(baseDn.toString(),
+ Integer.toString(serverHandler.getServerId()),
+ e.getMessage()));
+ }
- if ( (newStatus == ServerStatus.INVALID_STATUS) ||
- (newStatus == oldStatus) )
- {
- // Change was impossible or already occurred (see StatusAnalyzer comments)
- return false;
- }
+ if ((newStatus == ServerStatus.INVALID_STATUS)
+ || (newStatus == oldStatus))
+ {
+ // Change was impossible or already occurred (see StatusAnalyzer
+ // comments)
+ return false;
+ }
- // Update every peers (RS/DS) with topology changes
- buildAndSendTopoInfoToDSs(serverHandler);
- buildAndSendTopoInfoToRSs();
-
+ // Update every peers (RS/DS) with topology changes
+ buildAndSendTopoInfoToDSs(serverHandler);
+ buildAndSendTopoInfoToRSs();
}
- catch(Exception e)
+ catch (Exception e)
{
logError(Message.raw(Category.SYNC, Severity.NOTICE,
stackTraceToSingleLineString(e)));
@@ -2344,6 +2365,7 @@
{
release();
}
+
return false;
}
@@ -2456,11 +2478,14 @@
{
// Acquire lock on domain (see more details in comment of start() method
// of ServerHandler)
- if (!hasLock())
- lock();
- } catch (InterruptedException ex)
+ lock();
+ }
+ catch (InterruptedException ex)
{
- // Try doing job anyway...
+ // We can't deal with this here, so re-interrupt thread so that it is
+ // caught during subsequent IO.
+ Thread.currentThread().interrupt();
+ return;
}
try
@@ -2502,7 +2527,6 @@
* DS we have.
*/
buildAndSendTopoInfoToDSs(null);
-
}
catch(Exception e)
{
@@ -2867,7 +2891,7 @@
* - when creating and sending a TopologyMsg
* - when a DS status is changing (ChangeStatusMsg received or sent)...
*/
- private ReentrantLock lock = new ReentrantLock();
+ private final ReentrantLock lock = new ReentrantLock();
/**
* This lock is used to protect the generationid variable.
@@ -3344,9 +3368,13 @@
// Acquire lock on domain (see more details in comment of start() method
// of ServerHandler)
lock();
- } catch (InterruptedException ex)
+ }
+ catch (InterruptedException ex)
{
- // Try doing job anyway...
+ // We can't deal with this here, so re-interrupt thread so that it is
+ // caught during subsequent IO.
+ Thread.currentThread().interrupt();
+ return;
}
try
@@ -3356,7 +3384,8 @@
{
// If we are the first replication server warned,
// then forwards the message to the remote replication servers
- for (ReplicationServerHandler rsHandler : replicationServers.values())
+ for (ReplicationServerHandler rsHandler : replicationServers
+ .values())
{
try
{
@@ -3365,12 +3394,14 @@
{
rsHandler.send(msg);
}
- } catch (IOException e)
+ }
+ catch (IOException e)
{
TRACER.debugCaught(DebugLogLevel.ERROR, e);
- logError(ERR_CHANGELOG_ERROR_SENDING_MSG.get(
- "Replication Server " + replicationServer.getReplicationPort() +
- " " + baseDn + " " + replicationServer.getServerId()));
+ logError(ERR_CHANGELOG_ERROR_SENDING_MSG
+ .get("Replication Server "
+ + replicationServer.getReplicationPort() + " "
+ + baseDn + " " + replicationServer.getServerId()));
stopServer(rsHandler, false);
}
}
--
Gitblit v1.10.0