From 9e1f377c4f21b899d16f4c62450c68691f4b42a8 Mon Sep 17 00:00:00 2001
From: Ludovic Poitou <ludovic.poitou@forgerock.com>
Date: Thu, 20 Jun 2013 15:02:35 +0000
Subject: [PATCH] Fix for OPENDJ-846, Intermittent Replication failure. The issue was triggered by the combination of AssuredReplication and bad network conditions, which resulted in a deadlock between two RS: both were blocked writing to the TCP socket and neither was reading (because they were waiting on the write lock). The solution (more of a workaround) is to have a separate thread send data to the socket, and to have the reader and writer post the data to be sent to a queue that this new thread polls. Deadlocks are still possible, but they will occur much later, and only if the sendQueue gets full. The code needs more work post 2.6 to be fully non-blocking, but the changes are enough for now to resolve the customer deadlock case.

---
 opends/src/server/org/opends/server/replication/plugin/LDAPReplicationDomain.java |   18 +++---------------
 1 files changed, 3 insertions(+), 15 deletions(-)

diff --git a/opends/src/server/org/opends/server/replication/plugin/LDAPReplicationDomain.java b/opends/src/server/org/opends/server/replication/plugin/LDAPReplicationDomain.java
index e52b41b..4a7af7e 100644
--- a/opends/src/server/org/opends/server/replication/plugin/LDAPReplicationDomain.java
+++ b/opends/src/server/org/opends/server/replication/plugin/LDAPReplicationDomain.java
@@ -86,19 +86,7 @@
 import org.opends.server.replication.common.ServerState;
 import org.opends.server.replication.common.ServerStatus;
 import org.opends.server.replication.common.StatusMachineEvent;
-import org.opends.server.replication.protocol.AddContext;
-import org.opends.server.replication.protocol.AddMsg;
-import org.opends.server.replication.protocol.DeleteContext;
-import org.opends.server.replication.protocol.DeleteMsg;
-import org.opends.server.replication.protocol.LDAPUpdateMsg;
-import org.opends.server.replication.protocol.ModifyContext;
-import org.opends.server.replication.protocol.ModifyDNMsg;
-import org.opends.server.replication.protocol.ModifyDnContext;
-import org.opends.server.replication.protocol.ModifyMsg;
-import org.opends.server.replication.protocol.OperationContext;
-import org.opends.server.replication.protocol.ProtocolSession;
-import org.opends.server.replication.protocol.RoutableMsg;
-import org.opends.server.replication.protocol.UpdateMsg;
+import org.opends.server.replication.protocol.*;
 import org.opends.server.replication.service.ReplicationBroker;
 import org.opends.server.replication.service.ReplicationDomain;
 import org.opends.server.replication.service.ReplicationMonitor;
@@ -4581,7 +4569,7 @@
       ServerStatus initStatus,
       ServerState replicationServerState,
       long generationID,
-      ProtocolSession session)
+      Session session)
   {
     // Check domain fractional configuration consistency with local
     // configuration variables
@@ -4876,7 +4864,7 @@
   @Override
   public boolean processUpdate(UpdateMsg updateMsg)
   {
-    // Ignore message if fractional configuration is inconcsistent and
+    // Ignore message if fractional configuration is inconsistent and
     // we have been passed into bad data set status
     if (forceBadDataSet)
     {

--
Gitblit v1.10.0