From 9e1f377c4f21b899d16f4c62450c68691f4b42a8 Mon Sep 17 00:00:00 2001
From: Ludovic Poitou <ludovic.poitou@forgerock.com>
Date: Thu, 20 Jun 2013 15:02:35 +0000
Subject: [PATCH] Fix for OPENDJ-846, Intermittent Replication failure. The issue was triggered by the mix of AssuredReplication and bad network conditions, which resulted in a deadlock between 2 RS: both were blocked writing to the TCP socket and neither was reading (because each was waiting on the write lock). The solution (more of a workaround) is to have another thread for sending data to the socket, and to have the reader and writer post the data to send to a queue that this new thread polls. There are still potential deadlocks, but they will occur much later, only if the sendQueue gets full. The code needs more work post 2.6 to be fully non-blocking, but the changes are enough for now to resolve the customer deadlock case.
---
opends/tests/unit-tests-testng/src/server/org/opends/server/replication/ReplicationTestCase.java | 55 ++++++++++++++++++-------------------------------------
1 files changed, 18 insertions(+), 37 deletions(-)
diff --git a/opends/tests/unit-tests-testng/src/server/org/opends/server/replication/ReplicationTestCase.java b/opends/tests/unit-tests-testng/src/server/org/opends/server/replication/ReplicationTestCase.java
index df8ff6f..13f15e1 100644
--- a/opends/tests/unit-tests-testng/src/server/org/opends/server/replication/ReplicationTestCase.java
+++ b/opends/tests/unit-tests-testng/src/server/org/opends/server/replication/ReplicationTestCase.java
@@ -27,25 +27,6 @@
*/
package org.opends.server.replication;
-import static org.opends.server.config.ConfigConstants.ATTR_TASK_COMPLETION_TIME;
-import static org.opends.server.config.ConfigConstants.ATTR_TASK_LOG_MESSAGES;
-import static org.opends.server.config.ConfigConstants.ATTR_TASK_STATE;
-import static org.opends.server.loggers.ErrorLogger.logError;
-import static org.opends.server.loggers.debug.DebugLogger.getTracer;
-import static org.opends.server.util.StaticUtils.stackTraceToSingleLineString;
-import static org.testng.Assert.assertEquals;
-import static org.testng.Assert.assertNotNull;
-import static org.testng.Assert.assertTrue;
-import static org.testng.Assert.fail;
-
-import java.io.File;
-import java.net.SocketException;
-import java.util.ArrayList;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.NoSuchElementException;
-import java.util.concurrent.locks.Lock;
-
import org.opends.messages.Category;
import org.opends.messages.Message;
import org.opends.messages.Severity;
@@ -66,34 +47,34 @@
import org.opends.server.replication.plugin.LDAPReplicationDomain;
import org.opends.server.replication.plugin.MultimasterReplication;
import org.opends.server.replication.plugin.PersistentServerState;
-import org.opends.server.replication.protocol.ProtocolSession;
import org.opends.server.replication.protocol.ReplSessionSecurity;
import org.opends.server.replication.protocol.ReplicationMsg;
+import org.opends.server.replication.protocol.Session;
import org.opends.server.replication.server.ReplicationServer;
import org.opends.server.replication.service.ReplicationBroker;
import org.opends.server.replication.service.ReplicationDomain;
import org.opends.server.schema.DirectoryStringSyntax;
import org.opends.server.schema.IntegerSyntax;
-import org.opends.server.types.Attribute;
-import org.opends.server.types.AttributeType;
-import org.opends.server.types.AttributeValue;
-import org.opends.server.types.AttributeValues;
-import org.opends.server.types.Attributes;
-import org.opends.server.types.ByteString;
-import org.opends.server.types.DN;
-import org.opends.server.types.Entry;
-import org.opends.server.types.LockManager;
-import org.opends.server.types.Modification;
-import org.opends.server.types.ModificationType;
-import org.opends.server.types.ResultCode;
-import org.opends.server.types.SearchFilter;
-import org.opends.server.types.SearchResultEntry;
-import org.opends.server.types.SearchScope;
+import org.opends.server.types.*;
import org.opends.server.util.StaticUtils;
import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
+import java.io.File;
+import java.net.SocketException;
+import java.util.ArrayList;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.NoSuchElementException;
+import java.util.concurrent.locks.Lock;
+
+import static org.opends.server.config.ConfigConstants.*;
+import static org.opends.server.loggers.ErrorLogger.logError;
+import static org.opends.server.loggers.debug.DebugLogger.getTracer;
+import static org.opends.server.util.StaticUtils.stackTraceToSingleLineString;
+import static org.testng.Assert.*;
+
/**
* An abstract class that all Replication unit test should extend.
*/
@@ -900,7 +881,7 @@
* Add a task to the configuration of the current running DS.
* @param taskEntry The task to add.
* @param expectedResult The expected result code for the ADD.
- * @param errorMessageID The expected error messageID when the expected
+ * @param errorMessage The expected error message when the expected
* result code is not SUCCESS
*/
protected void addTask(Entry taskEntry, ResultCode expectedResult,
@@ -1168,7 +1149,7 @@
* @param msgType Class of the message we are waiting for.
* @return The expected message if it comes in time or fails (assertion).
*/
- protected static ReplicationMsg waitForSpecificMsg(ProtocolSession session, String msgType) {
+ protected static ReplicationMsg waitForSpecificMsg(Session session, String msgType) {
ReplicationMsg replMsg = null;
--
Gitblit v1.10.0