From 9f2023d9f48efcc3dba2921004f05e635e2582e2 Mon Sep 17 00:00:00 2001
From: Matthew Swift <matthew.swift@forgerock.com>
Date: Tue, 05 May 2015 23:03:01 +0000
Subject: [PATCH] Fix OPENDJ-1984: avoid performing reentrant synchronized bucket accesses
---
opendj-server-legacy/src/main/java/org/opends/server/types/LockManager.java | 55 ++++++++++++++++++++-------
opendj-server-legacy/src/test/java/org/opends/server/types/LockManagerTest.java | 42 ++++++++++++++++++++
2 files changed, 82 insertions(+), 15 deletions(-)
diff --git a/opendj-server-legacy/src/main/java/org/opends/server/types/LockManager.java b/opendj-server-legacy/src/main/java/org/opends/server/types/LockManager.java
index f5c29a6..651eb42 100644
--- a/opendj-server-legacy/src/main/java/org/opends/server/types/LockManager.java
+++ b/opendj-server-legacy/src/main/java/org/opends/server/types/LockManager.java
@@ -463,19 +463,39 @@
private DNLockHolder acquireLockFromLockTable(final DN dn, final int dnHashCode, final LinkedList<DNLockHolder> cache)
{
- final LinkedList<DNLockHolder> bucket = getBucket(dnHashCode);
- synchronized (bucket)
+ /*
+ * The lock doesn't exist yet so we'll have to create a new one referencing its parent lock. The
+ * parent lock may not yet exist in the lock table either so acquire it before locking the
+ * bucket in order to avoid deadlocks resulting from reentrant bucket locks. Note that we
+ * pre-emptively fetch the parent lock because experiments show that the requested child lock is
+ * almost never in the lock-table. Specifically, this method is only called if we are already on
+ * the slow path due to a cache miss in the thread-local cache.
+ */
+ final DN parentDN = dn.parent();
+ final DNLockHolder parentLock = parentDN != null ? acquireLockFromCache0(parentDN, cache) : null;
+ boolean parentLockWasUsed = false;
+ try
{
- DNLockHolder lock = removeLock(bucket, dn, dnHashCode);
- if (lock == null)
+ final LinkedList<DNLockHolder> bucket = getBucket(dnHashCode);
+ synchronized (bucket)
{
- final DN parentDN = dn.parent();
- final DNLockHolder parentLock = parentDN != null ? acquireLockFromCache0(parentDN, cache) : null;
- lock = new DNLockHolder(parentLock, dn, dnHashCode);
+ DNLockHolder lock = removeLock(bucket, dn, dnHashCode);
+ if (lock == null)
+ {
+ lock = new DNLockHolder(parentLock, dn, dnHashCode);
+ parentLockWasUsed = true;
+ }
+ bucket.addFirst(lock); // optimize for LRU
+ lock.refCount.incrementAndGet();
+ return lock;
}
- bucket.addFirst(lock); // optimize for LRU
- lock.refCount.incrementAndGet();
- return lock;
+ }
+ finally
+ {
+ if (!parentLockWasUsed && parentLock != null)
+ {
+ dereference(parentLock);
+ }
}
}
@@ -484,18 +504,25 @@
if (lock.refCount.decrementAndGet() <= 0)
{
final LinkedList<DNLockHolder> bucket = getBucket(lock.dnHashCode);
+ boolean lockWasRemoved = false;
synchronized (bucket)
{
// Double check: another thread could have acquired the lock since we decremented it to zero.
if (lock.refCount.get() <= 0)
{
removeLock(bucket, lock.dn, lock.dnHashCode);
- if (lock.parent != null)
- {
- dereference(lock.parent);
- }
+ lockWasRemoved = true;
}
}
+
+ /*
+ * Dereference the parent outside of the bucket lock to avoid potential deadlocks due to
+ * reentrant bucket locks.
+ */
+ if (lockWasRemoved && lock.parent != null)
+ {
+ dereference(lock.parent);
+ }
}
}
diff --git a/opendj-server-legacy/src/test/java/org/opends/server/types/LockManagerTest.java b/opendj-server-legacy/src/test/java/org/opends/server/types/LockManagerTest.java
index 8b2f3ea..7511e81 100644
--- a/opendj-server-legacy/src/test/java/org/opends/server/types/LockManagerTest.java
+++ b/opendj-server-legacy/src/test/java/org/opends/server/types/LockManagerTest.java
@@ -28,6 +28,7 @@
import static org.assertj.core.api.Assertions.assertThat;
import java.util.LinkedList;
+import java.util.Random;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
@@ -77,7 +78,6 @@
private DN dnA;
private DN dnAB;
-
private DN dnABC;
private DN dnABD;
private final ExecutorService thread1 = Executors.newSingleThreadExecutor();
@@ -275,6 +275,46 @@
assertThat(lockManager.getLockTableRefCountFor(dn(99))).isGreaterThan(0);
}
+ @Test(description = "OPENDJ-1984")
+ public void stressTestForDeadlocks() throws Exception
+ {
+ final LockManager lockManager = new LockManager();
+ final int threadCount = Runtime.getRuntime().availableProcessors();
+ final ExecutorService threadPool = Executors.newFixedThreadPool(threadCount);
+ for (int i = 0; i < threadCount; i++)
+ {
+ threadPool.submit(new Runnable()
+ {
+ @Override
+ public void run()
+ {
+ final Random rng = new Random();
+ for (int j = 0; j < 1000000; j++)
+ {
+ try
+ {
+ /*
+ * Lock a DN whose parent is different each time in order to prevent the thread local
+ * cache being used for retrieving the parent DN lock.
+ */
+ final int uid = rng.nextInt();
+ final int deviceId = rng.nextInt();
+ final DN dn = DN.valueOf("uid=" + deviceId + ",uid=" + uid + ",dc=example,dc=com");
+ lockManager.tryWriteLockEntry(dn).unlock();
+ }
+ catch (DirectoryException e)
+ {
+ throw new RuntimeException(e);
+ }
+ }
+ }
+ });
+ }
+
+ threadPool.shutdown();
+ assertThat(threadPool.awaitTermination(60, TimeUnit.SECONDS)).as("Deadlock detected during stress test").isTrue();
+ }
+
private DN dn(final int i) throws DirectoryException
{
return DN.valueOf(String.format("uid=user.%d,ou=people,dc=example,dc=com", i));
--
Gitblit v1.10.0