From 9f2023d9f48efcc3dba2921004f05e635e2582e2 Mon Sep 17 00:00:00 2001
From: Matthew Swift <matthew.swift@forgerock.com>
Date: Tue, 05 May 2015 23:03:01 +0000
Subject: [PATCH] Fix OPENDJ-1984: avoid performing reentrant synchronized bucket accesses

---
 opendj-server-legacy/src/main/java/org/opends/server/types/LockManager.java     |   55 ++++++++++++++++++++-------
 opendj-server-legacy/src/test/java/org/opends/server/types/LockManagerTest.java |   42 ++++++++++++++++++++
 2 files changed, 82 insertions(+), 15 deletions(-)

diff --git a/opendj-server-legacy/src/main/java/org/opends/server/types/LockManager.java b/opendj-server-legacy/src/main/java/org/opends/server/types/LockManager.java
index f5c29a6..651eb42 100644
--- a/opendj-server-legacy/src/main/java/org/opends/server/types/LockManager.java
+++ b/opendj-server-legacy/src/main/java/org/opends/server/types/LockManager.java
@@ -463,19 +463,39 @@
 
   private DNLockHolder acquireLockFromLockTable(final DN dn, final int dnHashCode, final LinkedList<DNLockHolder> cache)
   {
-    final LinkedList<DNLockHolder> bucket = getBucket(dnHashCode);
-    synchronized (bucket)
+    /*
+     * The lock doesn't exist yet so we'll have to create a new one referencing its parent lock. The
+     * parent lock may not yet exist in the lock table either so acquire it before locking the
+     * bucket in order to avoid deadlocks resulting from reentrant bucket locks. Note that we
+     * pre-emptively fetch the parent lock because experiments show that the requested child lock is
+     * almost never in the lock-table. Specifically, this method is only called if we are already on
+     * the slow path due to a cache miss in the thread-local cache.
+     */
+    final DN parentDN = dn.parent();
+    final DNLockHolder parentLock = parentDN != null ? acquireLockFromCache0(parentDN, cache) : null;
+    boolean parentLockWasUsed = false;
+    try
     {
-      DNLockHolder lock = removeLock(bucket, dn, dnHashCode);
-      if (lock == null)
+      final LinkedList<DNLockHolder> bucket = getBucket(dnHashCode);
+      synchronized (bucket)
       {
-        final DN parentDN = dn.parent();
-        final DNLockHolder parentLock = parentDN != null ? acquireLockFromCache0(parentDN, cache) : null;
-        lock = new DNLockHolder(parentLock, dn, dnHashCode);
+        DNLockHolder lock = removeLock(bucket, dn, dnHashCode);
+        if (lock == null)
+        {
+          lock = new DNLockHolder(parentLock, dn, dnHashCode);
+          parentLockWasUsed = true;
+        }
+        bucket.addFirst(lock); // optimize for LRU
+        lock.refCount.incrementAndGet();
+        return lock;
       }
-      bucket.addFirst(lock); // optimize for LRU
-      lock.refCount.incrementAndGet();
-      return lock;
+    }
+    finally
+    {
+      if (!parentLockWasUsed && parentLock != null)
+      {
+        dereference(parentLock);
+      }
     }
   }
 
@@ -484,18 +504,25 @@
     if (lock.refCount.decrementAndGet() <= 0)
     {
       final LinkedList<DNLockHolder> bucket = getBucket(lock.dnHashCode);
+      boolean lockWasRemoved = false;
       synchronized (bucket)
       {
         // Double check: another thread could have acquired the lock since we decremented it to zero.
         if (lock.refCount.get() <= 0)
         {
           removeLock(bucket, lock.dn, lock.dnHashCode);
-          if (lock.parent != null)
-          {
-            dereference(lock.parent);
-          }
+          lockWasRemoved = true;
         }
       }
+
+      /*
+       * Dereference the parent outside of the bucket lock to avoid potential deadlocks due to
+       * reentrant bucket locks.
+       */
+      if (lockWasRemoved && lock.parent != null)
+      {
+        dereference(lock.parent);
+      }
     }
   }
 
diff --git a/opendj-server-legacy/src/test/java/org/opends/server/types/LockManagerTest.java b/opendj-server-legacy/src/test/java/org/opends/server/types/LockManagerTest.java
index 8b2f3ea..7511e81 100644
--- a/opendj-server-legacy/src/test/java/org/opends/server/types/LockManagerTest.java
+++ b/opendj-server-legacy/src/test/java/org/opends/server/types/LockManagerTest.java
@@ -28,6 +28,7 @@
 import static org.assertj.core.api.Assertions.assertThat;
 
 import java.util.LinkedList;
+import java.util.Random;
 import java.util.concurrent.Callable;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
@@ -77,7 +78,6 @@
 
   private DN dnA;
   private DN dnAB;
-
   private DN dnABC;
   private DN dnABD;
   private final ExecutorService thread1 = Executors.newSingleThreadExecutor();
@@ -275,6 +275,46 @@
     assertThat(lockManager.getLockTableRefCountFor(dn(99))).isGreaterThan(0);
   }
 
+  @Test(description = "OPENDJ-1984")
+  public void stressTestForDeadlocks() throws Exception
+  {
+    final LockManager lockManager = new LockManager();
+    final int threadCount = Runtime.getRuntime().availableProcessors();
+    final ExecutorService threadPool = Executors.newFixedThreadPool(threadCount);
+    for (int i = 0; i < threadCount; i++)
+    {
+      threadPool.submit(new Runnable()
+      {
+        @Override
+        public void run()
+        {
+          final Random rng = new Random();
+          for (int j = 0; j < 1000000; j++)
+          {
+            try
+            {
+              /*
+               * Lock a DN whose parent is different each time in order to prevent the thread local
+               * cache being used for retrieving the parent DN lock.
+               */
+              final int uid = rng.nextInt();
+              final int deviceId = rng.nextInt();
+              final DN dn = DN.valueOf("uid=" + deviceId + ",uid=" + uid + ",dc=example,dc=com");
+              lockManager.tryWriteLockEntry(dn).unlock();
+            }
+            catch (DirectoryException e)
+            {
+              throw new RuntimeException(e);
+            }
+          }
+        }
+      });
+    }
+
+    threadPool.shutdown();
+    assertThat(threadPool.awaitTermination(60, TimeUnit.SECONDS)).as("Deadlock detected during stress test").isTrue();
+  }
+
   private DN dn(final int i) throws DirectoryException
   {
     return DN.valueOf(String.format("uid=user.%d,ou=people,dc=example,dc=com", i));

--
Gitblit v1.10.0