Updating java.util.concurrent to 11+28

This merges in the changes for java.util.concurrent from 11+28.

It only contains the merge resolutions, without having the
necessary changes to make everything work. In order to help with
the review, those changes come in subsequent CLs.

The merge has been made using tools/upstream/merge-from-upstream.

The merging of conflicts was done as specified in
http://b/188889082 with the only notable difference that upstream
references to the Collections Framework were preserved since they
will be made to work as-is later on (see http://b/200009966).

Test: code inspection
Bug: 188889082
Change-Id: I5a22e6768fa12ba82222a7cef027fb8e346e0e8e
diff --git a/ojluni/src/main/java/java/util/concurrent/AbstractExecutorService.java b/ojluni/src/main/java/java/util/concurrent/AbstractExecutorService.java
index e4d2235..a206867 100644
--- a/ojluni/src/main/java/java/util/concurrent/AbstractExecutorService.java
+++ b/ojluni/src/main/java/java/util/concurrent/AbstractExecutorService.java
@@ -245,8 +245,7 @@
                 Future<T> f = futures.get(i);
                 if (!f.isDone()) {
                     try { f.get(); }
-                    catch (CancellationException ignore) {}
-                    catch (ExecutionException ignore) {}
+                    catch (CancellationException | ExecutionException ignore) {}
                 }
             }
             return futures;
@@ -283,8 +282,7 @@
                 Future<T> f = futures.get(j);
                 if (!f.isDone()) {
                     try { f.get(deadline - System.nanoTime(), NANOSECONDS); }
-                    catch (CancellationException ignore) {}
-                    catch (ExecutionException ignore) {}
+                    catch (CancellationException | ExecutionException ignore) {}
                     catch (TimeoutException timedOut) {
                         break timedOut;
                     }
diff --git a/ojluni/src/main/java/java/util/concurrent/ArrayBlockingQueue.java b/ojluni/src/main/java/java/util/concurrent/ArrayBlockingQueue.java
index 96a60b3..dced5a4 100644
--- a/ojluni/src/main/java/java/util/concurrent/ArrayBlockingQueue.java
+++ b/ojluni/src/main/java/java/util/concurrent/ArrayBlockingQueue.java
@@ -46,10 +46,8 @@
 import java.util.Spliterators;
 import java.util.concurrent.locks.Condition;
 import java.util.concurrent.locks.ReentrantLock;
-
-// BEGIN android-note
-// removed link to collections framework docs
-// END android-note
+import java.util.function.Consumer;
+import java.util.function.Predicate;
 
 /**
  * A bounded {@linkplain BlockingQueue blocking queue} backed by an
@@ -74,17 +72,24 @@
  * generally decreases throughput but reduces variability and avoids
  * starvation.
  *
- * <p>This class and its iterator implement all of the
- * <em>optional</em> methods of the {@link Collection} and {@link
- * Iterator} interfaces.
+ * <p>This class and its iterator implement all of the <em>optional</em>
+ * methods of the {@link Collection} and {@link Iterator} interfaces.
  *
- * @since 1.5
+ * <p>This class is a member of the
+ * <a href="{@docRoot}/java.base/java/util/package-summary.html#CollectionsFramework">
+ * Java Collections Framework</a>.
+ *
  * @author Doug Lea
  * @param <E> the type of elements held in this queue
  */
 public class ArrayBlockingQueue<E> extends AbstractQueue<E>
         implements BlockingQueue<E>, java.io.Serializable {
 
+    /*
+     * Much of the implementation mechanics, especially the unusual
+     * nested loops, are shared and co-maintained with ArrayDeque.
+     */
+
     /**
      * Serialization ID. This class relies on default serialization
      * even for the items array, which is default-serialized, even if
@@ -129,10 +134,21 @@
     // Internal helper methods
 
     /**
-     * Circularly decrements array index i.
+     * Increments i, mod modulus.
+     * Precondition and postcondition: 0 <= i < modulus.
      */
-    final int dec(int i) {
-        return ((i == 0) ? items.length : i) - 1;
+    static final int inc(int i, int modulus) {
+        if (++i >= modulus) i = 0;
+        return i;
+    }
+
+    /**
+     * Decrements i, mod modulus.
+     * Precondition and postcondition: 0 <= i < modulus.
+     */
+    static final int dec(int i, int modulus) {
+        if (--i < 0) i = modulus - 1;
+        return i;
     }
 
     /**
@@ -144,14 +160,24 @@
     }
 
     /**
+     * Returns element at array index i.
+     * This is a slight abuse of generics, accepted by javac.
+     */
+    @SuppressWarnings("unchecked")
+    static <E> E itemAt(Object[] items, int i) {
+        return (E) items[i];
+    }
+
+    /**
      * Inserts element at current put position, advances, and signals.
      * Call only when holding lock.
      */
-    private void enqueue(E x) {
+    private void enqueue(E e) {
+        // assert lock.isHeldByCurrentThread();
         // assert lock.getHoldCount() == 1;
         // assert items[putIndex] == null;
         final Object[] items = this.items;
-        items[putIndex] = x;
+        items[putIndex] = e;
         if (++putIndex == items.length) putIndex = 0;
         count++;
         notEmpty.signal();
@@ -162,18 +188,19 @@
      * Call only when holding lock.
      */
     private E dequeue() {
+        // assert lock.isHeldByCurrentThread();
         // assert lock.getHoldCount() == 1;
         // assert items[takeIndex] != null;
         final Object[] items = this.items;
         @SuppressWarnings("unchecked")
-        E x = (E) items[takeIndex];
+        E e = (E) items[takeIndex];
         items[takeIndex] = null;
         if (++takeIndex == items.length) takeIndex = 0;
         count--;
         if (itrs != null)
             itrs.elementDequeued();
         notFull.signal();
-        return x;
+        return e;
     }
 
     /**
@@ -182,6 +209,7 @@
      * Call only when holding lock.
      */
     void removeAt(final int removeIndex) {
+        // assert lock.isHeldByCurrentThread();
         // assert lock.getHoldCount() == 1;
         // assert items[removeIndex] != null;
         // assert removeIndex >= 0 && removeIndex < items.length;
@@ -267,6 +295,7 @@
         final ReentrantLock lock = this.lock;
         lock.lock(); // Lock only for visibility, not mutual exclusion
         try {
+            final Object[] items = this.items;
             int i = 0;
             try {
                 for (E e : c)
@@ -481,15 +510,16 @@
         try {
             if (count > 0) {
                 final Object[] items = this.items;
-                final int putIndex = this.putIndex;
-                int i = takeIndex;
-                do {
-                    if (o.equals(items[i])) {
-                        removeAt(i);
-                        return true;
-                    }
-                    if (++i == items.length) i = 0;
-                } while (i != putIndex);
+                for (int i = takeIndex, end = putIndex,
+                         to = (i < end) ? end : items.length;
+                     ; i = 0, to = end) {
+                    for (; i < to; i++)
+                        if (o.equals(items[i])) {
+                            removeAt(i);
+                            return true;
+                        }
+                    if (to == end) break;
+                }
             }
             return false;
         } finally {
@@ -512,13 +542,14 @@
         try {
             if (count > 0) {
                 final Object[] items = this.items;
-                final int putIndex = this.putIndex;
-                int i = takeIndex;
-                do {
-                    if (o.equals(items[i]))
-                        return true;
-                    if (++i == items.length) i = 0;
-                } while (i != putIndex);
+                for (int i = takeIndex, end = putIndex,
+                         to = (i < end) ? end : items.length;
+                     ; i = 0, to = end) {
+                    for (; i < to; i++)
+                        if (o.equals(items[i]))
+                            return true;
+                    if (to == end) break;
+                }
             }
             return false;
         } finally {
@@ -625,15 +656,9 @@
         final ReentrantLock lock = this.lock;
         lock.lock();
         try {
-            int k = count;
-            if (k > 0) {
-                final Object[] items = this.items;
-                final int putIndex = this.putIndex;
-                int i = takeIndex;
-                do {
-                    items[i] = null;
-                    if (++i == items.length) i = 0;
-                } while (i != putIndex);
+            int k;
+            if ((k = count) > 0) {
+                circularClear(items, takeIndex, putIndex);
                 takeIndex = putIndex;
                 count = 0;
                 if (itrs != null)
@@ -647,6 +672,20 @@
     }
 
     /**
+     * Nulls out slots starting at array index i, upto index end.
+     * Condition i == end means "full" - the entire array is cleared.
+     */
+    private static void circularClear(Object[] items, int i, int end) {
+        // assert 0 <= i && i < items.length;
+        // assert 0 <= end && end < items.length;
+        for (int to = (i < end) ? end : items.length;
+             ; i = 0, to = end) {
+            for (; i < to; i++) items[i] = null;
+            if (to == end) break;
+        }
+    }
+
+    /**
      * @throws UnsupportedOperationException {@inheritDoc}
      * @throws ClassCastException            {@inheritDoc}
      * @throws NullPointerException          {@inheritDoc}
@@ -678,8 +717,8 @@
             try {
                 while (i < n) {
                     @SuppressWarnings("unchecked")
-                    E x = (E) items[take];
-                    c.add(x);
+                    E e = (E) items[take];
+                    c.add(e);
                     items[take] = null;
                     if (++take == items.length) take = 0;
                     i++;
@@ -808,7 +847,7 @@
          * there is known to be at least one iterator to collect
          */
         void doSomeSweeping(boolean tryHarder) {
-            // assert lock.getHoldCount() == 1;
+            // assert lock.isHeldByCurrentThread();
             // assert head != null;
             int probes = tryHarder ? LONG_SWEEP_PROBES : SHORT_SWEEP_PROBES;
             Node o, p;
@@ -864,7 +903,7 @@
          * Adds a new iterator to the linked list of tracked iterators.
          */
         void register(Itr itr) {
-            // assert lock.getHoldCount() == 1;
+            // assert lock.isHeldByCurrentThread();
             head = new Node(itr, head);
         }
 
@@ -874,7 +913,7 @@
          * Notifies all iterators, and expunges any that are now stale.
          */
         void takeIndexWrapped() {
-            // assert lock.getHoldCount() == 1;
+            // assert lock.isHeldByCurrentThread();
             cycles++;
             for (Node o = null, p = head; p != null;) {
                 final Itr it = p.get();
@@ -931,7 +970,7 @@
          * clears all weak refs, and unlinks the itrs datastructure.
          */
         void queueIsEmpty() {
-            // assert lock.getHoldCount() == 1;
+            // assert lock.isHeldByCurrentThread();
             for (Node p = head; p != null; p = p.next) {
                 Itr it = p.get();
                 if (it != null) {
@@ -947,7 +986,7 @@
          * Called whenever an element has been dequeued (at takeIndex).
          */
         void elementDequeued() {
-            // assert lock.getHoldCount() == 1;
+            // assert lock.isHeldByCurrentThread();
             if (count == 0)
                 queueIsEmpty();
             else if (takeIndex == 0)
@@ -972,6 +1011,11 @@
      * expected element to remove, in lastItem.  Yes, we may fail to
      * remove lastItem from the queue if it moved due to an interleaved
      * interior remove while in detached mode.
+     *
+     * Method forEachRemaining, added in Java 8, is treated similarly
+     * to hasNext returning false, in that we switch to detached mode,
+     * but we regard it as an even stronger request to "close" this
+     * iteration, and don't bother supporting subsequent remove().
      */
     private class Itr implements Iterator<E> {
         /** Index to look for new nextItem; NONE at end */
@@ -1008,7 +1052,6 @@
         private static final int DETACHED = -3;
 
         Itr() {
-            // assert lock.getHoldCount() == 0;
             lastRet = NONE;
             final ReentrantLock lock = ArrayBlockingQueue.this.lock;
             lock.lock();
@@ -1041,12 +1084,12 @@
         }
 
         boolean isDetached() {
-            // assert lock.getHoldCount() == 1;
+            // assert lock.isHeldByCurrentThread();
             return prevTakeIndex < 0;
         }
 
         private int incCursor(int index) {
-            // assert lock.getHoldCount() == 1;
+            // assert lock.isHeldByCurrentThread();
             if (++index == items.length) index = 0;
             if (index == putIndex) index = NONE;
             return index;
@@ -1071,7 +1114,7 @@
          * operation on this iterator.  Call only from iterating thread.
          */
         private void incorporateDequeues() {
-            // assert lock.getHoldCount() == 1;
+            // assert lock.isHeldByCurrentThread();
             // assert itrs != null;
             // assert !isDetached();
             // assert count > 0;
@@ -1085,7 +1128,7 @@
                 final int len = items.length;
                 // how far takeIndex has advanced since the previous
                 // operation of this iterator
-                long dequeues = (cycles - prevCycles) * len
+                long dequeues = (long) (cycles - prevCycles) * len
                     + (takeIndex - prevTakeIndex);
 
                 // Check indices for invalidation
@@ -1114,7 +1157,7 @@
          */
         private void detach() {
             // Switch to detached mode
-            // assert lock.getHoldCount() == 1;
+            // assert lock.isHeldByCurrentThread();
             // assert cursor == NONE;
             // assert nextIndex < 0;
             // assert lastRet < 0 || nextItem == null;
@@ -1134,7 +1177,6 @@
          * triggered by queue modifications.
          */
         public boolean hasNext() {
-            // assert lock.getHoldCount() == 0;
             if (nextItem != null)
                 return true;
             noNext();
@@ -1164,9 +1206,8 @@
         }
 
         public E next() {
-            // assert lock.getHoldCount() == 0;
-            final E x = nextItem;
-            if (x == null)
+            final E e = nextItem;
+            if (e == null)
                 throw new NoSuchElementException();
             final ReentrantLock lock = ArrayBlockingQueue.this.lock;
             lock.lock();
@@ -1184,17 +1225,48 @@
                 } else {
                     nextIndex = NONE;
                     nextItem = null;
+                    if (lastRet == REMOVED) detach();
                 }
             } finally {
                 lock.unlock();
             }
-            return x;
+            return e;
+        }
+
+        public void forEachRemaining(Consumer<? super E> action) {
+            Objects.requireNonNull(action);
+            final ReentrantLock lock = ArrayBlockingQueue.this.lock;
+            lock.lock();
+            try {
+                final E e = nextItem;
+                if (e == null) return;
+                if (!isDetached())
+                    incorporateDequeues();
+                action.accept(e);
+                if (isDetached() || cursor < 0) return;
+                final Object[] items = ArrayBlockingQueue.this.items;
+                for (int i = cursor, end = putIndex,
+                         to = (i < end) ? end : items.length;
+                     ; i = 0, to = end) {
+                    for (; i < to; i++)
+                        action.accept(itemAt(items, i));
+                    if (to == end) break;
+                }
+            } finally {
+                // Calling forEachRemaining is a strong hint that this
+                // iteration is surely over; supporting remove() after
+                // forEachRemaining() is more trouble than it's worth
+                cursor = nextIndex = lastRet = NONE;
+                nextItem = lastItem = null;
+                detach();
+                lock.unlock();
+            }
         }
 
         public void remove() {
-            // assert lock.getHoldCount() == 0;
             final ReentrantLock lock = ArrayBlockingQueue.this.lock;
             lock.lock();
+            // assert lock.getHoldCount() == 1;
             try {
                 if (!isDetached())
                     incorporateDequeues(); // might update lastRet or detach
@@ -1232,7 +1304,7 @@
          * from next(), as promised by returning true from hasNext().
          */
         void shutdown() {
-            // assert lock.getHoldCount() == 1;
+            // assert lock.isHeldByCurrentThread();
             cursor = NONE;
             if (nextIndex >= 0)
                 nextIndex = REMOVED;
@@ -1260,7 +1332,7 @@
          * @return true if this iterator should be unlinked from itrs
          */
         boolean removedAt(int removedIndex) {
-            // assert lock.getHoldCount() == 1;
+            // assert lock.isHeldByCurrentThread();
             if (isDetached())
                 return true;
 
@@ -1285,7 +1357,7 @@
                 }
                 else if (x > removedDistance) {
                     // assert cursor != prevTakeIndex;
-                    this.cursor = cursor = dec(cursor);
+                    this.cursor = cursor = dec(cursor, len);
                 }
             }
             int lastRet = this.lastRet;
@@ -1294,7 +1366,7 @@
                 if (x == removedDistance)
                     this.lastRet = lastRet = REMOVED;
                 else if (x > removedDistance)
-                    this.lastRet = lastRet = dec(lastRet);
+                    this.lastRet = lastRet = dec(lastRet, len);
             }
             int nextIndex = this.nextIndex;
             if (nextIndex >= 0) {
@@ -1302,7 +1374,7 @@
                 if (x == removedDistance)
                     this.nextIndex = nextIndex = REMOVED;
                 else if (x > removedDistance)
-                    this.nextIndex = nextIndex = dec(nextIndex);
+                    this.nextIndex = nextIndex = dec(nextIndex, len);
             }
             if (cursor < 0 && nextIndex < 0 && lastRet < 0) {
                 this.prevTakeIndex = DETACHED;
@@ -1317,7 +1389,7 @@
          * @return true if this iterator should be unlinked from itrs
          */
         boolean takeIndexWrapped() {
-            // assert lock.getHoldCount() == 1;
+            // assert lock.isHeldByCurrentThread();
             if (isDetached())
                 return true;
             if (itrs.cycles - prevCycles > 1) {
@@ -1366,4 +1438,197 @@
                     Spliterator.CONCURRENT));
     }
 
+    /**
+     * @throws NullPointerException {@inheritDoc}
+     */
+    public void forEach(Consumer<? super E> action) {
+        Objects.requireNonNull(action);
+        final ReentrantLock lock = this.lock;
+        lock.lock();
+        try {
+            if (count > 0) {
+                final Object[] items = this.items;
+                for (int i = takeIndex, end = putIndex,
+                         to = (i < end) ? end : items.length;
+                     ; i = 0, to = end) {
+                    for (; i < to; i++)
+                        action.accept(itemAt(items, i));
+                    if (to == end) break;
+                }
+            }
+        } finally {
+            lock.unlock();
+        }
+    }
+
+    /**
+     * @throws NullPointerException {@inheritDoc}
+     */
+    public boolean removeIf(Predicate<? super E> filter) {
+        Objects.requireNonNull(filter);
+        return bulkRemove(filter);
+    }
+
+    /**
+     * @throws NullPointerException {@inheritDoc}
+     */
+    public boolean removeAll(Collection<?> c) {
+        Objects.requireNonNull(c);
+        return bulkRemove(e -> c.contains(e));
+    }
+
+    /**
+     * @throws NullPointerException {@inheritDoc}
+     */
+    public boolean retainAll(Collection<?> c) {
+        Objects.requireNonNull(c);
+        return bulkRemove(e -> !c.contains(e));
+    }
+
+    /** Implementation of bulk remove methods. */
+    private boolean bulkRemove(Predicate<? super E> filter) {
+        final ReentrantLock lock = this.lock;
+        lock.lock();
+        try {
+            if (itrs == null) { // check for active iterators
+                if (count > 0) {
+                    final Object[] items = this.items;
+                    // Optimize for initial run of survivors
+                    for (int i = takeIndex, end = putIndex,
+                             to = (i < end) ? end : items.length;
+                         ; i = 0, to = end) {
+                        for (; i < to; i++)
+                            if (filter.test(itemAt(items, i)))
+                                return bulkRemoveModified(filter, i);
+                        if (to == end) break;
+                    }
+                }
+                return false;
+            }
+        } finally {
+            lock.unlock();
+        }
+        // Active iterators are too hairy!
+        // Punting (for now) to the slow n^2 algorithm ...
+        return super.removeIf(filter);
+    }
+
+    // A tiny bit set implementation
+
+    private static long[] nBits(int n) {
+        return new long[((n - 1) >> 6) + 1];
+    }
+    private static void setBit(long[] bits, int i) {
+        bits[i >> 6] |= 1L << i;
+    }
+    private static boolean isClear(long[] bits, int i) {
+        return (bits[i >> 6] & (1L << i)) == 0;
+    }
+
+    /**
+     * Returns circular distance from i to j, disambiguating i == j to
+     * items.length; never returns 0.
+     */
+    private int distanceNonEmpty(int i, int j) {
+        if ((j -= i) <= 0) j += items.length;
+        return j;
+    }
+
+    /**
+     * Helper for bulkRemove, in case of at least one deletion.
+     * Tolerate predicates that reentrantly access the collection for
+     * read (but not write), so traverse once to find elements to
+     * delete, a second pass to physically expunge.
+     *
+     * @param beg valid index of first element to be deleted
+     */
+    private boolean bulkRemoveModified(
+        Predicate<? super E> filter, final int beg) {
+        final Object[] es = items;
+        final int capacity = items.length;
+        final int end = putIndex;
+        final long[] deathRow = nBits(distanceNonEmpty(beg, putIndex));
+        deathRow[0] = 1L;   // set bit 0
+        for (int i = beg + 1, to = (i <= end) ? end : es.length, k = beg;
+             ; i = 0, to = end, k -= capacity) {
+            for (; i < to; i++)
+                if (filter.test(itemAt(es, i)))
+                    setBit(deathRow, i - k);
+            if (to == end) break;
+        }
+        // a two-finger traversal, with hare i reading, tortoise w writing
+        int w = beg;
+        for (int i = beg + 1, to = (i <= end) ? end : es.length, k = beg;
+             ; w = 0) { // w rejoins i on second leg
+            // In this loop, i and w are on the same leg, with i > w
+            for (; i < to; i++)
+                if (isClear(deathRow, i - k))
+                    es[w++] = es[i];
+            if (to == end) break;
+            // In this loop, w is on the first leg, i on the second
+            for (i = 0, to = end, k -= capacity; i < to && w < capacity; i++)
+                if (isClear(deathRow, i - k))
+                    es[w++] = es[i];
+            if (i >= to) {
+                if (w == capacity) w = 0; // "corner" case
+                break;
+            }
+        }
+        count -= distanceNonEmpty(w, end);
+        circularClear(es, putIndex = w, end);
+        return true;
+    }
+
+    /** debugging */
+    void checkInvariants() {
+        // meta-assertions
+        // assert lock.isHeldByCurrentThread();
+        if (!invariantsSatisfied()) {
+            String detail = String.format(
+                "takeIndex=%d putIndex=%d count=%d capacity=%d items=%s",
+                takeIndex, putIndex, count, items.length,
+                Arrays.toString(items));
+            System.err.println(detail);
+            throw new AssertionError(detail);
+        }
+    }
+
+    private boolean invariantsSatisfied() {
+        // Unlike ArrayDeque, we have a count field but no spare slot.
+        // We prefer ArrayDeque's strategy (and the names of its fields!),
+        // but our field layout is baked into the serial form, and so is
+        // too annoying to change.
+        //
+        // putIndex == takeIndex must be disambiguated by checking count.
+        int capacity = items.length;
+        return capacity > 0
+            && items.getClass() == Object[].class
+            && (takeIndex | putIndex | count) >= 0
+            && takeIndex <  capacity
+            && putIndex  <  capacity
+            && count     <= capacity
+            && (putIndex - takeIndex - count) % capacity == 0
+            && (count == 0 || items[takeIndex] != null)
+            && (count == capacity || items[putIndex] == null)
+            && (count == 0 || items[dec(putIndex, capacity)] != null);
+    }
+
+    /**
+     * Reconstitutes this queue from a stream (that is, deserializes it).
+     *
+     * @param s the stream
+     * @throws ClassNotFoundException if the class of a serialized object
+     *         could not be found
+     * @throws java.io.InvalidObjectException if invariants are violated
+     * @throws java.io.IOException if an I/O error occurs
+     */
+    private void readObject(java.io.ObjectInputStream s)
+        throws java.io.IOException, ClassNotFoundException {
+
+        // Read in items array and various fields
+        s.defaultReadObject();
+
+        if (!invariantsSatisfied())
+            throw new java.io.InvalidObjectException("invariants violated");
+    }
 }
diff --git a/ojluni/src/main/java/java/util/concurrent/BlockingDeque.java b/ojluni/src/main/java/java/util/concurrent/BlockingDeque.java
index 290fee1..8043510 100644
--- a/ojluni/src/main/java/java/util/concurrent/BlockingDeque.java
+++ b/ojluni/src/main/java/java/util/concurrent/BlockingDeque.java
@@ -39,10 +39,6 @@
 import java.util.Iterator;
 import java.util.NoSuchElementException;
 
-// BEGIN android-note
-// fixed framework docs link to "Collection#optional"
-// END android-note
-
 /**
  * A {@link Deque} that additionally supports blocking operations that wait
  * for the deque to become non-empty when retrieving an element, and wait for
@@ -57,69 +53,69 @@
  * and the fourth blocks for only a given maximum time limit before giving
  * up.  These methods are summarized in the following table:
  *
- * <table BORDER CELLPADDING=3 CELLSPACING=1>
+ * <table class="plain">
  * <caption>Summary of BlockingDeque methods</caption>
  *  <tr>
- *    <td ALIGN=CENTER COLSPAN = 5> <b>First Element (Head)</b></td>
+ *    <th id="First" colspan="5"> First Element (Head)</th>
  *  </tr>
  *  <tr>
  *    <td></td>
- *    <td ALIGN=CENTER><em>Throws exception</em></td>
- *    <td ALIGN=CENTER><em>Special value</em></td>
- *    <td ALIGN=CENTER><em>Blocks</em></td>
- *    <td ALIGN=CENTER><em>Times out</em></td>
+ *    <th id="FThrow" style="font-weight:normal; font-style: italic">Throws exception</th>
+ *    <th id="FValue" style="font-weight:normal; font-style: italic">Special value</th>
+ *    <th id="FBlock" style="font-weight:normal; font-style: italic">Blocks</th>
+ *    <th id="FTimes" style="font-weight:normal; font-style: italic">Times out</th>
  *  </tr>
  *  <tr>
- *    <td><b>Insert</b></td>
- *    <td>{@link #addFirst addFirst(e)}</td>
- *    <td>{@link #offerFirst(Object) offerFirst(e)}</td>
- *    <td>{@link #putFirst putFirst(e)}</td>
- *    <td>{@link #offerFirst(Object, long, TimeUnit) offerFirst(e, time, unit)}</td>
+ *    <th id="FInsert" style="text-align:left">Insert</th>
+ *    <td headers="First FInsert FThrow">{@link #addFirst(Object) addFirst(e)}</td>
+ *    <td headers="First FInsert FValue">{@link #offerFirst(Object) offerFirst(e)}</td>
+ *    <td headers="First FInsert FBlock">{@link #putFirst(Object) putFirst(e)}</td>
+ *    <td headers="First FInsert FTimes">{@link #offerFirst(Object, long, TimeUnit) offerFirst(e, time, unit)}</td>
  *  </tr>
  *  <tr>
- *    <td><b>Remove</b></td>
- *    <td>{@link #removeFirst removeFirst()}</td>
- *    <td>{@link #pollFirst pollFirst()}</td>
- *    <td>{@link #takeFirst takeFirst()}</td>
- *    <td>{@link #pollFirst(long, TimeUnit) pollFirst(time, unit)}</td>
+ *    <th id="FRemove" style="text-align:left">Remove</th>
+ *    <td headers="First FRemove FThrow">{@link #removeFirst() removeFirst()}</td>
+ *    <td headers="First FRemove FValue">{@link #pollFirst() pollFirst()}</td>
+ *    <td headers="First FRemove FBlock">{@link #takeFirst() takeFirst()}</td>
+ *    <td headers="First FRemove FTimes">{@link #pollFirst(long, TimeUnit) pollFirst(time, unit)}</td>
  *  </tr>
  *  <tr>
- *    <td><b>Examine</b></td>
- *    <td>{@link #getFirst getFirst()}</td>
- *    <td>{@link #peekFirst peekFirst()}</td>
- *    <td><em>not applicable</em></td>
- *    <td><em>not applicable</em></td>
+ *    <th id="FExamine" style="text-align:left">Examine</th>
+ *    <td headers="First FExamine FThrow">{@link #getFirst() getFirst()}</td>
+ *    <td headers="First FExamine FValue">{@link #peekFirst() peekFirst()}</td>
+ *    <td headers="First FExamine FBlock" style="font-style:italic">not applicable</td>
+ *    <td headers="First FExamine FTimes" style="font-style:italic">not applicable</td>
  *  </tr>
  *  <tr>
- *    <td ALIGN=CENTER COLSPAN = 5> <b>Last Element (Tail)</b></td>
+ *    <th id="Last" colspan="5"> Last Element (Tail)</th>
  *  </tr>
  *  <tr>
  *    <td></td>
- *    <td ALIGN=CENTER><em>Throws exception</em></td>
- *    <td ALIGN=CENTER><em>Special value</em></td>
- *    <td ALIGN=CENTER><em>Blocks</em></td>
- *    <td ALIGN=CENTER><em>Times out</em></td>
+ *    <th id="LThrow" style="font-weight:normal; font-style: italic">Throws exception</th>
+ *    <th id="LValue" style="font-weight:normal; font-style: italic">Special value</th>
+ *    <th id="LBlock" style="font-weight:normal; font-style: italic">Blocks</th>
+ *    <th id="LTimes" style="font-weight:normal; font-style: italic">Times out</th>
  *  </tr>
  *  <tr>
- *    <td><b>Insert</b></td>
- *    <td>{@link #addLast addLast(e)}</td>
- *    <td>{@link #offerLast(Object) offerLast(e)}</td>
- *    <td>{@link #putLast putLast(e)}</td>
- *    <td>{@link #offerLast(Object, long, TimeUnit) offerLast(e, time, unit)}</td>
+ *    <th id="LInsert" style="text-align:left">Insert</th>
+ *    <td headers="Last LInsert LThrow">{@link #addLast(Object) addLast(e)}</td>
+ *    <td headers="Last LInsert LValue">{@link #offerLast(Object) offerLast(e)}</td>
+ *    <td headers="Last LInsert LBlock">{@link #putLast(Object) putLast(e)}</td>
+ *    <td headers="Last LInsert LTimes">{@link #offerLast(Object, long, TimeUnit) offerLast(e, time, unit)}</td>
  *  </tr>
  *  <tr>
- *    <td><b>Remove</b></td>
- *    <td>{@link #removeLast() removeLast()}</td>
- *    <td>{@link #pollLast() pollLast()}</td>
- *    <td>{@link #takeLast takeLast()}</td>
- *    <td>{@link #pollLast(long, TimeUnit) pollLast(time, unit)}</td>
+ *    <th id="LRemove" style="text-align:left">Remove</th>
+ *    <td headers="Last LRemove LThrow">{@link #removeLast() removeLast()}</td>
+ *    <td headers="Last LRemove LValue">{@link #pollLast() pollLast()}</td>
+ *    <td headers="Last LRemove LBlock">{@link #takeLast() takeLast()}</td>
+ *    <td headers="Last LRemove LTimes">{@link #pollLast(long, TimeUnit) pollLast(time, unit)}</td>
  *  </tr>
  *  <tr>
- *    <td><b>Examine</b></td>
- *    <td>{@link #getLast getLast()}</td>
- *    <td>{@link #peekLast peekLast()}</td>
- *    <td><em>not applicable</em></td>
- *    <td><em>not applicable</em></td>
+ *    <th id="LExamine" style="text-align:left">Examine</th>
+ *    <td headers="Last LExamine LThrow">{@link #getLast() getLast()}</td>
+ *    <td headers="Last LExamine LValue">{@link #peekLast() peekLast()}</td>
+ *    <td headers="Last LExamine LBlock" style="font-style:italic">not applicable</td>
+ *    <td headers="Last LExamine LTimes" style="font-style:italic">not applicable</td>
  *  </tr>
  * </table>
  *
@@ -132,60 +128,55 @@
  * {@code BlockingQueue} interface are precisely equivalent to
  * {@code BlockingDeque} methods as indicated in the following table:
  *
- * <table BORDER CELLPADDING=3 CELLSPACING=1>
+ * <table class="plain">
  * <caption>Comparison of BlockingQueue and BlockingDeque methods</caption>
  *  <tr>
- *    <td ALIGN=CENTER> <b>{@code BlockingQueue} Method</b></td>
- *    <td ALIGN=CENTER> <b>Equivalent {@code BlockingDeque} Method</b></td>
+ *    <td></td>
+ *    <th id="BQueue"> {@code BlockingQueue} Method</th>
+ *    <th id="BDeque"> Equivalent {@code BlockingDeque} Method</th>
  *  </tr>
  *  <tr>
- *    <td ALIGN=CENTER COLSPAN = 2> <b>Insert</b></td>
+ *    <th id="Insert" rowspan="4" style="text-align:left; vertical-align:top">Insert</th>
+ *    <th id="add" style="font-weight:normal; text-align:left">{@link #add(Object) add(e)}</th>
+ *    <td headers="Insert BDeque add">{@link #addLast(Object) addLast(e)}</td>
  *  </tr>
  *  <tr>
- *    <td>{@link #add(Object) add(e)}</td>
- *    <td>{@link #addLast(Object) addLast(e)}</td>
+ *    <th id="offer1" style="font-weight:normal; text-align:left">{@link #offer(Object) offer(e)}</th>
+ *    <td headers="Insert BDeque offer1">{@link #offerLast(Object) offerLast(e)}</td>
  *  </tr>
  *  <tr>
- *    <td>{@link #offer(Object) offer(e)}</td>
- *    <td>{@link #offerLast(Object) offerLast(e)}</td>
+ *    <th id="put" style="font-weight:normal; text-align:left">{@link #put(Object) put(e)}</th>
+ *    <td headers="Insert BDeque put">{@link #putLast(Object) putLast(e)}</td>
  *  </tr>
  *  <tr>
- *    <td>{@link #put(Object) put(e)}</td>
- *    <td>{@link #putLast(Object) putLast(e)}</td>
+ *    <th id="offer2" style="font-weight:normal; text-align:left">{@link #offer(Object, long, TimeUnit) offer(e, time, unit)}</th>
+ *    <td headers="Insert BDeque offer2">{@link #offerLast(Object, long, TimeUnit) offerLast(e, time, unit)}</td>
  *  </tr>
  *  <tr>
- *    <td>{@link #offer(Object, long, TimeUnit) offer(e, time, unit)}</td>
- *    <td>{@link #offerLast(Object, long, TimeUnit) offerLast(e, time, unit)}</td>
+ *    <th id="Remove" rowspan="4" style="text-align:left; vertical-align:top">Remove</th>
+ *    <th id="remove" style="font-weight:normal; text-align:left">{@link #remove() remove()}</th>
+ *    <td headers="Remove BDeque remove">{@link #removeFirst() removeFirst()}</td>
  *  </tr>
  *  <tr>
- *    <td ALIGN=CENTER COLSPAN = 2> <b>Remove</b></td>
+ *    <th id="poll1" style="font-weight:normal; text-align:left">{@link #poll() poll()}</th>
+ *    <td headers="Remove BDeque poll1">{@link #pollFirst() pollFirst()}</td>
  *  </tr>
  *  <tr>
- *    <td>{@link #remove() remove()}</td>
- *    <td>{@link #removeFirst() removeFirst()}</td>
+ *    <th id="take" style="font-weight:normal; text-align:left">{@link #take() take()}</th>
+ *    <td headers="Remove BDeque take">{@link #takeFirst() takeFirst()}</td>
  *  </tr>
  *  <tr>
- *    <td>{@link #poll() poll()}</td>
- *    <td>{@link #pollFirst() pollFirst()}</td>
+ *    <th id="poll2" style="font-weight:normal; text-align:left">{@link #poll(long, TimeUnit) poll(time, unit)}</th>
+ *    <td headers="Remove BDeque poll2">{@link #pollFirst(long, TimeUnit) pollFirst(time, unit)}</td>
  *  </tr>
  *  <tr>
- *    <td>{@link #take() take()}</td>
- *    <td>{@link #takeFirst() takeFirst()}</td>
+ *    <th id="Examine" rowspan="2" style="text-align:left; vertical-align:top">Examine</th>
+ *    <th id="element" style="font-weight:normal; text-align:left">{@link #element() element()}</th>
+ *    <td headers="Examine BDeque element">{@link #getFirst() getFirst()}</td>
  *  </tr>
  *  <tr>
- *    <td>{@link #poll(long, TimeUnit) poll(time, unit)}</td>
- *    <td>{@link #pollFirst(long, TimeUnit) pollFirst(time, unit)}</td>
- *  </tr>
- *  <tr>
- *    <td ALIGN=CENTER COLSPAN = 2> <b>Examine</b></td>
- *  </tr>
- *  <tr>
- *    <td>{@link #element() element()}</td>
- *    <td>{@link #getFirst() getFirst()}</td>
- *  </tr>
- *  <tr>
- *    <td>{@link #peek() peek()}</td>
- *    <td>{@link #peekFirst() peekFirst()}</td>
+ *    <th id="peek" style="font-weight:normal; text-align:left">{@link #peek() peek()}</th>
+ *    <td headers="Examine BDeque peek">{@link #peekFirst() peekFirst()}</td>
  *  </tr>
  * </table>
  *
@@ -197,7 +188,7 @@
  * the {@code BlockingDeque} in another thread.
  *
  * <p>This interface is a member of the
- * <a href="{@docRoot}/../technotes/guides/collections/index.html">
+ * <a href="{@docRoot}/java.base/java/util/package-summary.html#CollectionsFramework">
  * Java Collections Framework</a>.
  *
  * @since 1.6
@@ -408,9 +399,9 @@
      * @return {@code true} if an element was removed as a result of this call
      * @throws ClassCastException if the class of the specified element
      *         is incompatible with this deque
-     * (<a href="../Collection.html#optional-restrictions">optional</a>)
+     * (<a href="{@docRoot}/java.base/java/util/Collection.html#optional-restrictions">optional</a>)
      * @throws NullPointerException if the specified element is null
-     * (<a href="../Collection.html#optional-restrictions">optional</a>)
+     * (<a href="{@docRoot}/java.base/java/util/Collection.html#optional-restrictions">optional</a>)
      */
     boolean removeFirstOccurrence(Object o);
 
@@ -426,9 +417,9 @@
      * @return {@code true} if an element was removed as a result of this call
      * @throws ClassCastException if the class of the specified element
      *         is incompatible with this deque
-     * (<a href="../Collection.html#optional-restrictions">optional</a>)
+     * (<a href="{@docRoot}/java.base/java/util/Collection.html#optional-restrictions">optional</a>)
      * @throws NullPointerException if the specified element is null
-     * (<a href="../Collection.html#optional-restrictions">optional</a>)
+     * (<a href="{@docRoot}/java.base/java/util/Collection.html#optional-restrictions">optional</a>)
      */
     boolean removeLastOccurrence(Object o);
 
@@ -516,7 +507,7 @@
     /**
      * Retrieves and removes the head of the queue represented by this deque
      * (in other words, the first element of this deque).
-     * This method differs from {@link #poll poll} only in that it
+     * This method differs from {@link #poll() poll()} only in that it
      * throws an exception if this deque is empty.
      *
      * <p>This method is equivalent to {@link #removeFirst() removeFirst}.
@@ -567,7 +558,7 @@
     /**
      * Retrieves, but does not remove, the head of the queue represented by
      * this deque (in other words, the first element of this deque).
-     * This method differs from {@link #peek peek} only in that it throws an
+     * This method differs from {@link #peek() peek} only in that it throws an
      * exception if this deque is empty.
      *
      * <p>This method is equivalent to {@link #getFirst() getFirst}.
@@ -603,9 +594,9 @@
      * @return {@code true} if this deque changed as a result of the call
      * @throws ClassCastException if the class of the specified element
      *         is incompatible with this deque
-     * (<a href="../Collection.html#optional-restrictions">optional</a>)
+     * (<a href="{@docRoot}/java.base/java/util/Collection.html#optional-restrictions">optional</a>)
      * @throws NullPointerException if the specified element is null
-     * (<a href="../Collection.html#optional-restrictions">optional</a>)
+     * (<a href="{@docRoot}/java.base/java/util/Collection.html#optional-restrictions">optional</a>)
      */
     boolean remove(Object o);
 
@@ -618,9 +609,9 @@
      * @return {@code true} if this deque contains the specified element
      * @throws ClassCastException if the class of the specified element
      *         is incompatible with this deque
-     * (<a href="../Collection.html#optional-restrictions">optional</a>)
+     * (<a href="{@docRoot}/java.base/java/util/Collection.html#optional-restrictions">optional</a>)
      * @throws NullPointerException if the specified element is null
-     * (<a href="../Collection.html#optional-restrictions">optional</a>)
+     * (<a href="{@docRoot}/java.base/java/util/Collection.html#optional-restrictions">optional</a>)
      */
     boolean contains(Object o);
 
diff --git a/ojluni/src/main/java/java/util/concurrent/BlockingQueue.java b/ojluni/src/main/java/java/util/concurrent/BlockingQueue.java
index 6f01b77..6fecc27 100644
--- a/ojluni/src/main/java/java/util/concurrent/BlockingQueue.java
+++ b/ojluni/src/main/java/java/util/concurrent/BlockingQueue.java
@@ -38,16 +38,10 @@
 import java.util.Collection;
 import java.util.Queue;
 
-// BEGIN android-note
-// removed link to collections framework docs from header
-// fixed framework docs link to "Collection#optional"
-// END android-note
-
 /**
- * A {@link java.util.Queue} that additionally supports operations
- * that wait for the queue to become non-empty when retrieving an
- * element, and wait for space to become available in the queue when
- * storing an element.
+ * A {@link Queue} that additionally supports operations that wait for
+ * the queue to become non-empty when retrieving an element, and wait
+ * for space to become available in the queue when storing an element.
  *
  * <p>{@code BlockingQueue} methods come in four forms, with different ways
  * of handling operations that cannot be satisfied immediately, but may be
@@ -58,35 +52,35 @@
  * and the fourth blocks for only a given maximum time limit before giving
  * up.  These methods are summarized in the following table:
  *
- * <table BORDER CELLPADDING=3 CELLSPACING=1>
+ * <table class="plain">
  * <caption>Summary of BlockingQueue methods</caption>
  *  <tr>
  *    <td></td>
- *    <td ALIGN=CENTER><em>Throws exception</em></td>
- *    <td ALIGN=CENTER><em>Special value</em></td>
- *    <td ALIGN=CENTER><em>Blocks</em></td>
- *    <td ALIGN=CENTER><em>Times out</em></td>
+ *    <th scope="col" style="font-weight:normal; font-style:italic">Throws exception</th>
+ *    <th scope="col" style="font-weight:normal; font-style:italic">Special value</th>
+ *    <th scope="col" style="font-weight:normal; font-style:italic">Blocks</th>
+ *    <th scope="col" style="font-weight:normal; font-style:italic">Times out</th>
  *  </tr>
  *  <tr>
- *    <td><b>Insert</b></td>
- *    <td>{@link #add add(e)}</td>
- *    <td>{@link #offer offer(e)}</td>
- *    <td>{@link #put put(e)}</td>
+ *    <th scope="row" style="text-align:left">Insert</th>
+ *    <td>{@link #add(Object) add(e)}</td>
+ *    <td>{@link #offer(Object) offer(e)}</td>
+ *    <td>{@link #put(Object) put(e)}</td>
  *    <td>{@link #offer(Object, long, TimeUnit) offer(e, time, unit)}</td>
  *  </tr>
  *  <tr>
- *    <td><b>Remove</b></td>
- *    <td>{@link #remove remove()}</td>
- *    <td>{@link #poll poll()}</td>
- *    <td>{@link #take take()}</td>
+ *    <th scope="row" style="text-align:left">Remove</th>
+ *    <td>{@link #remove() remove()}</td>
+ *    <td>{@link #poll() poll()}</td>
+ *    <td>{@link #take() take()}</td>
  *    <td>{@link #poll(long, TimeUnit) poll(time, unit)}</td>
  *  </tr>
  *  <tr>
- *    <td><b>Examine</b></td>
- *    <td>{@link #element element()}</td>
- *    <td>{@link #peek peek()}</td>
- *    <td><em>not applicable</em></td>
- *    <td><em>not applicable</em></td>
+ *    <th scope="row" style="text-align:left">Examine</th>
+ *    <td>{@link #element() element()}</td>
+ *    <td>{@link #peek() peek()}</td>
+ *    <td style="font-style: italic">not applicable</td>
+ *    <td style="font-style: italic">not applicable</td>
  *  </tr>
  * </table>
  *
@@ -104,7 +98,7 @@
  *
  * <p>{@code BlockingQueue} implementations are designed to be used
  * primarily for producer-consumer queues, but additionally support
- * the {@link java.util.Collection} interface.  So, for example, it is
+ * the {@link Collection} interface.  So, for example, it is
  * possible to remove an arbitrary element from a queue using
  * {@code remove(x)}. However, such operations are in general
  * <em>not</em> performed very efficiently, and are intended for only
@@ -174,7 +168,10 @@
  * actions subsequent to the access or removal of that element from
  * the {@code BlockingQueue} in another thread.
  *
- * @since 1.5
+ * <p>This interface is a member of the
+ * <a href="{@docRoot}/java.base/java/util/package-summary.html#CollectionsFramework">
+ * Java Collections Framework</a>.
+ *
  * @author Doug Lea
  * @param <E> the type of elements held in this queue
  */
@@ -304,9 +301,9 @@
      * @return {@code true} if this queue changed as a result of the call
      * @throws ClassCastException if the class of the specified element
      *         is incompatible with this queue
-     * (<a href="../Collection.html#optional-restrictions">optional</a>)
+     * (<a href="{@docRoot}/java.base/java/util/Collection.html#optional-restrictions">optional</a>)
      * @throws NullPointerException if the specified element is null
-     * (<a href="../Collection.html#optional-restrictions">optional</a>)
+     * (<a href="{@docRoot}/java.base/java/util/Collection.html#optional-restrictions">optional</a>)
      */
     boolean remove(Object o);
 
@@ -319,9 +316,9 @@
      * @return {@code true} if this queue contains the specified element
      * @throws ClassCastException if the class of the specified element
      *         is incompatible with this queue
-     * (<a href="../Collection.html#optional-restrictions">optional</a>)
+     * (<a href="{@docRoot}/java.base/java/util/Collection.html#optional-restrictions">optional</a>)
      * @throws NullPointerException if the specified element is null
-     * (<a href="../Collection.html#optional-restrictions">optional</a>)
+     * (<a href="{@docRoot}/java.base/java/util/Collection.html#optional-restrictions">optional</a>)
      */
     boolean contains(Object o);
 
diff --git a/ojluni/src/main/java/java/util/concurrent/CompletableFuture.java b/ojluni/src/main/java/java/util/concurrent/CompletableFuture.java
index e98e1be..936ffc3 100644
--- a/ojluni/src/main/java/java/util/concurrent/CompletableFuture.java
+++ b/ojluni/src/main/java/java/util/concurrent/CompletableFuture.java
@@ -35,6 +35,8 @@
 
 package java.util.concurrent;
 
+import java.lang.invoke.MethodHandles;
+import java.lang.invoke.VarHandle;
 import java.util.concurrent.locks.LockSupport;
 import java.util.function.BiConsumer;
 import java.util.function.BiFunction;
@@ -110,9 +112,9 @@
  * in a {@link NullPointerException} being thrown.
  *
  * @author Doug Lea
- * @since 1.8
  * @param <T> The result type returned by this future's {@code join}
  * and {@code get} methods
+ * @since 1.8
  */
 public class CompletableFuture<T> implements Future<T>, CompletionStage<T> {
 
@@ -125,26 +127,29 @@
      * applies across normal vs exceptional outcomes, sync vs async
      * actions, binary triggers, and various forms of completions.
      *
-     * Non-nullness of field result (set via CAS) indicates done.  An
-     * AltResult is used to box null as a result, as well as to hold
-     * exceptions.  Using a single field makes completion simple to
-     * detect and trigger.  Encoding and decoding is straightforward
-     * but adds to the sprawl of trapping and associating exceptions
-     * with targets.  Minor simplifications rely on (static) NIL (to
-     * box null results) being the only AltResult with a null
-     * exception field, so we don't usually need explicit comparisons.
-     * Even though some of the generics casts are unchecked (see
-     * SuppressWarnings annotations), they are placed to be
-     * appropriate even if checked.
+     * Non-nullness of volatile field "result" indicates done.  It may
+     * be set directly if known to be thread-confined, else via CAS.
+     * An AltResult is used to box null as a result, as well as to
+     * hold exceptions.  Using a single field makes completion simple
+     * to detect and trigger.  Result encoding and decoding is
+     * straightforward but tedious and adds to the sprawl of trapping
+     * and associating exceptions with targets.  Minor simplifications
+     * rely on (static) NIL (to box null results) being the only
+     * AltResult with a null exception field, so we don't usually need
+     * explicit comparisons.  Even though some of the generics casts
+     * are unchecked (see SuppressWarnings annotations), they are
+     * placed to be appropriate even if checked.
      *
      * Dependent actions are represented by Completion objects linked
      * as Treiber stacks headed by field "stack". There are Completion
-     * classes for each kind of action, grouped into single-input
-     * (UniCompletion), two-input (BiCompletion), projected
-     * (BiCompletions using either (not both) of two inputs), shared
-     * (CoCompletion, used by the second of two sources), zero-input
-     * source actions, and Signallers that unblock waiters. Class
-     * Completion extends ForkJoinTask to enable async execution
+     * classes for each kind of action, grouped into:
+     * - single-input (UniCompletion),
+     * - two-input (BiCompletion),
+     * - projected (BiCompletions using exactly one of two inputs),
+     * - shared (CoCompletion, used by the second of two sources),
+     * - zero-input source actions,
+     * - Signallers that unblock waiters.
+     * Class Completion extends ForkJoinTask to enable async execution
      * (adding no space overhead because we exploit its "tag" methods
      * to maintain claims). It is also declared as Runnable to allow
      * usage with arbitrary executors.
@@ -160,7 +165,7 @@
      *   encounter layers of adapters in common usages.
      *
      * * Boolean CompletableFuture method x(...) (for example
-     *   uniApply) takes all of the arguments needed to check that an
+     *   biApply) takes all of the arguments needed to check that an
      *   action is triggerable, and then either runs the action or
      *   arranges its async execution by executing its Completion
      *   argument, if present. The method returns true if known to be
@@ -170,24 +175,32 @@
      *   method with its held arguments, and on success cleans up.
      *   The mode argument allows tryFire to be called twice (SYNC,
      *   then ASYNC); the first to screen and trap exceptions while
-     *   arranging to execute, and the second when called from a
-     *   task. (A few classes are not used async so take slightly
-     *   different forms.)  The claim() callback suppresses function
-     *   invocation if already claimed by another thread.
+     *   arranging to execute, and the second when called from a task.
+     *   (A few classes are not used async so take slightly different
+     *   forms.)  The claim() callback suppresses function invocation
+     *   if already claimed by another thread.
+     *
+     * * Some classes (for example UniApply) have separate handling
+     *   code for when known to be thread-confined ("now" methods) and
+     *   for when shared (in tryFire), for efficiency.
      *
      * * CompletableFuture method xStage(...) is called from a public
-     *   stage method of CompletableFuture x. It screens user
+     *   stage method of CompletableFuture f. It screens user
      *   arguments and invokes and/or creates the stage object.  If
-     *   not async and x is already complete, the action is run
-     *   immediately.  Otherwise a Completion c is created, pushed to
-     *   x's stack (unless done), and started or triggered via
-     *   c.tryFire.  This also covers races possible if x completes
-     *   while pushing.  Classes with two inputs (for example BiApply)
-     *   deal with races across both while pushing actions.  The
-     *   second completion is a CoCompletion pointing to the first,
-     *   shared so that at most one performs the action.  The
-     *   multiple-arity methods allOf and anyOf do this pairwise to
-     *   form trees of completions.
+     *   not async and already triggerable, the action is run
+     *   immediately.  Otherwise a Completion c is created, and
+     *   submitted to the executor if triggerable, or pushed onto f's
+     *   stack if not.  Completion actions are started via c.tryFire.
+     *   We recheck after pushing to a source future's stack to cover
+     *   possible races if the source completes while pushing.
+     *   Classes with two inputs (for example BiApply) deal with races
+     *   across both while pushing actions.  The second completion is
+     *   a CoCompletion pointing to the first, shared so that at most
+     *   one performs the action.  The multiple-arity methods allOf
+     *   does this pairwise to form trees of completions.  Method
+     *   anyOf is handled differently from allOf because completion of
+     *   any source should trigger a cleanStack of other sources.
+     *   Each AnyOf completion can reach others via a shared array.
      *
      * Note that the generic type parameters of methods vary according
      * to whether "this" is a source, dependent, or completion.
@@ -212,29 +225,30 @@
      * pointing back to its sources. So we null out fields as soon as
      * possible.  The screening checks needed anyway harmlessly ignore
      * null arguments that may have been obtained during races with
-     * threads nulling out fields.  We also try to unlink fired
-     * Completions from stacks that might never be popped (see method
-     * postFire).  Completion fields need not be declared as final or
-     * volatile because they are only visible to other threads upon
-     * safe publication.
+     * threads nulling out fields.  We also try to unlink non-isLive
+     * (fired or cancelled) Completions from stacks that might
+     * otherwise never be popped: Method cleanStack always unlinks non
+     * isLive completions from the head of stack; others may
+     * occasionally remain if racing with other cancellations or
+     * removals.
+     *
+     * Completion fields need not be declared as final or volatile
+     * because they are only visible to other threads upon safe
+     * publication.
      */
 
     volatile Object result;       // Either the result or boxed AltResult
     volatile Completion stack;    // Top of Treiber stack of dependent actions
 
     final boolean internalComplete(Object r) { // CAS from null to r
-        return U.compareAndSwapObject(this, RESULT, null, r);
-    }
-
-    final boolean casStack(Completion cmp, Completion val) {
-        return U.compareAndSwapObject(this, STACK, cmp, val);
+        return RESULT.compareAndSet(this, null, r);
     }
 
     /** Returns true if successfully pushed c onto stack. */
     final boolean tryPushStack(Completion c) {
         Completion h = stack;
-        lazySetNext(c, h);
-        return U.compareAndSwapObject(this, STACK, h, c);
+        NEXT.set(c, h);         // CAS piggyback
+        return STACK.compareAndSet(this, h, c);
     }
 
     /** Unconditionally pushes c onto stack, retrying if necessary. */
@@ -254,8 +268,7 @@
 
     /** Completes with the null value, unless already completed. */
     final boolean completeNull() {
-        return U.compareAndSwapObject(this, RESULT, null,
-                                      NIL);
+        return RESULT.compareAndSet(this, null, NIL);
     }
 
     /** Returns the encoding of the given non-exceptional value. */
@@ -265,8 +278,7 @@
 
     /** Completes with a non-exceptional result, unless already completed. */
     final boolean completeValue(T t) {
-        return U.compareAndSwapObject(this, RESULT, null,
-                                      (t == null) ? NIL : t);
+        return RESULT.compareAndSet(this, null, (t == null) ? NIL : t);
     }
 
     /**
@@ -280,8 +292,7 @@
 
     /** Completes with an exceptional result, unless already completed. */
     final boolean completeThrowable(Throwable x) {
-        return U.compareAndSwapObject(this, RESULT, null,
-                                      encodeThrowable(x));
+        return RESULT.compareAndSet(this, null, encodeThrowable(x));
     }
 
     /**
@@ -308,8 +319,7 @@
      * existing CompletionException.
      */
     final boolean completeThrowable(Throwable x, Object r) {
-        return U.compareAndSwapObject(this, RESULT, null,
-                                      encodeThrowable(x, r));
+        return RESULT.compareAndSet(this, null, encodeThrowable(x, r));
     }
 
     /**
@@ -327,10 +337,11 @@
      */
     static Object encodeRelay(Object r) {
         Throwable x;
-        return (((r instanceof AltResult) &&
-                 (x = ((AltResult)r).ex) != null &&
-                 !(x instanceof CompletionException)) ?
-                new AltResult(new CompletionException(x)) : r);
+        if (r instanceof AltResult
+            && (x = ((AltResult)r).ex) != null
+            && !(x instanceof CompletionException))
+            r = new AltResult(new CompletionException(x));
+        return r;
     }
 
     /**
@@ -338,14 +349,13 @@
      * If exceptional, r is first coerced to a CompletionException.
      */
     final boolean completeRelay(Object r) {
-        return U.compareAndSwapObject(this, RESULT, null,
-                                      encodeRelay(r));
+        return RESULT.compareAndSet(this, null, encodeRelay(r));
     }
 
     /**
      * Reports result using Future.get conventions.
      */
-    private static <T> T reportGet(Object r)
+    private static Object reportGet(Object r)
         throws InterruptedException, ExecutionException {
         if (r == null) // by convention below, null means interrupted
             throw new InterruptedException();
@@ -360,14 +370,13 @@
                 x = cause;
             throw new ExecutionException(x);
         }
-        @SuppressWarnings("unchecked") T t = (T) r;
-        return t;
+        return r;
     }
 
     /**
      * Decodes outcome to return result or throw unchecked exception.
      */
-    private static <T> T reportJoin(Object r) {
+    private static Object reportJoin(Object r) {
         if (r instanceof AltResult) {
             Throwable x;
             if ((x = ((AltResult)r).ex) == null)
@@ -378,8 +387,7 @@
                 throw (CompletionException)x;
             throw new CompletionException(x);
         }
-        @SuppressWarnings("unchecked") T t = (T) r;
-        return t;
+        return r;
     }
 
     /* ------------- Async task preliminaries -------------- */
@@ -425,12 +433,6 @@
     static final int ASYNC  =  1;
     static final int NESTED = -1;
 
-    /**
-     * Spins before blocking in waitingGet
-     */
-    static final int SPINS = (Runtime.getRuntime().availableProcessors() > 1 ?
-                              1 << 8 : 0);
-
     /* ------------- Base Completion classes and operations -------------- */
 
     @SuppressWarnings("serial")
@@ -455,10 +457,6 @@
         public final void setRawResult(Void v) {}
     }
 
-    static void lazySetNext(Completion c, Completion next) {
-        U.putOrderedObject(c, NEXT, next);
-    }
-
     /**
      * Pops and tries to trigger all reachable dependents.  Call only
      * when known to be done.
@@ -473,40 +471,47 @@
         while ((h = f.stack) != null ||
                (f != this && (h = (f = this).stack) != null)) {
             CompletableFuture<?> d; Completion t;
-            if (f.casStack(h, t = h.next)) {
+            if (STACK.compareAndSet(f, h, t = h.next)) {
                 if (t != null) {
                     if (f != this) {
                         pushStack(h);
                         continue;
                     }
-                    h.next = null;    // detach
+                    NEXT.compareAndSet(h, t, null); // try to detach
                 }
                 f = (d = h.tryFire(NESTED)) == null ? this : d;
             }
         }
     }
 
-    /** Traverses stack and unlinks dead Completions. */
+    /** Traverses stack and unlinks one or more dead Completions, if found. */
     final void cleanStack() {
-        for (Completion p = null, q = stack; q != null;) {
+        Completion p = stack;
+        // ensure head of stack live
+        for (boolean unlinked = false;;) {
+            if (p == null)
+                return;
+            else if (p.isLive()) {
+                if (unlinked)
+                    return;
+                else
+                    break;
+            }
+            else if (STACK.weakCompareAndSet(this, p, (p = p.next)))
+                unlinked = true;
+            else
+                p = stack;
+        }
+        // try to unlink first non-live
+        for (Completion q = p.next; q != null;) {
             Completion s = q.next;
             if (q.isLive()) {
                 p = q;
                 q = s;
-            }
-            else if (p == null) {
-                casStack(q, s);
-                q = stack;
-            }
-            else {
-                p.next = s;
-                if (p.isLive())
-                    q = s;
-                else {
-                    p = null;  // restart
-                    q = stack;
-                }
-            }
+            } else if (NEXT.weakCompareAndSet(p, q, s))
+                break;
+            else
+                q = p.next;
         }
     }
 
@@ -544,24 +549,34 @@
         final boolean isLive() { return dep != null; }
     }
 
-    /** Pushes the given completion (if it exists) unless done. */
-    final void push(UniCompletion<?,?> c) {
+    /**
+     * Pushes the given completion unless it completes while trying.
+     * Caller should first check that result is null.
+     */
+    final void unipush(Completion c) {
         if (c != null) {
-            while (result == null && !tryPushStack(c))
-                lazySetNext(c, null); // clear on failure
+            while (!tryPushStack(c)) {
+                if (result != null) {
+                    NEXT.set(c, null);
+                    break;
+                }
+            }
+            if (result != null)
+                c.tryFire(SYNC);
         }
     }
 
     /**
-     * Post-processing by dependent after successful UniCompletion
-     * tryFire.  Tries to clean stack of source a, and then either runs
-     * postComplete or returns this to caller, depending on mode.
+     * Post-processing by dependent after successful UniCompletion tryFire.
+     * Tries to clean stack of source a, and then either runs postComplete
+     * or returns this to caller, depending on mode.
      */
     final CompletableFuture<T> postFire(CompletableFuture<?> a, int mode) {
         if (a != null && a.stack != null) {
-            if (mode < 0 || a.result == null)
+            Object r;
+            if ((r = a.result) == null)
                 a.cleanStack();
-            else
+            if (mode >= 0 && (r != null || a.result != null))
                 a.postComplete();
         }
         if (result != null && stack != null) {
@@ -583,48 +598,65 @@
         }
         final CompletableFuture<V> tryFire(int mode) {
             CompletableFuture<V> d; CompletableFuture<T> a;
-            if ((d = dep) == null ||
-                !d.uniApply(a = src, fn, mode > 0 ? null : this))
+            Object r; Throwable x; Function<? super T,? extends V> f;
+            if ((d = dep) == null || (f = fn) == null
+                || (a = src) == null || (r = a.result) == null)
                 return null;
+            tryComplete: if (d.result == null) {
+                if (r instanceof AltResult) {
+                    if ((x = ((AltResult)r).ex) != null) {
+                        d.completeThrowable(x, r);
+                        break tryComplete;
+                    }
+                    r = null;
+                }
+                try {
+                    if (mode <= 0 && !claim())
+                        return null;
+                    else {
+                        @SuppressWarnings("unchecked") T t = (T) r;
+                        d.completeValue(f.apply(t));
+                    }
+                } catch (Throwable ex) {
+                    d.completeThrowable(ex);
+                }
+            }
             dep = null; src = null; fn = null;
             return d.postFire(a, mode);
         }
     }
 
-    final <S> boolean uniApply(CompletableFuture<S> a,
-                               Function<? super S,? extends T> f,
-                               UniApply<S,T> c) {
-        Object r; Throwable x;
-        if (a == null || (r = a.result) == null || f == null)
-            return false;
-        tryComplete: if (result == null) {
-            if (r instanceof AltResult) {
-                if ((x = ((AltResult)r).ex) != null) {
-                    completeThrowable(x, r);
-                    break tryComplete;
-                }
-                r = null;
-            }
-            try {
-                if (c != null && !c.claim())
-                    return false;
-                @SuppressWarnings("unchecked") S s = (S) r;
-                completeValue(f.apply(s));
-            } catch (Throwable ex) {
-                completeThrowable(ex);
-            }
-        }
-        return true;
-    }
-
     private <V> CompletableFuture<V> uniApplyStage(
         Executor e, Function<? super T,? extends V> f) {
         if (f == null) throw new NullPointerException();
+        Object r;
+        if ((r = result) != null)
+            return uniApplyNow(r, e, f);
         CompletableFuture<V> d = newIncompleteFuture();
-        if (e != null || !d.uniApply(this, f, null)) {
-            UniApply<T,V> c = new UniApply<T,V>(e, d, this, f);
-            push(c);
-            c.tryFire(SYNC);
+        unipush(new UniApply<T,V>(e, d, this, f));
+        return d;
+    }
+
+    private <V> CompletableFuture<V> uniApplyNow(
+        Object r, Executor e, Function<? super T,? extends V> f) {
+        Throwable x;
+        CompletableFuture<V> d = newIncompleteFuture();
+        if (r instanceof AltResult) {
+            if ((x = ((AltResult)r).ex) != null) {
+                d.result = encodeThrowable(x, r);
+                return d;
+            }
+            r = null;
+        }
+        try {
+            if (e != null) {
+                e.execute(new UniApply<T,V>(null, d, this, f));
+            } else {
+                @SuppressWarnings("unchecked") T t = (T) r;
+                d.result = d.encodeValue(f.apply(t));
+            }
+        } catch (Throwable ex) {
+            d.result = encodeThrowable(ex);
         }
         return d;
     }
@@ -638,48 +670,67 @@
         }
         final CompletableFuture<Void> tryFire(int mode) {
             CompletableFuture<Void> d; CompletableFuture<T> a;
-            if ((d = dep) == null ||
-                !d.uniAccept(a = src, fn, mode > 0 ? null : this))
+            Object r; Throwable x; Consumer<? super T> f;
+            if ((d = dep) == null || (f = fn) == null
+                || (a = src) == null || (r = a.result) == null)
                 return null;
+            tryComplete: if (d.result == null) {
+                if (r instanceof AltResult) {
+                    if ((x = ((AltResult)r).ex) != null) {
+                        d.completeThrowable(x, r);
+                        break tryComplete;
+                    }
+                    r = null;
+                }
+                try {
+                    if (mode <= 0 && !claim())
+                        return null;
+                    else {
+                        @SuppressWarnings("unchecked") T t = (T) r;
+                        f.accept(t);
+                        d.completeNull();
+                    }
+                } catch (Throwable ex) {
+                    d.completeThrowable(ex);
+                }
+            }
             dep = null; src = null; fn = null;
             return d.postFire(a, mode);
         }
     }
 
-    final <S> boolean uniAccept(CompletableFuture<S> a,
-                                Consumer<? super S> f, UniAccept<S> c) {
-        Object r; Throwable x;
-        if (a == null || (r = a.result) == null || f == null)
-            return false;
-        tryComplete: if (result == null) {
-            if (r instanceof AltResult) {
-                if ((x = ((AltResult)r).ex) != null) {
-                    completeThrowable(x, r);
-                    break tryComplete;
-                }
-                r = null;
-            }
-            try {
-                if (c != null && !c.claim())
-                    return false;
-                @SuppressWarnings("unchecked") S s = (S) r;
-                f.accept(s);
-                completeNull();
-            } catch (Throwable ex) {
-                completeThrowable(ex);
-            }
-        }
-        return true;
-    }
-
     private CompletableFuture<Void> uniAcceptStage(Executor e,
                                                    Consumer<? super T> f) {
         if (f == null) throw new NullPointerException();
+        Object r;
+        if ((r = result) != null)
+            return uniAcceptNow(r, e, f);
         CompletableFuture<Void> d = newIncompleteFuture();
-        if (e != null || !d.uniAccept(this, f, null)) {
-            UniAccept<T> c = new UniAccept<T>(e, d, this, f);
-            push(c);
-            c.tryFire(SYNC);
+        unipush(new UniAccept<T>(e, d, this, f));
+        return d;
+    }
+
+    private CompletableFuture<Void> uniAcceptNow(
+        Object r, Executor e, Consumer<? super T> f) {
+        Throwable x;
+        CompletableFuture<Void> d = newIncompleteFuture();
+        if (r instanceof AltResult) {
+            if ((x = ((AltResult)r).ex) != null) {
+                d.result = encodeThrowable(x, r);
+                return d;
+            }
+            r = null;
+        }
+        try {
+            if (e != null) {
+                e.execute(new UniAccept<T>(null, d, this, f));
+            } else {
+                @SuppressWarnings("unchecked") T t = (T) r;
+                f.accept(t);
+                d.result = NIL;
+            }
+        } catch (Throwable ex) {
+            d.result = encodeThrowable(ex);
         }
         return d;
     }
@@ -693,42 +744,56 @@
         }
         final CompletableFuture<Void> tryFire(int mode) {
             CompletableFuture<Void> d; CompletableFuture<T> a;
-            if ((d = dep) == null ||
-                !d.uniRun(a = src, fn, mode > 0 ? null : this))
+            Object r; Throwable x; Runnable f;
+            if ((d = dep) == null || (f = fn) == null
+                || (a = src) == null || (r = a.result) == null)
                 return null;
+            if (d.result == null) {
+                if (r instanceof AltResult && (x = ((AltResult)r).ex) != null)
+                    d.completeThrowable(x, r);
+                else
+                    try {
+                        if (mode <= 0 && !claim())
+                            return null;
+                        else {
+                            f.run();
+                            d.completeNull();
+                        }
+                    } catch (Throwable ex) {
+                        d.completeThrowable(ex);
+                    }
+            }
             dep = null; src = null; fn = null;
             return d.postFire(a, mode);
         }
     }
 
-    final boolean uniRun(CompletableFuture<?> a, Runnable f, UniRun<?> c) {
-        Object r; Throwable x;
-        if (a == null || (r = a.result) == null || f == null)
-            return false;
-        if (result == null) {
-            if (r instanceof AltResult && (x = ((AltResult)r).ex) != null)
-                completeThrowable(x, r);
-            else
-                try {
-                    if (c != null && !c.claim())
-                        return false;
-                    f.run();
-                    completeNull();
-                } catch (Throwable ex) {
-                    completeThrowable(ex);
-                }
-        }
-        return true;
-    }
-
     private CompletableFuture<Void> uniRunStage(Executor e, Runnable f) {
         if (f == null) throw new NullPointerException();
+        Object r;
+        if ((r = result) != null)
+            return uniRunNow(r, e, f);
         CompletableFuture<Void> d = newIncompleteFuture();
-        if (e != null || !d.uniRun(this, f, null)) {
-            UniRun<T> c = new UniRun<T>(e, d, this, f);
-            push(c);
-            c.tryFire(SYNC);
-        }
+        unipush(new UniRun<T>(e, d, this, f));
+        return d;
+    }
+
+    private CompletableFuture<Void> uniRunNow(Object r, Executor e, Runnable f) {
+        Throwable x;
+        CompletableFuture<Void> d = newIncompleteFuture();
+        if (r instanceof AltResult && (x = ((AltResult)r).ex) != null)
+            d.result = encodeThrowable(x, r);
+        else
+            try {
+                if (e != null) {
+                    e.execute(new UniRun<T>(null, d, this, f));
+                } else {
+                    f.run();
+                    d.result = NIL;
+                }
+            } catch (Throwable ex) {
+                d.result = encodeThrowable(ex);
+            }
         return d;
     }
 
@@ -742,20 +807,20 @@
         }
         final CompletableFuture<T> tryFire(int mode) {
             CompletableFuture<T> d; CompletableFuture<T> a;
-            if ((d = dep) == null ||
-                !d.uniWhenComplete(a = src, fn, mode > 0 ? null : this))
+            Object r; BiConsumer<? super T, ? super Throwable> f;
+            if ((d = dep) == null || (f = fn) == null
+                || (a = src) == null || (r = a.result) == null
+                || !d.uniWhenComplete(r, f, mode > 0 ? null : this))
                 return null;
             dep = null; src = null; fn = null;
             return d.postFire(a, mode);
         }
     }
 
-    final boolean uniWhenComplete(CompletableFuture<T> a,
+    final boolean uniWhenComplete(Object r,
                                   BiConsumer<? super T,? super Throwable> f,
                                   UniWhenComplete<T> c) {
-        Object r; T t; Throwable x = null;
-        if (a == null || (r = a.result) == null || f == null)
-            return false;
+        T t; Throwable x = null;
         if (result == null) {
             try {
                 if (c != null && !c.claim())
@@ -787,10 +852,17 @@
         Executor e, BiConsumer<? super T, ? super Throwable> f) {
         if (f == null) throw new NullPointerException();
         CompletableFuture<T> d = newIncompleteFuture();
-        if (e != null || !d.uniWhenComplete(this, f, null)) {
-            UniWhenComplete<T> c = new UniWhenComplete<T>(e, d, this, f);
-            push(c);
-            c.tryFire(SYNC);
+        Object r;
+        if ((r = result) == null)
+            unipush(new UniWhenComplete<T>(e, d, this, f));
+        else if (e == null)
+            d.uniWhenComplete(r, f, null);
+        else {
+            try {
+                e.execute(new UniWhenComplete<T>(null, d, this, f));
+            } catch (Throwable ex) {
+                d.result = encodeThrowable(ex);
+            }
         }
         return d;
     }
@@ -805,20 +877,20 @@
         }
         final CompletableFuture<V> tryFire(int mode) {
             CompletableFuture<V> d; CompletableFuture<T> a;
-            if ((d = dep) == null ||
-                !d.uniHandle(a = src, fn, mode > 0 ? null : this))
+            Object r; BiFunction<? super T, Throwable, ? extends V> f;
+            if ((d = dep) == null || (f = fn) == null
+                || (a = src) == null || (r = a.result) == null
+                || !d.uniHandle(r, f, mode > 0 ? null : this))
                 return null;
             dep = null; src = null; fn = null;
             return d.postFire(a, mode);
         }
     }
 
-    final <S> boolean uniHandle(CompletableFuture<S> a,
+    final <S> boolean uniHandle(Object r,
                                 BiFunction<? super S, Throwable, ? extends T> f,
                                 UniHandle<S,T> c) {
-        Object r; S s; Throwable x;
-        if (a == null || (r = a.result) == null || f == null)
-            return false;
+        S s; Throwable x;
         if (result == null) {
             try {
                 if (c != null && !c.claim())
@@ -843,10 +915,17 @@
         Executor e, BiFunction<? super T, Throwable, ? extends V> f) {
         if (f == null) throw new NullPointerException();
         CompletableFuture<V> d = newIncompleteFuture();
-        if (e != null || !d.uniHandle(this, f, null)) {
-            UniHandle<T,V> c = new UniHandle<T,V>(e, d, this, f);
-            push(c);
-            c.tryFire(SYNC);
+        Object r;
+        if ((r = result) == null)
+            unipush(new UniHandle<T,V>(e, d, this, f));
+        else if (e == null)
+            d.uniHandle(r, f, null);
+        else {
+            try {
+                e.execute(new UniHandle<T,V>(null, d, this, f));
+            } catch (Throwable ex) {
+                d.result = encodeThrowable(ex);
+            }
         }
         return d;
     }
@@ -861,19 +940,20 @@
         final CompletableFuture<T> tryFire(int mode) { // never ASYNC
             // assert mode != ASYNC;
             CompletableFuture<T> d; CompletableFuture<T> a;
-            if ((d = dep) == null || !d.uniExceptionally(a = src, fn, this))
+            Object r; Function<? super Throwable, ? extends T> f;
+            if ((d = dep) == null || (f = fn) == null
+                || (a = src) == null || (r = a.result) == null
+                || !d.uniExceptionally(r, f, this))
                 return null;
             dep = null; src = null; fn = null;
             return d.postFire(a, mode);
         }
     }
 
-    final boolean uniExceptionally(CompletableFuture<T> a,
+    final boolean uniExceptionally(Object r,
                                    Function<? super Throwable, ? extends T> f,
                                    UniExceptionally<T> c) {
-        Object r; Throwable x;
-        if (a == null || (r = a.result) == null || f == null)
-            return false;
+        Throwable x;
         if (result == null) {
             try {
                 if (r instanceof AltResult && (x = ((AltResult)r).ex) != null) {
@@ -893,47 +973,39 @@
         Function<Throwable, ? extends T> f) {
         if (f == null) throw new NullPointerException();
         CompletableFuture<T> d = newIncompleteFuture();
-        if (!d.uniExceptionally(this, f, null)) {
-            UniExceptionally<T> c = new UniExceptionally<T>(d, this, f);
-            push(c);
-            c.tryFire(SYNC);
-        }
+        Object r;
+        if ((r = result) == null)
+            unipush(new UniExceptionally<T>(d, this, f));
+        else
+            d.uniExceptionally(r, f, null);
         return d;
     }
 
     @SuppressWarnings("serial")
-    static final class UniRelay<T> extends UniCompletion<T,T> { // for Compose
-        UniRelay(CompletableFuture<T> dep, CompletableFuture<T> src) {
+    static final class UniRelay<U, T extends U> extends UniCompletion<T,U> {
+        UniRelay(CompletableFuture<U> dep, CompletableFuture<T> src) {
             super(null, dep, src);
         }
-        final CompletableFuture<T> tryFire(int mode) {
-            CompletableFuture<T> d; CompletableFuture<T> a;
-            if ((d = dep) == null || !d.uniRelay(a = src))
+        final CompletableFuture<U> tryFire(int mode) {
+            CompletableFuture<U> d; CompletableFuture<T> a; Object r;
+            if ((d = dep) == null
+                || (a = src) == null || (r = a.result) == null)
                 return null;
+            if (d.result == null)
+                d.completeRelay(r);
             src = null; dep = null;
             return d.postFire(a, mode);
         }
     }
 
-    final boolean uniRelay(CompletableFuture<T> a) {
+    private static <U, T extends U> CompletableFuture<U> uniCopyStage(
+        CompletableFuture<T> src) {
         Object r;
-        if (a == null || (r = a.result) == null)
-            return false;
-        if (result == null) // no need to claim
-            completeRelay(r);
-        return true;
-    }
-
-    private CompletableFuture<T> uniCopyStage() {
-        Object r;
-        CompletableFuture<T> d = newIncompleteFuture();
-        if ((r = result) != null)
-            d.completeRelay(r);
-        else {
-            UniRelay<T> c = new UniRelay<T>(d, this);
-            push(c);
-            c.tryFire(SYNC);
-        }
+        CompletableFuture<U> d = src.newIncompleteFuture();
+        if ((r = src.result) != null)
+            d.result = encodeRelay(r);
+        else
+            src.unipush(new UniRelay<U,T>(d, src));
         return d;
     }
 
@@ -942,9 +1014,7 @@
         if ((r = result) != null)
             return new MinimalStage<T>(encodeRelay(r));
         MinimalStage<T> d = new MinimalStage<T>();
-        UniRelay<T> c = new UniRelay<T>(d, this);
-        push(c);
-        c.tryFire(SYNC);
+        unipush(new UniRelay<T,T>(d, this));
         return d;
     }
 
@@ -958,54 +1028,48 @@
         }
         final CompletableFuture<V> tryFire(int mode) {
             CompletableFuture<V> d; CompletableFuture<T> a;
-            if ((d = dep) == null ||
-                !d.uniCompose(a = src, fn, mode > 0 ? null : this))
+            Function<? super T, ? extends CompletionStage<V>> f;
+            Object r; Throwable x;
+            if ((d = dep) == null || (f = fn) == null
+                || (a = src) == null || (r = a.result) == null)
                 return null;
+            tryComplete: if (d.result == null) {
+                if (r instanceof AltResult) {
+                    if ((x = ((AltResult)r).ex) != null) {
+                        d.completeThrowable(x, r);
+                        break tryComplete;
+                    }
+                    r = null;
+                }
+                try {
+                    if (mode <= 0 && !claim())
+                        return null;
+                    @SuppressWarnings("unchecked") T t = (T) r;
+                    CompletableFuture<V> g = f.apply(t).toCompletableFuture();
+                    if ((r = g.result) != null)
+                        d.completeRelay(r);
+                    else {
+                        g.unipush(new UniRelay<V,V>(d, g));
+                        if (d.result == null)
+                            return null;
+                    }
+                } catch (Throwable ex) {
+                    d.completeThrowable(ex);
+                }
+            }
             dep = null; src = null; fn = null;
             return d.postFire(a, mode);
         }
     }
 
-    final <S> boolean uniCompose(
-        CompletableFuture<S> a,
-        Function<? super S, ? extends CompletionStage<T>> f,
-        UniCompose<S,T> c) {
-        Object r; Throwable x;
-        if (a == null || (r = a.result) == null || f == null)
-            return false;
-        tryComplete: if (result == null) {
-            if (r instanceof AltResult) {
-                if ((x = ((AltResult)r).ex) != null) {
-                    completeThrowable(x, r);
-                    break tryComplete;
-                }
-                r = null;
-            }
-            try {
-                if (c != null && !c.claim())
-                    return false;
-                @SuppressWarnings("unchecked") S s = (S) r;
-                CompletableFuture<T> g = f.apply(s).toCompletableFuture();
-                if (g.result == null || !uniRelay(g)) {
-                    UniRelay<T> copy = new UniRelay<T>(this, g);
-                    g.push(copy);
-                    copy.tryFire(SYNC);
-                    if (result == null)
-                        return false;
-                }
-            } catch (Throwable ex) {
-                completeThrowable(ex);
-            }
-        }
-        return true;
-    }
-
     private <V> CompletableFuture<V> uniComposeStage(
         Executor e, Function<? super T, ? extends CompletionStage<V>> f) {
         if (f == null) throw new NullPointerException();
-        Object r, s; Throwable x;
         CompletableFuture<V> d = newIncompleteFuture();
-        if (e == null && (r = result) != null) {
+        Object r, s; Throwable x;
+        if ((r = result) == null)
+            unipush(new UniCompose<T,V>(e, d, this, f));
+        else if (e == null) {
             if (r instanceof AltResult) {
                 if ((x = ((AltResult)r).ex) != null) {
                     d.result = encodeThrowable(x, r);
@@ -1017,21 +1081,20 @@
                 @SuppressWarnings("unchecked") T t = (T) r;
                 CompletableFuture<V> g = f.apply(t).toCompletableFuture();
                 if ((s = g.result) != null)
-                    d.completeRelay(s);
+                    d.result = encodeRelay(s);
                 else {
-                    UniRelay<V> c = new UniRelay<V>(d, g);
-                    g.push(c);
-                    c.tryFire(SYNC);
+                    g.unipush(new UniRelay<V,V>(d, g));
                 }
-                return d;
             } catch (Throwable ex) {
                 d.result = encodeThrowable(ex);
-                return d;
             }
         }
-        UniCompose<T,V> c = new UniCompose<T,V>(e, d, this, f);
-        push(c);
-        c.tryFire(SYNC);
+        else
+            try {
+                e.execute(new UniCompose<T,V>(null, d, this, f));
+            } catch (Throwable ex) {
+                d.result = encodeThrowable(ex);
+            }
         return d;
     }
 
@@ -1061,21 +1124,28 @@
         }
         final boolean isLive() {
             BiCompletion<?,?,?> c;
-            return (c = base) != null && c.dep != null;
+            return (c = base) != null
+                // && c.isLive()
+                && c.dep != null;
         }
     }
 
-    /** Pushes completion to this and b unless both done. */
+    /**
+     * Pushes completion to this and b unless both done.
+     * Caller should first check that either result or b.result is null.
+     */
     final void bipush(CompletableFuture<?> b, BiCompletion<?,?,?> c) {
         if (c != null) {
-            Object r;
-            while ((r = result) == null && !tryPushStack(c))
-                lazySetNext(c, null); // clear on failure
-            if (b != null && b != this && b.result == null) {
-                Completion q = (r != null) ? c : new CoCompletion(c);
-                while (b.result == null && !b.tryPushStack(q))
-                    lazySetNext(q, null); // clear on failure
+            while (result == null) {
+                if (tryPushStack(c)) {
+                    if (b.result == null)
+                        b.unipush(new CoCompletion(c));
+                    else if (result != null)
+                        c.tryFire(SYNC);
+                    return;
+                }
             }
+            b.unipush(c);
         }
     }
 
@@ -1083,9 +1153,10 @@
     final CompletableFuture<T> postFire(CompletableFuture<?> a,
                                         CompletableFuture<?> b, int mode) {
         if (b != null && b.stack != null) { // clean second source
-            if (mode < 0 || b.result == null)
+            Object r;
+            if ((r = b.result) == null)
                 b.cleanStack();
-            else
+            if (mode >= 0 && (r != null || b.result != null))
                 b.postComplete();
         }
         return postFire(a, mode);
@@ -1103,22 +1174,21 @@
             CompletableFuture<V> d;
             CompletableFuture<T> a;
             CompletableFuture<U> b;
-            if ((d = dep) == null ||
-                !d.biApply(a = src, b = snd, fn, mode > 0 ? null : this))
+            Object r, s; BiFunction<? super T,? super U,? extends V> f;
+            if ((d = dep) == null || (f = fn) == null
+                || (a = src) == null || (r = a.result) == null
+                || (b = snd) == null || (s = b.result) == null
+                || !d.biApply(r, s, f, mode > 0 ? null : this))
                 return null;
             dep = null; src = null; snd = null; fn = null;
             return d.postFire(a, b, mode);
         }
     }
 
-    final <R,S> boolean biApply(CompletableFuture<R> a,
-                                CompletableFuture<S> b,
+    final <R,S> boolean biApply(Object r, Object s,
                                 BiFunction<? super R,? super S,? extends T> f,
                                 BiApply<R,S,T> c) {
-        Object r, s; Throwable x;
-        if (a == null || (r = a.result) == null ||
-            b == null || (s = b.result) == null || f == null)
-            return false;
+        Throwable x;
         tryComplete: if (result == null) {
             if (r instanceof AltResult) {
                 if ((x = ((AltResult)r).ex) != null) {
@@ -1150,15 +1220,20 @@
     private <U,V> CompletableFuture<V> biApplyStage(
         Executor e, CompletionStage<U> o,
         BiFunction<? super T,? super U,? extends V> f) {
-        CompletableFuture<U> b;
+        CompletableFuture<U> b; Object r, s;
         if (f == null || (b = o.toCompletableFuture()) == null)
             throw new NullPointerException();
         CompletableFuture<V> d = newIncompleteFuture();
-        if (e != null || !d.biApply(this, b, f, null)) {
-            BiApply<T,U,V> c = new BiApply<T,U,V>(e, d, this, b, f);
-            bipush(b, c);
-            c.tryFire(SYNC);
-        }
+        if ((r = result) == null || (s = b.result) == null)
+            bipush(b, new BiApply<T,U,V>(e, d, this, b, f));
+        else if (e == null)
+            d.biApply(r, s, f, null);
+        else
+            try {
+                e.execute(new BiApply<T,U,V>(null, d, this, b, f));
+            } catch (Throwable ex) {
+                d.result = encodeThrowable(ex);
+            }
         return d;
     }
 
@@ -1174,22 +1249,21 @@
             CompletableFuture<Void> d;
             CompletableFuture<T> a;
             CompletableFuture<U> b;
-            if ((d = dep) == null ||
-                !d.biAccept(a = src, b = snd, fn, mode > 0 ? null : this))
+            Object r, s; BiConsumer<? super T,? super U> f;
+            if ((d = dep) == null || (f = fn) == null
+                || (a = src) == null || (r = a.result) == null
+                || (b = snd) == null || (s = b.result) == null
+                || !d.biAccept(r, s, f, mode > 0 ? null : this))
                 return null;
             dep = null; src = null; snd = null; fn = null;
             return d.postFire(a, b, mode);
         }
     }
 
-    final <R,S> boolean biAccept(CompletableFuture<R> a,
-                                 CompletableFuture<S> b,
+    final <R,S> boolean biAccept(Object r, Object s,
                                  BiConsumer<? super R,? super S> f,
                                  BiAccept<R,S> c) {
-        Object r, s; Throwable x;
-        if (a == null || (r = a.result) == null ||
-            b == null || (s = b.result) == null || f == null)
-            return false;
+        Throwable x;
         tryComplete: if (result == null) {
             if (r instanceof AltResult) {
                 if ((x = ((AltResult)r).ex) != null) {
@@ -1222,15 +1296,20 @@
     private <U> CompletableFuture<Void> biAcceptStage(
         Executor e, CompletionStage<U> o,
         BiConsumer<? super T,? super U> f) {
-        CompletableFuture<U> b;
+        CompletableFuture<U> b; Object r, s;
         if (f == null || (b = o.toCompletableFuture()) == null)
             throw new NullPointerException();
         CompletableFuture<Void> d = newIncompleteFuture();
-        if (e != null || !d.biAccept(this, b, f, null)) {
-            BiAccept<T,U> c = new BiAccept<T,U>(e, d, this, b, f);
-            bipush(b, c);
-            c.tryFire(SYNC);
-        }
+        if ((r = result) == null || (s = b.result) == null)
+            bipush(b, new BiAccept<T,U>(e, d, this, b, f));
+        else if (e == null)
+            d.biAccept(r, s, f, null);
+        else
+            try {
+                e.execute(new BiAccept<T,U>(null, d, this, b, f));
+            } catch (Throwable ex) {
+                d.result = encodeThrowable(ex);
+            }
         return d;
     }
 
@@ -1238,8 +1317,7 @@
     static final class BiRun<T,U> extends BiCompletion<T,U,Void> {
         Runnable fn;
         BiRun(Executor executor, CompletableFuture<Void> dep,
-              CompletableFuture<T> src,
-              CompletableFuture<U> snd,
+              CompletableFuture<T> src, CompletableFuture<U> snd,
               Runnable fn) {
             super(executor, dep, src, snd); this.fn = fn;
         }
@@ -1247,25 +1325,25 @@
             CompletableFuture<Void> d;
             CompletableFuture<T> a;
             CompletableFuture<U> b;
-            if ((d = dep) == null ||
-                !d.biRun(a = src, b = snd, fn, mode > 0 ? null : this))
+            Object r, s; Runnable f;
+            if ((d = dep) == null || (f = fn) == null
+                || (a = src) == null || (r = a.result) == null
+                || (b = snd) == null || (s = b.result) == null
+                || !d.biRun(r, s, f, mode > 0 ? null : this))
                 return null;
             dep = null; src = null; snd = null; fn = null;
             return d.postFire(a, b, mode);
         }
     }
 
-    final boolean biRun(CompletableFuture<?> a, CompletableFuture<?> b,
-                        Runnable f, BiRun<?,?> c) {
-        Object r, s; Throwable x;
-        if (a == null || (r = a.result) == null ||
-            b == null || (s = b.result) == null || f == null)
-            return false;
+    final boolean biRun(Object r, Object s, Runnable f, BiRun<?,?> c) {
+        Throwable x; Object z;
         if (result == null) {
-            if (r instanceof AltResult && (x = ((AltResult)r).ex) != null)
-                completeThrowable(x, r);
-            else if (s instanceof AltResult && (x = ((AltResult)s).ex) != null)
-                completeThrowable(x, s);
+            if ((r instanceof AltResult
+                 && (x = ((AltResult)(z = r)).ex) != null) ||
+                (s instanceof AltResult
+                 && (x = ((AltResult)(z = s)).ex) != null))
+                completeThrowable(x, z);
             else
                 try {
                     if (c != null && !c.claim())
@@ -1281,52 +1359,52 @@
 
     private CompletableFuture<Void> biRunStage(Executor e, CompletionStage<?> o,
                                                Runnable f) {
-        CompletableFuture<?> b;
+        CompletableFuture<?> b; Object r, s;
         if (f == null || (b = o.toCompletableFuture()) == null)
             throw new NullPointerException();
         CompletableFuture<Void> d = newIncompleteFuture();
-        if (e != null || !d.biRun(this, b, f, null)) {
-            BiRun<T,?> c = new BiRun<>(e, d, this, b, f);
-            bipush(b, c);
-            c.tryFire(SYNC);
-        }
+        if ((r = result) == null || (s = b.result) == null)
+            bipush(b, new BiRun<>(e, d, this, b, f));
+        else if (e == null)
+            d.biRun(r, s, f, null);
+        else
+            try {
+                e.execute(new BiRun<>(null, d, this, b, f));
+            } catch (Throwable ex) {
+                d.result = encodeThrowable(ex);
+            }
         return d;
     }
 
     @SuppressWarnings("serial")
     static final class BiRelay<T,U> extends BiCompletion<T,U,Void> { // for And
         BiRelay(CompletableFuture<Void> dep,
-                CompletableFuture<T> src,
-                CompletableFuture<U> snd) {
+                CompletableFuture<T> src, CompletableFuture<U> snd) {
             super(null, dep, src, snd);
         }
         final CompletableFuture<Void> tryFire(int mode) {
             CompletableFuture<Void> d;
             CompletableFuture<T> a;
             CompletableFuture<U> b;
-            if ((d = dep) == null || !d.biRelay(a = src, b = snd))
+            Object r, s, z; Throwable x;
+            if ((d = dep) == null
+                || (a = src) == null || (r = a.result) == null
+                || (b = snd) == null || (s = b.result) == null)
                 return null;
+            if (d.result == null) {
+                if ((r instanceof AltResult
+                     && (x = ((AltResult)(z = r)).ex) != null) ||
+                    (s instanceof AltResult
+                     && (x = ((AltResult)(z = s)).ex) != null))
+                    d.completeThrowable(x, z);
+                else
+                    d.completeNull();
+            }
             src = null; snd = null; dep = null;
             return d.postFire(a, b, mode);
         }
     }
 
-    boolean biRelay(CompletableFuture<?> a, CompletableFuture<?> b) {
-        Object r, s; Throwable x;
-        if (a == null || (r = a.result) == null ||
-            b == null || (s = b.result) == null)
-            return false;
-        if (result == null) {
-            if (r instanceof AltResult && (x = ((AltResult)r).ex) != null)
-                completeThrowable(x, r);
-            else if (s instanceof AltResult && (x = ((AltResult)s).ex) != null)
-                completeThrowable(x, s);
-            else
-                completeNull();
-        }
-        return true;
-    }
-
     /** Recursively constructs a tree of completions. */
     static CompletableFuture<Void> andTree(CompletableFuture<?>[] cfs,
                                            int lo, int hi) {
@@ -1334,39 +1412,44 @@
         if (lo > hi) // empty
             d.result = NIL;
         else {
-            CompletableFuture<?> a, b;
+            CompletableFuture<?> a, b; Object r, s, z; Throwable x;
             int mid = (lo + hi) >>> 1;
             if ((a = (lo == mid ? cfs[lo] :
                       andTree(cfs, lo, mid))) == null ||
                 (b = (lo == hi ? a : (hi == mid+1) ? cfs[hi] :
                       andTree(cfs, mid+1, hi))) == null)
                 throw new NullPointerException();
-            if (!d.biRelay(a, b)) {
-                BiRelay<?,?> c = new BiRelay<>(d, a, b);
-                a.bipush(b, c);
-                c.tryFire(SYNC);
-            }
+            if ((r = a.result) == null || (s = b.result) == null)
+                a.bipush(b, new BiRelay<>(d, a, b));
+            else if ((r instanceof AltResult
+                      && (x = ((AltResult)(z = r)).ex) != null) ||
+                     (s instanceof AltResult
+                      && (x = ((AltResult)(z = s)).ex) != null))
+                d.result = encodeThrowable(x, z);
+            else
+                d.result = NIL;
         }
         return d;
     }
 
     /* ------------- Projected (Ored) BiCompletions -------------- */
 
-    /** Pushes completion to this and b unless either done. */
+    /**
+     * Pushes completion to this and b unless either done.
+     * Caller should first check that result and b.result are both null.
+     */
     final void orpush(CompletableFuture<?> b, BiCompletion<?,?,?> c) {
         if (c != null) {
-            while ((b == null || b.result == null) && result == null) {
-                if (tryPushStack(c)) {
-                    if (b != null && b != this && b.result == null) {
-                        Completion q = new CoCompletion(c);
-                        while (result == null && b.result == null &&
-                               !b.tryPushStack(q))
-                            lazySetNext(q, null); // clear on failure
-                    }
+            while (!tryPushStack(c)) {
+                if (result != null) {
+                    NEXT.set(c, null);
                     break;
                 }
-                lazySetNext(c, null); // clear on failure
             }
+            if (result != null)
+                c.tryFire(SYNC);
+            else
+                b.unipush(new CoCompletion(c));
         }
     }
 
@@ -1374,8 +1457,7 @@
     static final class OrApply<T,U extends T,V> extends BiCompletion<T,U,V> {
         Function<? super T,? extends V> fn;
         OrApply(Executor executor, CompletableFuture<V> dep,
-                CompletableFuture<T> src,
-                CompletableFuture<U> snd,
+                CompletableFuture<T> src, CompletableFuture<U> snd,
                 Function<? super T,? extends V> fn) {
             super(executor, dep, src, snd); this.fn = fn;
         }
@@ -1383,54 +1465,46 @@
             CompletableFuture<V> d;
             CompletableFuture<T> a;
             CompletableFuture<U> b;
-            if ((d = dep) == null ||
-                !d.orApply(a = src, b = snd, fn, mode > 0 ? null : this))
+            Object r; Throwable x; Function<? super T,? extends V> f;
+            if ((d = dep) == null || (f = fn) == null
+                || (a = src) == null || (b = snd) == null
+                || ((r = a.result) == null && (r = b.result) == null))
                 return null;
+            tryComplete: if (d.result == null) {
+                try {
+                    if (mode <= 0 && !claim())
+                        return null;
+                    if (r instanceof AltResult) {
+                        if ((x = ((AltResult)r).ex) != null) {
+                            d.completeThrowable(x, r);
+                            break tryComplete;
+                        }
+                        r = null;
+                    }
+                    @SuppressWarnings("unchecked") T t = (T) r;
+                    d.completeValue(f.apply(t));
+                } catch (Throwable ex) {
+                    d.completeThrowable(ex);
+                }
+            }
             dep = null; src = null; snd = null; fn = null;
             return d.postFire(a, b, mode);
         }
     }
 
-    final <R,S extends R> boolean orApply(CompletableFuture<R> a,
-                                          CompletableFuture<S> b,
-                                          Function<? super R, ? extends T> f,
-                                          OrApply<R,S,T> c) {
-        Object r; Throwable x;
-        if (a == null || b == null ||
-            ((r = a.result) == null && (r = b.result) == null) || f == null)
-            return false;
-        tryComplete: if (result == null) {
-            try {
-                if (c != null && !c.claim())
-                    return false;
-                if (r instanceof AltResult) {
-                    if ((x = ((AltResult)r).ex) != null) {
-                        completeThrowable(x, r);
-                        break tryComplete;
-                    }
-                    r = null;
-                }
-                @SuppressWarnings("unchecked") R rr = (R) r;
-                completeValue(f.apply(rr));
-            } catch (Throwable ex) {
-                completeThrowable(ex);
-            }
-        }
-        return true;
-    }
-
     private <U extends T,V> CompletableFuture<V> orApplyStage(
-        Executor e, CompletionStage<U> o,
-        Function<? super T, ? extends V> f) {
+        Executor e, CompletionStage<U> o, Function<? super T, ? extends V> f) {
         CompletableFuture<U> b;
         if (f == null || (b = o.toCompletableFuture()) == null)
             throw new NullPointerException();
+
+        Object r; CompletableFuture<? extends T> z;
+        if ((r = (z = this).result) != null ||
+            (r = (z = b).result) != null)
+            return z.uniApplyNow(r, e, f);
+
         CompletableFuture<V> d = newIncompleteFuture();
-        if (e != null || !d.orApply(this, b, f, null)) {
-            OrApply<T,U,V> c = new OrApply<T,U,V>(e, d, this, b, f);
-            orpush(b, c);
-            c.tryFire(SYNC);
-        }
+        orpush(b, new OrApply<T,U,V>(e, d, this, b, f));
         return d;
     }
 
@@ -1438,8 +1512,7 @@
     static final class OrAccept<T,U extends T> extends BiCompletion<T,U,Void> {
         Consumer<? super T> fn;
         OrAccept(Executor executor, CompletableFuture<Void> dep,
-                 CompletableFuture<T> src,
-                 CompletableFuture<U> snd,
+                 CompletableFuture<T> src, CompletableFuture<U> snd,
                  Consumer<? super T> fn) {
             super(executor, dep, src, snd); this.fn = fn;
         }
@@ -1447,54 +1520,47 @@
             CompletableFuture<Void> d;
             CompletableFuture<T> a;
             CompletableFuture<U> b;
-            if ((d = dep) == null ||
-                !d.orAccept(a = src, b = snd, fn, mode > 0 ? null : this))
+            Object r; Throwable x; Consumer<? super T> f;
+            if ((d = dep) == null || (f = fn) == null
+                || (a = src) == null || (b = snd) == null
+                || ((r = a.result) == null && (r = b.result) == null))
                 return null;
+            tryComplete: if (d.result == null) {
+                try {
+                    if (mode <= 0 && !claim())
+                        return null;
+                    if (r instanceof AltResult) {
+                        if ((x = ((AltResult)r).ex) != null) {
+                            d.completeThrowable(x, r);
+                            break tryComplete;
+                        }
+                        r = null;
+                    }
+                    @SuppressWarnings("unchecked") T t = (T) r;
+                    f.accept(t);
+                    d.completeNull();
+                } catch (Throwable ex) {
+                    d.completeThrowable(ex);
+                }
+            }
             dep = null; src = null; snd = null; fn = null;
             return d.postFire(a, b, mode);
         }
     }
 
-    final <R,S extends R> boolean orAccept(CompletableFuture<R> a,
-                                           CompletableFuture<S> b,
-                                           Consumer<? super R> f,
-                                           OrAccept<R,S> c) {
-        Object r; Throwable x;
-        if (a == null || b == null ||
-            ((r = a.result) == null && (r = b.result) == null) || f == null)
-            return false;
-        tryComplete: if (result == null) {
-            try {
-                if (c != null && !c.claim())
-                    return false;
-                if (r instanceof AltResult) {
-                    if ((x = ((AltResult)r).ex) != null) {
-                        completeThrowable(x, r);
-                        break tryComplete;
-                    }
-                    r = null;
-                }
-                @SuppressWarnings("unchecked") R rr = (R) r;
-                f.accept(rr);
-                completeNull();
-            } catch (Throwable ex) {
-                completeThrowable(ex);
-            }
-        }
-        return true;
-    }
-
     private <U extends T> CompletableFuture<Void> orAcceptStage(
         Executor e, CompletionStage<U> o, Consumer<? super T> f) {
         CompletableFuture<U> b;
         if (f == null || (b = o.toCompletableFuture()) == null)
             throw new NullPointerException();
+
+        Object r; CompletableFuture<? extends T> z;
+        if ((r = (z = this).result) != null ||
+            (r = (z = b).result) != null)
+            return z.uniAcceptNow(r, e, f);
+
         CompletableFuture<Void> d = newIncompleteFuture();
-        if (e != null || !d.orAccept(this, b, f, null)) {
-            OrAccept<T,U> c = new OrAccept<T,U>(e, d, this, b, f);
-            orpush(b, c);
-            c.tryFire(SYNC);
-        }
+        orpush(b, new OrAccept<T,U>(e, d, this, b, f));
         return d;
     }
 
@@ -1502,8 +1568,7 @@
     static final class OrRun<T,U> extends BiCompletion<T,U,Void> {
         Runnable fn;
         OrRun(Executor executor, CompletableFuture<Void> dep,
-              CompletableFuture<T> src,
-              CompletableFuture<U> snd,
+              CompletableFuture<T> src, CompletableFuture<U> snd,
               Runnable fn) {
             super(executor, dep, src, snd); this.fn = fn;
         }
@@ -1511,97 +1576,81 @@
             CompletableFuture<Void> d;
             CompletableFuture<T> a;
             CompletableFuture<U> b;
-            if ((d = dep) == null ||
-                !d.orRun(a = src, b = snd, fn, mode > 0 ? null : this))
+            Object r; Throwable x; Runnable f;
+            if ((d = dep) == null || (f = fn) == null
+                || (a = src) == null || (b = snd) == null
+                || ((r = a.result) == null && (r = b.result) == null))
                 return null;
+            if (d.result == null) {
+                try {
+                    if (mode <= 0 && !claim())
+                        return null;
+                    else if (r instanceof AltResult
+                        && (x = ((AltResult)r).ex) != null)
+                        d.completeThrowable(x, r);
+                    else {
+                        f.run();
+                        d.completeNull();
+                    }
+                } catch (Throwable ex) {
+                    d.completeThrowable(ex);
+                }
+            }
             dep = null; src = null; snd = null; fn = null;
             return d.postFire(a, b, mode);
         }
     }
 
-    final boolean orRun(CompletableFuture<?> a, CompletableFuture<?> b,
-                        Runnable f, OrRun<?,?> c) {
-        Object r; Throwable x;
-        if (a == null || b == null ||
-            ((r = a.result) == null && (r = b.result) == null) || f == null)
-            return false;
-        if (result == null) {
-            try {
-                if (c != null && !c.claim())
-                    return false;
-                if (r instanceof AltResult && (x = ((AltResult)r).ex) != null)
-                    completeThrowable(x, r);
-                else {
-                    f.run();
-                    completeNull();
-                }
-            } catch (Throwable ex) {
-                completeThrowable(ex);
-            }
-        }
-        return true;
-    }
-
     private CompletableFuture<Void> orRunStage(Executor e, CompletionStage<?> o,
                                                Runnable f) {
         CompletableFuture<?> b;
         if (f == null || (b = o.toCompletableFuture()) == null)
             throw new NullPointerException();
+
+        Object r; CompletableFuture<?> z;
+        if ((r = (z = this).result) != null ||
+            (r = (z = b).result) != null)
+            return z.uniRunNow(r, e, f);
+
         CompletableFuture<Void> d = newIncompleteFuture();
-        if (e != null || !d.orRun(this, b, f, null)) {
-            OrRun<T,?> c = new OrRun<>(e, d, this, b, f);
-            orpush(b, c);
-            c.tryFire(SYNC);
-        }
+        orpush(b, new OrRun<>(e, d, this, b, f));
         return d;
     }
 
+    /** Completion for an anyOf input future. */
     @SuppressWarnings("serial")
-    static final class OrRelay<T,U> extends BiCompletion<T,U,Object> { // for Or
-        OrRelay(CompletableFuture<Object> dep, CompletableFuture<T> src,
-                CompletableFuture<U> snd) {
-            super(null, dep, src, snd);
+    static class AnyOf extends Completion {
+        CompletableFuture<Object> dep; CompletableFuture<?> src;
+        CompletableFuture<?>[] srcs;
+        AnyOf(CompletableFuture<Object> dep, CompletableFuture<?> src,
+              CompletableFuture<?>[] srcs) {
+            this.dep = dep; this.src = src; this.srcs = srcs;
         }
         final CompletableFuture<Object> tryFire(int mode) {
-            CompletableFuture<Object> d;
-            CompletableFuture<T> a;
-            CompletableFuture<U> b;
-            if ((d = dep) == null || !d.orRelay(a = src, b = snd))
+            // assert mode != ASYNC;
+            CompletableFuture<Object> d; CompletableFuture<?> a;
+            CompletableFuture<?>[] as;
+            Object r;
+            if ((d = dep) == null
+                || (a = src) == null || (r = a.result) == null
+                || (as = srcs) == null)
                 return null;
-            src = null; snd = null; dep = null;
-            return d.postFire(a, b, mode);
-        }
-    }
-
-    final boolean orRelay(CompletableFuture<?> a, CompletableFuture<?> b) {
-        Object r;
-        if (a == null || b == null ||
-            ((r = a.result) == null && (r = b.result) == null))
-            return false;
-        if (result == null)
-            completeRelay(r);
-        return true;
-    }
-
-    /** Recursively constructs a tree of completions. */
-    static CompletableFuture<Object> orTree(CompletableFuture<?>[] cfs,
-                                            int lo, int hi) {
-        CompletableFuture<Object> d = new CompletableFuture<Object>();
-        if (lo <= hi) {
-            CompletableFuture<?> a, b;
-            int mid = (lo + hi) >>> 1;
-            if ((a = (lo == mid ? cfs[lo] :
-                      orTree(cfs, lo, mid))) == null ||
-                (b = (lo == hi ? a : (hi == mid+1) ? cfs[hi] :
-                      orTree(cfs, mid+1, hi))) == null)
-                throw new NullPointerException();
-            if (!d.orRelay(a, b)) {
-                OrRelay<?,?> c = new OrRelay<>(d, a, b);
-                a.orpush(b, c);
-                c.tryFire(SYNC);
+            dep = null; src = null; srcs = null;
+            if (d.completeRelay(r)) {
+                for (CompletableFuture<?> b : as)
+                    if (b != a)
+                        b.cleanStack();
+                if (mode < 0)
+                    return d;
+                else
+                    d.postComplete();
             }
+            return null;
         }
-        return d;
+        final boolean isLive() {
+            CompletableFuture<Object> d;
+            return (d = dep) != null && d.result == null;
+        }
     }
 
     /* ------------- Zero-input Async forms -------------- */
@@ -1616,7 +1665,7 @@
 
         public final Void getRawResult() { return null; }
         public final void setRawResult(Void v) {}
-        public final boolean exec() { run(); return true; }
+        public final boolean exec() { run(); return false; }
 
         public void run() {
             CompletableFuture<T> d; Supplier<? extends T> f;
@@ -1652,7 +1701,7 @@
 
         public final Void getRawResult() { return null; }
         public final void setRawResult(Void v) {}
-        public final boolean exec() { run(); return true; }
+        public final boolean exec() { run(); return false; }
 
         public void run() {
             CompletableFuture<Void> d; Runnable f;
@@ -1736,15 +1785,13 @@
     private Object waitingGet(boolean interruptible) {
         Signaller q = null;
         boolean queued = false;
-        int spins = SPINS;
         Object r;
         while ((r = result) == null) {
-            if (spins > 0) {
-                if (ThreadLocalRandom.nextSecondarySeed() >= 0)
-                    --spins;
-            }
-            else if (q == null)
+            if (q == null) {
                 q = new Signaller(interruptible, 0L, 0L);
+                if (Thread.currentThread() instanceof ForkJoinWorkerThread)
+                    ForkJoinPool.helpAsyncBlocker(defaultExecutor(), q);
+            }
             else if (!queued)
                 queued = tryPushStack(q);
             else {
@@ -1757,16 +1804,14 @@
                     break;
             }
         }
-        if (q != null) {
+        if (q != null && queued) {
             q.thread = null;
-            if (q.interrupted) {
-                if (interruptible)
-                    cleanStack();
-                else
-                    Thread.currentThread().interrupt();
-            }
+            if (!interruptible && q.interrupted)
+                Thread.currentThread().interrupt();
+            if (r == null)
+                cleanStack();
         }
-        if (r != null)
+        if (r != null || (r = result) != null)
             postComplete();
         return r;
     }
@@ -1784,9 +1829,12 @@
             Signaller q = null;
             boolean queued = false;
             Object r;
-            while ((r = result) == null) { // similar to untimed, without spins
-                if (q == null)
+            while ((r = result) == null) { // similar to untimed
+                if (q == null) {
                     q = new Signaller(true, nanos, deadline);
+                    if (Thread.currentThread() instanceof ForkJoinWorkerThread)
+                        ForkJoinPool.helpAsyncBlocker(defaultExecutor(), q);
+                }
                 else if (!queued)
                     queued = tryPushStack(q);
                 else if (q.nanos <= 0L)
@@ -1801,12 +1849,13 @@
                         break;
                 }
             }
-            if (q != null)
+            if (q != null && queued) {
                 q.thread = null;
-            if (r != null)
+                if (r == null)
+                    cleanStack();
+            }
+            if (r != null || (r = result) != null)
                 postComplete();
-            else
-                cleanStack();
             if (r != null || (q != null && q.interrupted))
                 return r;
         }
@@ -1918,9 +1967,12 @@
      * @throws InterruptedException if the current thread was interrupted
      * while waiting
      */
+    @SuppressWarnings("unchecked")
     public T get() throws InterruptedException, ExecutionException {
         Object r;
-        return reportGet((r = result) == null ? waitingGet(true) : r);
+        if ((r = result) == null)
+            r = waitingGet(true);
+        return (T) reportGet(r);
     }
 
     /**
@@ -1936,11 +1988,14 @@
      * while waiting
      * @throws TimeoutException if the wait timed out
      */
+    @SuppressWarnings("unchecked")
     public T get(long timeout, TimeUnit unit)
         throws InterruptedException, ExecutionException, TimeoutException {
-        Object r;
         long nanos = unit.toNanos(timeout);
-        return reportGet((r = result) == null ? timedGet(nanos) : r);
+        Object r;
+        if ((r = result) == null)
+            r = timedGet(nanos);
+        return (T) reportGet(r);
     }
 
     /**
@@ -1957,9 +2012,12 @@
      * @throws CompletionException if this future completed
      * exceptionally or a completion computation threw an exception
      */
+    @SuppressWarnings("unchecked")
     public T join() {
         Object r;
-        return reportJoin((r = result) == null ? waitingGet(false) : r);
+        if ((r = result) == null)
+            r = waitingGet(false);
+        return (T) reportJoin(r);
     }
 
     /**
@@ -1972,9 +2030,10 @@
      * @throws CompletionException if this future completed
      * exceptionally or a completion computation threw an exception
      */
+    @SuppressWarnings("unchecked")
     public T getNow(T valueIfAbsent) {
         Object r;
-        return ((r = result) == null) ? valueIfAbsent : reportJoin(r);
+        return ((r = result) == null) ? valueIfAbsent : (T) reportJoin(r);
     }
 
     /**
@@ -2270,7 +2329,28 @@
      * {@code null}
      */
     public static CompletableFuture<Object> anyOf(CompletableFuture<?>... cfs) {
-        return orTree(cfs, 0, cfs.length - 1);
+        int n; Object r;
+        if ((n = cfs.length) <= 1)
+            return (n == 0)
+                ? new CompletableFuture<Object>()
+                : uniCopyStage(cfs[0]);
+        for (CompletableFuture<?> cf : cfs)
+            if ((r = cf.result) != null)
+                return new CompletableFuture<Object>(encodeRelay(r));
+        cfs = cfs.clone();
+        CompletableFuture<Object> d = new CompletableFuture<>();
+        for (CompletableFuture<?> cf : cfs)
+            cf.unipush(new AnyOf(d, cf, cfs));
+        // If d was completed while we were adding completions, we should
+        // clean the stack of any sources that may have had completions
+        // pushed on their stack after d was completed.
+        if (d.result != null)
+            for (int i = 0, len = cfs.length; i < len; i++)
+                if (cfs[i].result != null)
+                    for (i++; i < len; i++)
+                        if (cfs[i].result == null)
+                            cfs[i].cleanStack();
+        return d;
     }
 
     /* ------------- Control and status methods -------------- */
@@ -2386,13 +2466,13 @@
         for (Completion p = stack; p != null; p = p.next)
             ++count;
         return super.toString() +
-            ((r == null) ?
-             ((count == 0) ?
-              "[Not completed]" :
-              "[Not completed, " + count + " dependents]") :
-             (((r instanceof AltResult) && ((AltResult)r).ex != null) ?
-              "[Completed exceptionally]" :
-              "[Completed normally]"));
+            ((r == null)
+             ? ((count == 0)
+                ? "[Not completed]"
+                : "[Not completed, " + count + " dependents]")
+             : (((r instanceof AltResult) && ((AltResult)r).ex != null)
+                ? "[Completed exceptionally: " + ((AltResult)r).ex + "]"
+                : "[Completed normally]"));
     }
 
     // jdk9 additions
@@ -2442,7 +2522,7 @@
      * @since 9
      */
     public CompletableFuture<T> copy() {
-        return uniCopyStage();
+        return uniCopyStage(this);
     }
 
     /**
@@ -2455,6 +2535,13 @@
      * exceptionally with a CompletionException with this exception as
      * cause.
      *
+     * <p>Unless overridden by a subclass, a new non-minimal
+     * CompletableFuture with all methods available can be obtained from
+     * a minimal CompletionStage via {@link #toCompletableFuture()}.
+     * For example, completion of a minimal stage can be awaited by
+     *
+     * <pre> {@code minimalStage.toCompletableFuture().join(); }</pre>
+     *
      * @return the new CompletionStage
      * @since 9
      */
@@ -2749,23 +2836,30 @@
         @Override public CompletableFuture<T> completeOnTimeout
             (T value, long timeout, TimeUnit unit) {
             throw new UnsupportedOperationException(); }
+        @Override public CompletableFuture<T> toCompletableFuture() {
+            Object r;
+            if ((r = result) != null)
+                return new CompletableFuture<T>(encodeRelay(r));
+            else {
+                CompletableFuture<T> d = new CompletableFuture<>();
+                unipush(new UniRelay<T,T>(d, this));
+                return d;
+            }
+        }
     }
 
-    // Unsafe mechanics
-    private static final sun.misc.Unsafe U = sun.misc.Unsafe.getUnsafe();
-    private static final long RESULT;
-    private static final long STACK;
-    private static final long NEXT;
+    // VarHandle mechanics
+    private static final VarHandle RESULT;
+    private static final VarHandle STACK;
+    private static final VarHandle NEXT;
     static {
         try {
-            RESULT = U.objectFieldOffset
-                (CompletableFuture.class.getDeclaredField("result"));
-            STACK = U.objectFieldOffset
-                (CompletableFuture.class.getDeclaredField("stack"));
-            NEXT = U.objectFieldOffset
-                (Completion.class.getDeclaredField("next"));
+            MethodHandles.Lookup l = MethodHandles.lookup();
+            RESULT = l.findVarHandle(CompletableFuture.class, "result", Object.class);
+            STACK = l.findVarHandle(CompletableFuture.class, "stack", Completion.class);
+            NEXT = l.findVarHandle(Completion.class, "next", Completion.class);
         } catch (ReflectiveOperationException e) {
-            throw new Error(e);
+            throw new ExceptionInInitializerError(e);
         }
 
         // Reduce the risk of rare disastrous classloading in first call to
diff --git a/ojluni/src/main/java/java/util/concurrent/CompletionService.java b/ojluni/src/main/java/java/util/concurrent/CompletionService.java
index f647e21..5e5232e 100644
--- a/ojluni/src/main/java/java/util/concurrent/CompletionService.java
+++ b/ojluni/src/main/java/java/util/concurrent/CompletionService.java
@@ -57,6 +57,8 @@
  * <a href="package-summary.html#MemoryVisibility"><i>happen-before</i></a>
  * actions taken by that task, which in turn <i>happen-before</i>
  * actions following a successful return from the corresponding {@code take()}.
+ *
+ * @since 1.5
  */
 public interface CompletionService<V> {
     /**
diff --git a/ojluni/src/main/java/java/util/concurrent/CompletionStage.java b/ojluni/src/main/java/java/util/concurrent/CompletionStage.java
index d855945..70b601a 100644
--- a/ojluni/src/main/java/java/util/concurrent/CompletionStage.java
+++ b/ojluni/src/main/java/java/util/concurrent/CompletionStage.java
@@ -856,13 +856,9 @@
      * CompletableFuture, this method may return this stage itself.
      * Otherwise, invocation of this method may be equivalent in
      * effect to {@code thenApply(x -> x)}, but returning an instance
-     * of type {@code CompletableFuture}. A CompletionStage
-     * implementation that does not choose to interoperate with others
-     * may throw {@code UnsupportedOperationException}.
+     * of type {@code CompletableFuture}.
      *
      * @return the CompletableFuture
-     * @throws UnsupportedOperationException if this implementation
-     * does not interoperate with CompletableFuture
      */
     public CompletableFuture<T> toCompletableFuture();
 
diff --git a/ojluni/src/main/java/java/util/concurrent/ConcurrentHashMap.java b/ojluni/src/main/java/java/util/concurrent/ConcurrentHashMap.java
index 5407963..2cf8c86 100644
--- a/ojluni/src/main/java/java/util/concurrent/ConcurrentHashMap.java
+++ b/ojluni/src/main/java/java/util/concurrent/ConcurrentHashMap.java
@@ -68,10 +68,7 @@
 import java.util.function.ToLongBiFunction;
 import java.util.function.ToLongFunction;
 import java.util.stream.Stream;
-
-// BEGIN android-note
-// removed link to collections framework docs
-// END android-note
+import jdk.internal.misc.Unsafe;
 
 /**
  * A hash table supporting full concurrency of retrievals and
@@ -162,8 +159,7 @@
  * ordering, or on any other objects or values that may transiently
  * change while computation is in progress; and except for forEach
  * actions, should ideally be side-effect-free. Bulk operations on
- * {@link java.util.Map.Entry} objects do not support method {@code
- * setValue}.
+ * {@link Map.Entry} objects do not support method {@code setValue}.
  *
  * <ul>
  * <li>forEach: Performs a given action on each element.
@@ -256,6 +252,10 @@
  *
  * <p>All arguments to all task methods must be non-null.
  *
+ * <p>This class is a member of the
+ * <a href="{@docRoot}/java.base/java/util/package-summary.html#CollectionsFramework">
+ * Java Collections Framework</a>.
+ *
  * @since 1.5
  * @author Doug Lea
  * @param <K> the type of keys maintained by this map
@@ -297,7 +297,7 @@
      * Table accesses require volatile/atomic reads, writes, and
      * CASes.  Because there is no other way to arrange this without
      * adding further indirections, we use intrinsics
-     * (sun.misc.Unsafe) operations.
+     * (jdk.internal.misc.Unsafe) operations.
      *
      * We use the top (sign) bit of Node hash fields for control
      * purposes -- it is available anyway because of addressing
@@ -628,10 +628,14 @@
         volatile V val;
         volatile Node<K,V> next;
 
-        Node(int hash, K key, V val, Node<K,V> next) {
+        Node(int hash, K key, V val) {
             this.hash = hash;
             this.key = key;
             this.val = val;
+        }
+
+        Node(int hash, K key, V val, Node<K,V> next) {
+            this(hash, key, val);
             this.next = next;
         }
 
@@ -698,12 +702,7 @@
      * See Hackers Delight, sec 3.2
      */
     private static final int tableSizeFor(int c) {
-        int n = c - 1;
-        n |= n >>> 1;
-        n |= n >>> 2;
-        n |= n >>> 4;
-        n |= n >>> 8;
-        n |= n >>> 16;
+        int n = -1 >>> Integer.numberOfLeadingZeros(c - 1);
         return (n < 0) ? 1 : (n >= MAXIMUM_CAPACITY) ? MAXIMUM_CAPACITY : n + 1;
     }
 
@@ -713,12 +712,12 @@
      */
     static Class<?> comparableClassFor(Object x) {
         if (x instanceof Comparable) {
-            Class<?> c; Type[] ts, as; Type t; ParameterizedType p;
+            Class<?> c; Type[] ts, as; ParameterizedType p;
             if ((c = x.getClass()) == String.class) // bypass checks
                 return c;
             if ((ts = c.getGenericInterfaces()) != null) {
-                for (int i = 0; i < ts.length; ++i) {
-                    if (((t = ts[i]) instanceof ParameterizedType) &&
+                for (Type t : ts) {
+                    if ((t instanceof ParameterizedType) &&
                         ((p = (ParameterizedType)t).getRawType() ==
                          Comparable.class) &&
                         (as = p.getActualTypeArguments()) != null &&
@@ -743,7 +742,7 @@
     /* ---------------- Table element access -------------- */
 
     /*
-     * Volatile access methods are used for table elements as well as
+     * Atomic access methods are used for table elements as well as
      * elements of in-progress next table while resizing.  All uses of
      * the tab arguments must be null checked by callers.  All callers
      * also paranoically precheck that tab's length is not zero (or an
@@ -753,23 +752,21 @@
      * errors by users, these checks must operate on local variables,
      * which accounts for some odd-looking inline assignments below.
      * Note that calls to setTabAt always occur within locked regions,
-     * and so in principle require only release ordering, not
-     * full volatile semantics, but are currently coded as volatile
-     * writes to be conservative.
+     * and so require only release ordering.
      */
 
     @SuppressWarnings("unchecked")
     static final <K,V> Node<K,V> tabAt(Node<K,V>[] tab, int i) {
-        return (Node<K,V>)U.getObjectVolatile(tab, ((long)i << ASHIFT) + ABASE);
+        return (Node<K,V>)U.getObjectAcquire(tab, ((long)i << ASHIFT) + ABASE);
     }
 
     static final <K,V> boolean casTabAt(Node<K,V>[] tab, int i,
                                         Node<K,V> c, Node<K,V> v) {
-        return U.compareAndSwapObject(tab, ((long)i << ASHIFT) + ABASE, c, v);
+        return U.compareAndSetObject(tab, ((long)i << ASHIFT) + ABASE, c, v);
     }
 
     static final <K,V> void setTabAt(Node<K,V>[] tab, int i, Node<K,V> v) {
-        U.putObjectVolatile(tab, ((long)i << ASHIFT) + ABASE, v);
+        U.putObjectRelease(tab, ((long)i << ASHIFT) + ABASE, v);
     }
 
     /* ---------------- Fields -------------- */
@@ -842,12 +839,7 @@
      * elements is negative
      */
     public ConcurrentHashMap(int initialCapacity) {
-        if (initialCapacity < 0)
-            throw new IllegalArgumentException();
-        int cap = ((initialCapacity >= (MAXIMUM_CAPACITY >>> 1)) ?
-                   MAXIMUM_CAPACITY :
-                   tableSizeFor(initialCapacity + (initialCapacity >>> 1) + 1));
-        this.sizeCtl = cap;
+        this(initialCapacity, LOAD_FACTOR, 1);
     }
 
     /**
@@ -881,8 +873,8 @@
 
     /**
      * Creates a new, empty map with an initial table size based on
-     * the given number of elements ({@code initialCapacity}), table
-     * density ({@code loadFactor}), and number of concurrently
+     * the given number of elements ({@code initialCapacity}), initial
+     * table density ({@code loadFactor}), and number of concurrently
      * updating threads ({@code concurrencyLevel}).
      *
      * @param initialCapacity the initial capacity. The implementation
@@ -1020,16 +1012,20 @@
         int hash = spread(key.hashCode());
         int binCount = 0;
         for (Node<K,V>[] tab = table;;) {
-            Node<K,V> f; int n, i, fh;
+            Node<K,V> f; int n, i, fh; K fk; V fv;
             if (tab == null || (n = tab.length) == 0)
                 tab = initTable();
             else if ((f = tabAt(tab, i = (n - 1) & hash)) == null) {
-                if (casTabAt(tab, i, null,
-                             new Node<K,V>(hash, key, value, null)))
+                if (casTabAt(tab, i, null, new Node<K,V>(hash, key, value)))
                     break;                   // no lock when adding to empty bin
             }
             else if ((fh = f.hash) == MOVED)
                 tab = helpTransfer(tab, f);
+            else if (onlyIfAbsent // check first node without acquiring lock
+                     && fh == hash
+                     && ((fk = f.key) == key || (fk != null && key.equals(fk)))
+                     && (fv = f.val) != null)
+                return fv;
             else {
                 V oldVal = null;
                 synchronized (f) {
@@ -1048,8 +1044,7 @@
                                 }
                                 Node<K,V> pred = e;
                                 if ((e = e.next) == null) {
-                                    pred.next = new Node<K,V>(hash, key,
-                                                              value, null);
+                                    pred.next = new Node<K,V>(hash, key, value);
                                     break;
                                 }
                             }
@@ -1245,7 +1240,8 @@
     @dalvik.annotation.codegen.CovariantReturnType(returnType = KeySetView.class, presentAfter = 28)
     public Set<K> keySet() {
         KeySetView<K,V> ks;
-        return (ks = keySet) != null ? ks : (keySet = new KeySetView<K,V>(this, null));
+        if ((ks = keySet) != null) return ks;
+        return keySet = new KeySetView<K,V>(this, null);
     }
 
     /**
@@ -1268,7 +1264,8 @@
      */
     public Collection<V> values() {
         ValuesView<K,V> vs;
-        return (vs = values) != null ? vs : (values = new ValuesView<K,V>(this));
+        if ((vs = values) != null) return vs;
+        return values = new ValuesView<K,V>(this);
     }
 
     /**
@@ -1290,7 +1287,8 @@
      */
     public Set<Map.Entry<K,V>> entrySet() {
         EntrySetView<K,V> es;
-        return (es = entrySet) != null ? es : (entrySet = new EntrySetView<K,V>(this));
+        if ((es = entrySet) != null) return es;
+        return entrySet = new EntrySetView<K,V>(this);
     }
 
     /**
@@ -1391,8 +1389,8 @@
     }
 
     /**
-     * Saves the state of the {@code ConcurrentHashMap} instance to a
-     * stream (i.e., serializes it).
+     * Saves this map to a stream (that is, serializes it).
+     *
      * @param s the stream
      * @throws java.io.IOException if an I/O error occurs
      * @serialData
@@ -1436,7 +1434,7 @@
     }
 
     /**
-     * Reconstitutes the instance from a stream (that is, deserializes it).
+     * Reconstitutes this map from a stream (that is, deserializes it).
      * @param s the stream
      * @throws ClassNotFoundException if the class of a serialized object
      *         could not be found
@@ -1470,13 +1468,9 @@
         if (size == 0L)
             sizeCtl = 0;
         else {
-            int n;
-            if (size >= (long)(MAXIMUM_CAPACITY >>> 1))
-                n = MAXIMUM_CAPACITY;
-            else {
-                int sz = (int)size;
-                n = tableSizeFor(sz + (sz >>> 1) + 1);
-            }
+            long ts = (long)(1.0 + size / LOAD_FACTOR);
+            int n = (ts >= (long)MAXIMUM_CAPACITY) ?
+                MAXIMUM_CAPACITY : tableSizeFor((int)ts);
             @SuppressWarnings("unchecked")
             Node<K,V>[] tab = (Node<K,V>[])new Node<?,?>[n];
             int mask = n - 1;
@@ -1703,7 +1697,7 @@
         V val = null;
         int binCount = 0;
         for (Node<K,V>[] tab = table;;) {
-            Node<K,V> f; int n, i, fh;
+            Node<K,V> f; int n, i, fh; K fk; V fv;
             if (tab == null || (n = tab.length) == 0)
                 tab = initTable();
             else if ((f = tabAt(tab, i = (n - 1) & h)) == null) {
@@ -1714,7 +1708,7 @@
                         Node<K,V> node = null;
                         try {
                             if ((val = mappingFunction.apply(key)) != null)
-                                node = new Node<K,V>(h, key, val, null);
+                                node = new Node<K,V>(h, key, val);
                         } finally {
                             setTabAt(tab, i, node);
                         }
@@ -1725,6 +1719,10 @@
             }
             else if ((fh = f.hash) == MOVED)
                 tab = helpTransfer(tab, f);
+            else if (fh == h    // check first node without acquiring lock
+                     && ((fk = f.key) == key || (fk != null && key.equals(fk)))
+                     && (fv = f.val) != null)
+                return fv;
             else {
                 boolean added = false;
                 synchronized (f) {
@@ -1745,7 +1743,7 @@
                                         if (pred.next != null)
                                             throw new IllegalStateException("Recursive update");
                                         added = true;
-                                        pred.next = new Node<K,V>(h, key, val, null);
+                                        pred.next = new Node<K,V>(h, key, val);
                                     }
                                     break;
                                 }
@@ -1914,7 +1912,7 @@
                         try {
                             if ((val = remappingFunction.apply(key, null)) != null) {
                                 delta = 1;
-                                node = new Node<K,V>(h, key, val, null);
+                                node = new Node<K,V>(h, key, val);
                             }
                         } finally {
                             setTabAt(tab, i, node);
@@ -1956,8 +1954,7 @@
                                         if (pred.next != null)
                                             throw new IllegalStateException("Recursive update");
                                         delta = 1;
-                                        pred.next =
-                                            new Node<K,V>(h, key, val, null);
+                                        pred.next = new Node<K,V>(h, key, val);
                                     }
                                     break;
                                 }
@@ -2035,7 +2032,7 @@
             if (tab == null || (n = tab.length) == 0)
                 tab = initTable();
             else if ((f = tabAt(tab, i = (n - 1) & h)) == null) {
-                if (casTabAt(tab, i, null, new Node<K,V>(h, key, value, null))) {
+                if (casTabAt(tab, i, null, new Node<K,V>(h, key, value))) {
                     delta = 1;
                     val = value;
                     break;
@@ -2070,8 +2067,7 @@
                                 if ((e = e.next) == null) {
                                     delta = 1;
                                     val = value;
-                                    pred.next =
-                                        new Node<K,V>(h, key, val, null);
+                                    pred.next = new Node<K,V>(h, key, val);
                                     break;
                                 }
                             }
@@ -2232,7 +2228,7 @@
     static final class ForwardingNode<K,V> extends Node<K,V> {
         final Node<K,V>[] nextTable;
         ForwardingNode(Node<K,V>[] tab) {
-            super(MOVED, null, null, null);
+            super(MOVED, null, null);
             this.nextTable = tab;
         }
 
@@ -2268,7 +2264,7 @@
      */
     static final class ReservationNode<K,V> extends Node<K,V> {
         ReservationNode() {
-            super(RESERVED, null, null, null);
+            super(RESERVED, null, null);
         }
 
         Node<K,V> find(int h, Object k) {
@@ -2294,7 +2290,7 @@
         while ((tab = table) == null || tab.length == 0) {
             if ((sc = sizeCtl) < 0)
                 Thread.yield(); // lost initialization race; just spin
-            else if (U.compareAndSwapInt(this, SIZECTL, sc, -1)) {
+            else if (U.compareAndSetInt(this, SIZECTL, sc, -1)) {
                 try {
                     if ((tab = table) == null || tab.length == 0) {
                         int n = (sc > 0) ? sc : DEFAULT_CAPACITY;
@@ -2323,15 +2319,15 @@
      * @param check if <0, don't check resize, if <= 1 only check if uncontended
      */
     private final void addCount(long x, int check) {
-        CounterCell[] as; long b, s;
-        if ((as = counterCells) != null ||
-            !U.compareAndSwapLong(this, BASECOUNT, b = baseCount, s = b + x)) {
-            CounterCell a; long v; int m;
+        CounterCell[] cs; long b, s;
+        if ((cs = counterCells) != null ||
+            !U.compareAndSetLong(this, BASECOUNT, b = baseCount, s = b + x)) {
+            CounterCell c; long v; int m;
             boolean uncontended = true;
-            if (as == null || (m = as.length - 1) < 0 ||
-                (a = as[ThreadLocalRandom.getProbe() & m]) == null ||
+            if (cs == null || (m = cs.length - 1) < 0 ||
+                (c = cs[ThreadLocalRandom.getProbe() & m]) == null ||
                 !(uncontended =
-                  U.compareAndSwapLong(a, CELLVALUE, v = a.value, v + x))) {
+                  U.compareAndSetLong(c, CELLVALUE, v = c.value, v + x))) {
                 fullAddCount(x, uncontended);
                 return;
             }
@@ -2349,10 +2345,10 @@
                         sc == rs + MAX_RESIZERS || (nt = nextTable) == null ||
                         transferIndex <= 0)
                         break;
-                    if (U.compareAndSwapInt(this, SIZECTL, sc, sc + 1))
+                    if (U.compareAndSetInt(this, SIZECTL, sc, sc + 1))
                         transfer(tab, nt);
                 }
-                else if (U.compareAndSwapInt(this, SIZECTL, sc,
+                else if (U.compareAndSetInt(this, SIZECTL, sc,
                                              (rs << RESIZE_STAMP_SHIFT) + 2))
                     transfer(tab, null);
                 s = sumCount();
@@ -2373,7 +2369,7 @@
                 if ((sc >>> RESIZE_STAMP_SHIFT) != rs || sc == rs + 1 ||
                     sc == rs + MAX_RESIZERS || transferIndex <= 0)
                     break;
-                if (U.compareAndSwapInt(this, SIZECTL, sc, sc + 1)) {
+                if (U.compareAndSetInt(this, SIZECTL, sc, sc + 1)) {
                     transfer(tab, nextTab);
                     break;
                 }
@@ -2396,7 +2392,7 @@
             Node<K,V>[] tab = table; int n;
             if (tab == null || (n = tab.length) == 0) {
                 n = (sc > c) ? sc : c;
-                if (U.compareAndSwapInt(this, SIZECTL, sc, -1)) {
+                if (U.compareAndSetInt(this, SIZECTL, sc, -1)) {
                     try {
                         if (table == tab) {
                             @SuppressWarnings("unchecked")
@@ -2413,7 +2409,7 @@
                 break;
             else if (tab == table) {
                 int rs = resizeStamp(n);
-                if (U.compareAndSwapInt(this, SIZECTL, sc,
+                if (U.compareAndSetInt(this, SIZECTL, sc,
                                         (rs << RESIZE_STAMP_SHIFT) + 2))
                     transfer(tab, null);
             }
@@ -2454,7 +2450,7 @@
                     i = -1;
                     advance = false;
                 }
-                else if (U.compareAndSwapInt
+                else if (U.compareAndSetInt
                          (this, TRANSFERINDEX, nextIndex,
                           nextBound = (nextIndex > stride ?
                                        nextIndex - stride : 0))) {
@@ -2471,7 +2467,7 @@
                     sizeCtl = (n << 1) - (n >>> 1);
                     return;
                 }
-                if (U.compareAndSwapInt(this, SIZECTL, sc = sizeCtl, sc - 1)) {
+                if (U.compareAndSetInt(this, SIZECTL, sc = sizeCtl, sc - 1)) {
                     if ((sc - 2) != resizeStamp(n) << RESIZE_STAMP_SHIFT)
                         return;
                     finishing = advance = true;
@@ -2571,13 +2567,12 @@
     }
 
     final long sumCount() {
-        CounterCell[] as = counterCells; CounterCell a;
+        CounterCell[] cs = counterCells;
         long sum = baseCount;
-        if (as != null) {
-            for (int i = 0; i < as.length; ++i) {
-                if ((a = as[i]) != null)
-                    sum += a.value;
-            }
+        if (cs != null) {
+            for (CounterCell c : cs)
+                if (c != null)
+                    sum += c.value;
         }
         return sum;
     }
@@ -2592,13 +2587,13 @@
         }
         boolean collide = false;                // True if last slot nonempty
         for (;;) {
-            CounterCell[] as; CounterCell a; int n; long v;
-            if ((as = counterCells) != null && (n = as.length) > 0) {
-                if ((a = as[(n - 1) & h]) == null) {
+            CounterCell[] cs; CounterCell c; int n; long v;
+            if ((cs = counterCells) != null && (n = cs.length) > 0) {
+                if ((c = cs[(n - 1) & h]) == null) {
                     if (cellsBusy == 0) {            // Try to attach new Cell
                         CounterCell r = new CounterCell(x); // Optimistic create
                         if (cellsBusy == 0 &&
-                            U.compareAndSwapInt(this, CELLSBUSY, 0, 1)) {
+                            U.compareAndSetInt(this, CELLSBUSY, 0, 1)) {
                             boolean created = false;
                             try {               // Recheck under lock
                                 CounterCell[] rs; int m, j;
@@ -2620,21 +2615,17 @@
                 }
                 else if (!wasUncontended)       // CAS already known to fail
                     wasUncontended = true;      // Continue after rehash
-                else if (U.compareAndSwapLong(a, CELLVALUE, v = a.value, v + x))
+                else if (U.compareAndSetLong(c, CELLVALUE, v = c.value, v + x))
                     break;
-                else if (counterCells != as || n >= NCPU)
+                else if (counterCells != cs || n >= NCPU)
                     collide = false;            // At max size or stale
                 else if (!collide)
                     collide = true;
                 else if (cellsBusy == 0 &&
-                         U.compareAndSwapInt(this, CELLSBUSY, 0, 1)) {
+                         U.compareAndSetInt(this, CELLSBUSY, 0, 1)) {
                     try {
-                        if (counterCells == as) {// Expand table unless stale
-                            CounterCell[] rs = new CounterCell[n << 1];
-                            for (int i = 0; i < n; ++i)
-                                rs[i] = as[i];
-                            counterCells = rs;
-                        }
+                        if (counterCells == cs) // Expand table unless stale
+                            counterCells = Arrays.copyOf(cs, n << 1);
                     } finally {
                         cellsBusy = 0;
                     }
@@ -2643,11 +2634,11 @@
                 }
                 h = ThreadLocalRandom.advanceProbe(h);
             }
-            else if (cellsBusy == 0 && counterCells == as &&
-                     U.compareAndSwapInt(this, CELLSBUSY, 0, 1)) {
+            else if (cellsBusy == 0 && counterCells == cs &&
+                     U.compareAndSetInt(this, CELLSBUSY, 0, 1)) {
                 boolean init = false;
                 try {                           // Initialize table
-                    if (counterCells == as) {
+                    if (counterCells == cs) {
                         CounterCell[] rs = new CounterCell[2];
                         rs[h & 1] = new CounterCell(x);
                         counterCells = rs;
@@ -2659,7 +2650,7 @@
                 if (init)
                     break;
             }
-            else if (U.compareAndSwapLong(this, BASECOUNT, v = baseCount, v + x))
+            else if (U.compareAndSetLong(this, BASECOUNT, v = baseCount, v + x))
                 break;                          // Fall back on using base
         }
     }
@@ -2697,12 +2688,12 @@
     }
 
     /**
-     * Returns a list on non-TreeNodes replacing those in given list.
+     * Returns a list of non-TreeNodes replacing those in given list.
      */
     static <K,V> Node<K,V> untreeify(Node<K,V> b) {
         Node<K,V> hd = null, tl = null;
         for (Node<K,V> q = b; q != null; q = q.next) {
-            Node<K,V> p = new Node<K,V>(q.hash, q.key, q.val, null);
+            Node<K,V> p = new Node<K,V>(q.hash, q.key, q.val);
             if (tl == null)
                 hd = p;
             else
@@ -2808,7 +2799,7 @@
          * Creates bin with initial set of nodes headed by b.
          */
         TreeBin(TreeNode<K,V> b) {
-            super(TREEBIN, null, null, null);
+            super(TREEBIN, null, null);
             this.first = b;
             TreeNode<K,V> r = null;
             for (TreeNode<K,V> x = b, next; x != null; x = next) {
@@ -2855,7 +2846,7 @@
          * Acquires write lock for tree restructuring.
          */
         private final void lockRoot() {
-            if (!U.compareAndSwapInt(this, LOCKSTATE, 0, WRITER))
+            if (!U.compareAndSetInt(this, LOCKSTATE, 0, WRITER))
                 contendedLock(); // offload to separate method
         }
 
@@ -2873,14 +2864,14 @@
             boolean waiting = false;
             for (int s;;) {
                 if (((s = lockState) & ~WAITER) == 0) {
-                    if (U.compareAndSwapInt(this, LOCKSTATE, s, WRITER)) {
+                    if (U.compareAndSetInt(this, LOCKSTATE, s, WRITER)) {
                         if (waiting)
                             waiter = null;
                         return;
                     }
                 }
                 else if ((s & WAITER) == 0) {
-                    if (U.compareAndSwapInt(this, LOCKSTATE, s, s | WAITER)) {
+                    if (U.compareAndSetInt(this, LOCKSTATE, s, s | WAITER)) {
                         waiting = true;
                         waiter = Thread.currentThread();
                     }
@@ -2905,7 +2896,7 @@
                             return e;
                         e = e.next;
                     }
-                    else if (U.compareAndSwapInt(this, LOCKSTATE, s,
+                    else if (U.compareAndSetInt(this, LOCKSTATE, s,
                                                  s + READER)) {
                         TreeNode<K,V> r, p;
                         try {
@@ -3302,16 +3293,9 @@
             return true;
         }
 
-        private static final sun.misc.Unsafe U = sun.misc.Unsafe.getUnsafe();
-        private static final long LOCKSTATE;
-        static {
-            try {
-                LOCKSTATE = U.objectFieldOffset
-                    (TreeBin.class.getDeclaredField("lockState"));
-            } catch (ReflectiveOperationException e) {
-                throw new Error(e);
-            }
-        }
+        private static final Unsafe U = Unsafe.getUnsafe();
+        private static final long LOCKSTATE
+                = U.objectFieldOffset(TreeBin.class, "lockState");
     }
 
     /* ----------------Table Traversal -------------- */
@@ -3465,9 +3449,9 @@
 
     static final class KeyIterator<K,V> extends BaseIterator<K,V>
         implements Iterator<K>, Enumeration<K> {
-        KeyIterator(Node<K,V>[] tab, int index, int size, int limit,
+        KeyIterator(Node<K,V>[] tab, int size, int index, int limit,
                     ConcurrentHashMap<K,V> map) {
-            super(tab, index, size, limit, map);
+            super(tab, size, index, limit, map);
         }
 
         public final K next() {
@@ -3485,9 +3469,9 @@
 
     static final class ValueIterator<K,V> extends BaseIterator<K,V>
         implements Iterator<V>, Enumeration<V> {
-        ValueIterator(Node<K,V>[] tab, int index, int size, int limit,
+        ValueIterator(Node<K,V>[] tab, int size, int index, int limit,
                       ConcurrentHashMap<K,V> map) {
-            super(tab, index, size, limit, map);
+            super(tab, size, index, limit, map);
         }
 
         public final V next() {
@@ -3505,9 +3489,9 @@
 
     static final class EntryIterator<K,V> extends BaseIterator<K,V>
         implements Iterator<Map.Entry<K,V>> {
-        EntryIterator(Node<K,V>[] tab, int index, int size, int limit,
+        EntryIterator(Node<K,V>[] tab, int size, int index, int limit,
                       ConcurrentHashMap<K,V> map) {
-            super(tab, index, size, limit, map);
+            super(tab, size, index, limit, map);
         }
 
         public final Map.Entry<K,V> next() {
@@ -4558,14 +4542,24 @@
             return true;
         }
 
-        public final boolean removeAll(Collection<?> c) {
+        public boolean removeAll(Collection<?> c) {
             if (c == null) throw new NullPointerException();
             boolean modified = false;
-            for (Iterator<E> it = iterator(); it.hasNext();) {
-                if (c.contains(it.next())) {
-                    it.remove();
-                    modified = true;
+            // Use (c instanceof Set) as a hint that lookup in c is as
+            // efficient as this view
+            Node<K,V>[] t;
+            if ((t = map.table) == null) {
+                return false;
+            } else if (c instanceof Set<?> && c.size() > t.length) {
+                for (Iterator<?> it = iterator(); it.hasNext(); ) {
+                    if (c.contains(it.next())) {
+                        it.remove();
+                        modified = true;
+                    }
                 }
+            } else {
+                for (Object e : c)
+                    modified |= remove(e);
             }
             return modified;
         }
@@ -4751,6 +4745,18 @@
             throw new UnsupportedOperationException();
         }
 
+        @Override public boolean removeAll(Collection<?> c) {
+            if (c == null) throw new NullPointerException();
+            boolean modified = false;
+            for (Iterator<V> it = iterator(); it.hasNext();) {
+                if (c.contains(it.next())) {
+                    it.remove();
+                    modified = true;
+                }
+            }
+            return modified;
+        }
+
         public boolean removeIf(Predicate<? super V> filter) {
             return map.removeValueIf(filter);
         }
@@ -6344,7 +6350,7 @@
     }
 
     // Unsafe mechanics
-    private static final sun.misc.Unsafe U = sun.misc.Unsafe.getUnsafe();
+    private static final Unsafe U = Unsafe.getUnsafe();
     private static final long SIZECTL;
     private static final long TRANSFERINDEX;
     private static final long BASECOUNT;
@@ -6354,30 +6360,29 @@
     private static final int ASHIFT;
 
     static {
-        try {
-            SIZECTL = U.objectFieldOffset
-                (ConcurrentHashMap.class.getDeclaredField("sizeCtl"));
-            TRANSFERINDEX = U.objectFieldOffset
-                (ConcurrentHashMap.class.getDeclaredField("transferIndex"));
-            BASECOUNT = U.objectFieldOffset
-                (ConcurrentHashMap.class.getDeclaredField("baseCount"));
-            CELLSBUSY = U.objectFieldOffset
-                (ConcurrentHashMap.class.getDeclaredField("cellsBusy"));
+        SIZECTL = U.objectFieldOffset
+            (ConcurrentHashMap.class, "sizeCtl");
+        TRANSFERINDEX = U.objectFieldOffset
+            (ConcurrentHashMap.class, "transferIndex");
+        BASECOUNT = U.objectFieldOffset
+            (ConcurrentHashMap.class, "baseCount");
+        CELLSBUSY = U.objectFieldOffset
+            (ConcurrentHashMap.class, "cellsBusy");
 
-            CELLVALUE = U.objectFieldOffset
-                (CounterCell.class.getDeclaredField("value"));
+        CELLVALUE = U.objectFieldOffset
+            (CounterCell.class, "value");
 
-            ABASE = U.arrayBaseOffset(Node[].class);
-            int scale = U.arrayIndexScale(Node[].class);
-            if ((scale & (scale - 1)) != 0)
-                throw new Error("array index scale not a power of two");
-            ASHIFT = 31 - Integer.numberOfLeadingZeros(scale);
-        } catch (ReflectiveOperationException e) {
-            throw new Error(e);
-        }
+        ABASE = U.arrayBaseOffset(Node[].class);
+        int scale = U.arrayIndexScale(Node[].class);
+        if ((scale & (scale - 1)) != 0)
+            throw new ExceptionInInitializerError("array index scale not a power of two");
+        ASHIFT = 31 - Integer.numberOfLeadingZeros(scale);
 
         // Reduce the risk of rare disastrous classloading in first call to
         // LockSupport.park: https://bugs.openjdk.java.net/browse/JDK-8074773
         Class<?> ensureLoaded = LockSupport.class;
+
+        // Eager class load observed to help JIT during startup
+        ensureLoaded = ReservationNode.class;
     }
 }
diff --git a/ojluni/src/main/java/java/util/concurrent/ConcurrentLinkedDeque.java b/ojluni/src/main/java/java/util/concurrent/ConcurrentLinkedDeque.java
index 3edde54..91ddabd 100644
--- a/ojluni/src/main/java/java/util/concurrent/ConcurrentLinkedDeque.java
+++ b/ojluni/src/main/java/java/util/concurrent/ConcurrentLinkedDeque.java
@@ -35,6 +35,8 @@
 
 package java.util.concurrent;
 
+import java.lang.invoke.MethodHandles;
+import java.lang.invoke.VarHandle;
 import java.util.AbstractCollection;
 import java.util.Arrays;
 import java.util.Collection;
@@ -46,10 +48,7 @@
 import java.util.Spliterator;
 import java.util.Spliterators;
 import java.util.function.Consumer;
-
-// BEGIN android-note
-// removed link to collections framework docs
-// END android-note
+import java.util.function.Predicate;
 
 /**
  * An unbounded concurrent {@linkplain Deque deque} based on linked nodes.
@@ -68,12 +67,12 @@
  * asynchronous nature of these deques, determining the current number
  * of elements requires a traversal of the elements, and so may report
  * inaccurate results if this collection is modified during traversal.
- * Additionally, the bulk operations {@code addAll},
- * {@code removeAll}, {@code retainAll}, {@code containsAll},
- * {@code equals}, and {@code toArray} are <em>not</em> guaranteed
- * to be performed atomically. For example, an iterator operating
- * concurrently with an {@code addAll} operation might view only some
- * of the added elements.
+ *
+ * <p>Bulk operations that add, remove, or examine multiple elements,
+ * such as {@link #addAll}, {@link #removeIf} or {@link #forEach},
+ * are <em>not</em> guaranteed to be performed atomically.
+ * For example, a {@code forEach} traversal concurrent with an {@code
+ * addAll} operation might observe only some of the added elements.
  *
  * <p>This class and its iterator implement all of the <em>optional</em>
  * methods of the {@link Deque} and {@link Iterator} interfaces.
@@ -85,6 +84,10 @@
  * actions subsequent to the access or removal of that element from
  * the {@code ConcurrentLinkedDeque} in another thread.
  *
+ * <p>This class is a member of the
+ * <a href="{@docRoot}/java.base/java/util/package-summary.html#CollectionsFramework">
+ * Java Collections Framework</a>.
+ *
  * @since 1.7
  * @author Doug Lea
  * @author Martin Buchholz
@@ -228,15 +231,16 @@
      *
      * The implementation is completely directionally symmetrical,
      * except that most public methods that iterate through the list
-     * follow next pointers ("forward" direction).
+     * follow next pointers, in the "forward" direction.
      *
-     * We believe (without full proof) that all single-element deque
-     * operations (e.g., addFirst, peekLast, pollLast) are linearizable
-     * (see Herlihy and Shavit's book).  However, some combinations of
+     * We believe (without full proof) that all single-element Deque
+     * operations that operate directly at the two ends of the Deque
+     * (e.g., addFirst, peekLast, pollLast) are linearizable (see
+     * Herlihy and Shavit's book).  However, some combinations of
      * operations are known not to be linearizable.  In particular,
-     * when an addFirst(A) is racing with pollFirst() removing B, it is
-     * possible for an observer iterating over the elements to observe
-     * A B C and subsequently observe A C, even though no interior
+     * when an addFirst(A) is racing with pollFirst() removing B, it
+     * is possible for an observer iterating over the elements to
+     * observe first [A B C] and then [A C], even though no interior
      * removes are ever performed.  Nevertheless, iterators behave
      * reasonably, providing the "weakly consistent" guarantees.
      *
@@ -292,64 +296,23 @@
         volatile Node<E> prev;
         volatile E item;
         volatile Node<E> next;
+    }
 
-        Node() {  // default constructor for NEXT_TERMINATOR, PREV_TERMINATOR
-        }
-
-        /**
-         * Constructs a new node.  Uses relaxed write because item can
-         * only be seen after publication via casNext or casPrev.
-         */
-        Node(E item) {
-            U.putObject(this, ITEM, item);
-        }
-
-        boolean casItem(E cmp, E val) {
-            return U.compareAndSwapObject(this, ITEM, cmp, val);
-        }
-
-        void lazySetNext(Node<E> val) {
-            U.putOrderedObject(this, NEXT, val);
-        }
-
-        boolean casNext(Node<E> cmp, Node<E> val) {
-            return U.compareAndSwapObject(this, NEXT, cmp, val);
-        }
-
-        void lazySetPrev(Node<E> val) {
-            U.putOrderedObject(this, PREV, val);
-        }
-
-        boolean casPrev(Node<E> cmp, Node<E> val) {
-            return U.compareAndSwapObject(this, PREV, cmp, val);
-        }
-
-        // Unsafe mechanics
-
-        private static final sun.misc.Unsafe U = sun.misc.Unsafe.getUnsafe();
-        private static final long PREV;
-        private static final long ITEM;
-        private static final long NEXT;
-
-        static {
-            try {
-                PREV = U.objectFieldOffset
-                    (Node.class.getDeclaredField("prev"));
-                ITEM = U.objectFieldOffset
-                    (Node.class.getDeclaredField("item"));
-                NEXT = U.objectFieldOffset
-                    (Node.class.getDeclaredField("next"));
-            } catch (ReflectiveOperationException e) {
-                throw new Error(e);
-            }
-        }
+    /**
+     * Returns a new node holding item.  Uses relaxed write because item
+     * can only be seen after piggy-backing publication via CAS.
+     */
+    static <E> Node<E> newNode(E item) {
+        Node<E> node = new Node<E>();
+        ITEM.set(node, item);
+        return node;
     }
 
     /**
      * Links e as first element.
      */
     private void linkFirst(E e) {
-        final Node<E> newNode = new Node<E>(Objects.requireNonNull(e));
+        final Node<E> newNode = newNode(Objects.requireNonNull(e));
 
         restartFromHead:
         for (;;)
@@ -363,13 +326,13 @@
                     continue restartFromHead;
                 else {
                     // p is first node
-                    newNode.lazySetNext(p); // CAS piggyback
-                    if (p.casPrev(null, newNode)) {
+                    NEXT.set(newNode, p); // CAS piggyback
+                    if (PREV.compareAndSet(p, null, newNode)) {
                         // Successful CAS is the linearization point
                         // for e to become an element of this deque,
                         // and for newNode to become "live".
-                        if (p != h) // hop two nodes at a time
-                            casHead(h, newNode);  // Failure is OK.
+                        if (p != h) // hop two nodes at a time; failure is OK
+                            HEAD.weakCompareAndSet(this, h, newNode);
                         return;
                     }
                     // Lost CAS race to another thread; re-read prev
@@ -381,7 +344,7 @@
      * Links e as last element.
      */
     private void linkLast(E e) {
-        final Node<E> newNode = new Node<E>(Objects.requireNonNull(e));
+        final Node<E> newNode = newNode(Objects.requireNonNull(e));
 
         restartFromTail:
         for (;;)
@@ -395,13 +358,13 @@
                     continue restartFromTail;
                 else {
                     // p is last node
-                    newNode.lazySetPrev(p); // CAS piggyback
-                    if (p.casNext(null, newNode)) {
+                    PREV.set(newNode, p); // CAS piggyback
+                    if (NEXT.compareAndSet(p, null, newNode)) {
                         // Successful CAS is the linearization point
                         // for e to become an element of this deque,
                         // and for newNode to become "live".
-                        if (p != t) // hop two nodes at a time
-                            casTail(t, newNode);  // Failure is OK.
+                        if (p != t) // hop two nodes at a time; failure is OK
+                            TAIL.weakCompareAndSet(this, t, newNode);
                         return;
                     }
                     // Lost CAS race to another thread; re-read next
@@ -516,8 +479,8 @@
                 updateTail(); // Ensure x is not reachable from tail
 
                 // Finally, actually gc-unlink
-                x.lazySetPrev(isFirst ? prevTerminator() : x);
-                x.lazySetNext(isLast  ? nextTerminator() : x);
+                PREV.setRelease(x, isFirst ? prevTerminator() : x);
+                NEXT.setRelease(x, isLast  ? nextTerminator() : x);
             }
         }
     }
@@ -531,7 +494,8 @@
         // assert first.item == null;
         for (Node<E> o = null, p = next, q;;) {
             if (p.item != null || (q = p.next) == null) {
-                if (o != null && p.prev != p && first.casNext(next, p)) {
+                if (o != null && p.prev != p &&
+                    NEXT.compareAndSet(first, next, p)) {
                     skipDeletedPredecessors(p);
                     if (first.prev == null &&
                         (p.next == null || p.item != null) &&
@@ -541,8 +505,8 @@
                         updateTail(); // Ensure o is not reachable from tail
 
                         // Finally, actually gc-unlink
-                        o.lazySetNext(o);
-                        o.lazySetPrev(prevTerminator());
+                        NEXT.setRelease(o, o);
+                        PREV.setRelease(o, prevTerminator());
                     }
                 }
                 return;
@@ -565,7 +529,8 @@
         // assert last.item == null;
         for (Node<E> o = null, p = prev, q;;) {
             if (p.item != null || (q = p.prev) == null) {
-                if (o != null && p.next != p && last.casPrev(prev, p)) {
+                if (o != null && p.next != p &&
+                    PREV.compareAndSet(last, prev, p)) {
                     skipDeletedSuccessors(p);
                     if (last.next == null &&
                         (p.prev == null || p.item != null) &&
@@ -575,8 +540,8 @@
                         updateTail(); // Ensure o is not reachable from tail
 
                         // Finally, actually gc-unlink
-                        o.lazySetPrev(o);
-                        o.lazySetNext(nextTerminator());
+                        PREV.setRelease(o, o);
+                        NEXT.setRelease(o, nextTerminator());
                     }
                 }
                 return;
@@ -607,7 +572,7 @@
                     (q = (p = q).prev) == null) {
                     // It is possible that p is PREV_TERMINATOR,
                     // but if so, the CAS is guaranteed to fail.
-                    if (casHead(h, p))
+                    if (HEAD.compareAndSet(this, h, p))
                         return;
                     else
                         continue restartFromHead;
@@ -637,7 +602,7 @@
                     (q = (p = q).next) == null) {
                     // It is possible that p is NEXT_TERMINATOR,
                     // but if so, the CAS is guaranteed to fail.
-                    if (casTail(t, p))
+                    if (TAIL.compareAndSet(this, t, p))
                         return;
                     else
                         continue restartFromTail;
@@ -675,7 +640,7 @@
             }
 
             // found active CAS target
-            if (prev == p || x.casPrev(prev, p))
+            if (prev == p || PREV.compareAndSet(x, prev, p))
                 return;
 
         } while (x.item != null || x.next == null);
@@ -706,7 +671,7 @@
             }
 
             // found active CAS target
-            if (next == p || x.casNext(next, p))
+            if (next == p || NEXT.compareAndSet(x, next, p))
                 return;
 
         } while (x.item != null || x.prev == null);
@@ -719,8 +684,9 @@
      */
     final Node<E> succ(Node<E> p) {
         // TODO: should we skip deleted nodes here?
-        Node<E> q = p.next;
-        return (p == q) ? first() : q;
+        if (p == (p = p.next))
+            p = first();
+        return p;
     }
 
     /**
@@ -729,8 +695,9 @@
      * stale pointer that is now off the list.
      */
     final Node<E> pred(Node<E> p) {
-        Node<E> q = p.prev;
-        return (p == q) ? last() : q;
+        if (p == (p = p.prev))
+            p = last();
+        return p;
     }
 
     /**
@@ -751,7 +718,7 @@
                 else if (p == h
                          // It is possible that p is PREV_TERMINATOR,
                          // but if so, the CAS is guaranteed to fail.
-                         || casHead(h, p))
+                         || HEAD.compareAndSet(this, h, p))
                     return p;
                 else
                     continue restartFromHead;
@@ -776,7 +743,7 @@
                 else if (p == t
                          // It is possible that p is NEXT_TERMINATOR,
                          // but if so, the CAS is guaranteed to fail.
-                         || casTail(t, p))
+                         || TAIL.compareAndSet(this, t, p))
                     return p;
                 else
                     continue restartFromTail;
@@ -802,7 +769,7 @@
      * Constructs an empty deque.
      */
     public ConcurrentLinkedDeque() {
-        head = tail = new Node<E>(null);
+        head = tail = new Node<E>();
     }
 
     /**
@@ -818,12 +785,12 @@
         // Copy c into a private chain of Nodes
         Node<E> h = null, t = null;
         for (E e : c) {
-            Node<E> newNode = new Node<E>(Objects.requireNonNull(e));
+            Node<E> newNode = newNode(Objects.requireNonNull(e));
             if (h == null)
                 h = t = newNode;
             else {
-                t.lazySetNext(newNode);
-                newNode.lazySetPrev(t);
+                NEXT.set(t, newNode);
+                PREV.set(newNode, t);
                 t = newNode;
             }
         }
@@ -836,12 +803,12 @@
     private void initHeadTail(Node<E> h, Node<E> t) {
         if (h == t) {
             if (h == null)
-                h = t = new Node<E>(null);
+                h = t = new Node<E>();
             else {
                 // Avoid edge case of a single Node with non-null item.
-                Node<E> newNode = new Node<E>(null);
-                t.lazySetNext(newNode);
-                newNode.lazySetPrev(t);
+                Node<E> newNode = new Node<E>();
+                NEXT.set(t, newNode);
+                PREV.set(newNode, t);
                 t = newNode;
             }
         }
@@ -900,21 +867,33 @@
     }
 
     public E peekFirst() {
-        for (Node<E> p = first(); p != null; p = succ(p)) {
-            E item = p.item;
-            if (item != null)
-                return item;
+        restart: for (;;) {
+            E item;
+            Node<E> first = first(), p = first;
+            while ((item = p.item) == null) {
+                if (p == (p = p.next)) continue restart;
+                if (p == null)
+                    break;
+            }
+            // recheck for linearizability
+            if (first.prev != null) continue restart;
+            return item;
         }
-        return null;
     }
 
     public E peekLast() {
-        for (Node<E> p = last(); p != null; p = pred(p)) {
-            E item = p.item;
-            if (item != null)
-                return item;
+        restart: for (;;) {
+            E item;
+            Node<E> last = last(), p = last;
+            while ((item = p.item) == null) {
+                if (p == (p = p.prev)) continue restart;
+                if (p == null)
+                    break;
+            }
+            // recheck for linearizability
+            if (last.next != null) continue restart;
+            return item;
         }
-        return null;
     }
 
     /**
@@ -932,25 +911,45 @@
     }
 
     public E pollFirst() {
-        for (Node<E> p = first(); p != null; p = succ(p)) {
-            E item = p.item;
-            if (item != null && p.casItem(item, null)) {
-                unlink(p);
-                return item;
+        restart: for (;;) {
+            for (Node<E> first = first(), p = first;;) {
+                final E item;
+                if ((item = p.item) != null) {
+                    // recheck for linearizability
+                    if (first.prev != null) continue restart;
+                    if (ITEM.compareAndSet(p, item, null)) {
+                        unlink(p);
+                        return item;
+                    }
+                }
+                if (p == (p = p.next)) continue restart;
+                if (p == null) {
+                    if (first.prev != null) continue restart;
+                    return null;
+                }
             }
         }
-        return null;
     }
 
     public E pollLast() {
-        for (Node<E> p = last(); p != null; p = pred(p)) {
-            E item = p.item;
-            if (item != null && p.casItem(item, null)) {
-                unlink(p);
-                return item;
+        restart: for (;;) {
+            for (Node<E> last = last(), p = last;;) {
+                final E item;
+                if ((item = p.item) != null) {
+                    // recheck for linearizability
+                    if (last.next != null) continue restart;
+                    if (ITEM.compareAndSet(p, item, null)) {
+                        unlink(p);
+                        return item;
+                    }
+                }
+                if (p == (p = p.prev)) continue restart;
+                if (p == null) {
+                    if (last.next != null) continue restart;
+                    return null;
+                }
             }
         }
-        return null;
     }
 
     /**
@@ -1030,8 +1029,10 @@
     public boolean removeFirstOccurrence(Object o) {
         Objects.requireNonNull(o);
         for (Node<E> p = first(); p != null; p = succ(p)) {
-            E item = p.item;
-            if (item != null && o.equals(item) && p.casItem(item, null)) {
+            final E item;
+            if ((item = p.item) != null
+                && o.equals(item)
+                && ITEM.compareAndSet(p, item, null)) {
                 unlink(p);
                 return true;
             }
@@ -1054,8 +1055,10 @@
     public boolean removeLastOccurrence(Object o) {
         Objects.requireNonNull(o);
         for (Node<E> p = last(); p != null; p = pred(p)) {
-            E item = p.item;
-            if (item != null && o.equals(item) && p.casItem(item, null)) {
+            final E item;
+            if ((item = p.item) != null
+                && o.equals(item)
+                && ITEM.compareAndSet(p, item, null)) {
                 unlink(p);
                 return true;
             }
@@ -1074,8 +1077,8 @@
     public boolean contains(Object o) {
         if (o != null) {
             for (Node<E> p = first(); p != null; p = succ(p)) {
-                E item = p.item;
-                if (item != null && o.equals(item))
+                final E item;
+                if ((item = p.item) != null && o.equals(item))
                     return true;
             }
         }
@@ -1108,14 +1111,14 @@
      * @return the number of elements in this deque
      */
     public int size() {
-        restartFromHead: for (;;) {
+        restart: for (;;) {
             int count = 0;
             for (Node<E> p = first(); p != null;) {
                 if (p.item != null)
                     if (++count == Integer.MAX_VALUE)
                         break;  // @see Collection.size()
                 if (p == (p = p.next))
-                    continue restartFromHead;
+                    continue restart;
             }
             return count;
         }
@@ -1159,12 +1162,12 @@
         // Copy c into a private chain of Nodes
         Node<E> beginningOfTheEnd = null, last = null;
         for (E e : c) {
-            Node<E> newNode = new Node<E>(Objects.requireNonNull(e));
+            Node<E> newNode = newNode(Objects.requireNonNull(e));
             if (beginningOfTheEnd == null)
                 beginningOfTheEnd = last = newNode;
             else {
-                last.lazySetNext(newNode);
-                newNode.lazySetPrev(last);
+                NEXT.set(last, newNode);
+                PREV.set(newNode, last);
                 last = newNode;
             }
         }
@@ -1184,16 +1187,16 @@
                     continue restartFromTail;
                 else {
                     // p is last node
-                    beginningOfTheEnd.lazySetPrev(p); // CAS piggyback
-                    if (p.casNext(null, beginningOfTheEnd)) {
+                    PREV.set(beginningOfTheEnd, p); // CAS piggyback
+                    if (NEXT.compareAndSet(p, null, beginningOfTheEnd)) {
                         // Successful CAS is the linearization point
                         // for all elements to be added to this deque.
-                        if (!casTail(t, last)) {
+                        if (!TAIL.weakCompareAndSet(this, t, last)) {
                             // Try a little harder to update tail,
                             // since we may be adding many elements.
                             t = tail;
                             if (last.next == null)
-                                casTail(t, last);
+                                TAIL.weakCompareAndSet(this, t, last);
                         }
                         return true;
                     }
@@ -1212,12 +1215,12 @@
 
     public String toString() {
         String[] a = null;
-        restartFromHead: for (;;) {
+        restart: for (;;) {
             int charLength = 0;
             int size = 0;
             for (Node<E> p = first(); p != null;) {
-                E item = p.item;
-                if (item != null) {
+                final E item;
+                if ((item = p.item) != null) {
                     if (a == null)
                         a = new String[4];
                     else if (size == a.length)
@@ -1227,7 +1230,7 @@
                     charLength += s.length();
                 }
                 if (p == (p = p.next))
-                    continue restartFromHead;
+                    continue restart;
             }
 
             if (size == 0)
@@ -1239,11 +1242,11 @@
 
     private Object[] toArrayInternal(Object[] a) {
         Object[] x = a;
-        restartFromHead: for (;;) {
+        restart: for (;;) {
             int size = 0;
             for (Node<E> p = first(); p != null;) {
-                E item = p.item;
-                if (item != null) {
+                final E item;
+                if ((item = p.item) != null) {
                     if (x == null)
                         x = new Object[4];
                     else if (size == x.length)
@@ -1251,7 +1254,7 @@
                     x[size++] = item;
                 }
                 if (p == (p = p.next))
-                    continue restartFromHead;
+                    continue restart;
             }
             if (x == null)
                 return new Object[0];
@@ -1395,8 +1398,8 @@
                     nextItem = null;
                     break;
                 }
-                E item = p.item;
-                if (item != null) {
+                final E item;
+                if ((item = p.item) != null) {
                     nextNode = p;
                     nextItem = item;
                     break;
@@ -1426,90 +1429,75 @@
 
     /** Forward iterator */
     private class Itr extends AbstractItr {
+        Itr() {}                        // prevent access constructor creation
         Node<E> startNode() { return first(); }
         Node<E> nextNode(Node<E> p) { return succ(p); }
     }
 
     /** Descending iterator */
     private class DescendingItr extends AbstractItr {
+        DescendingItr() {}              // prevent access constructor creation
         Node<E> startNode() { return last(); }
         Node<E> nextNode(Node<E> p) { return pred(p); }
     }
 
     /** A customized variant of Spliterators.IteratorSpliterator */
-    static final class CLDSpliterator<E> implements Spliterator<E> {
+    final class CLDSpliterator implements Spliterator<E> {
         static final int MAX_BATCH = 1 << 25;  // max batch array size;
-        final ConcurrentLinkedDeque<E> queue;
         Node<E> current;    // current node; null until initialized
         int batch;          // batch size for splits
         boolean exhausted;  // true when no more nodes
-        CLDSpliterator(ConcurrentLinkedDeque<E> queue) {
-            this.queue = queue;
-        }
 
         public Spliterator<E> trySplit() {
-            Node<E> p;
-            final ConcurrentLinkedDeque<E> q = this.queue;
-            int b = batch;
-            int n = (b <= 0) ? 1 : (b >= MAX_BATCH) ? MAX_BATCH : b + 1;
-            if (!exhausted &&
-                ((p = current) != null || (p = q.first()) != null)) {
-                if (p.item == null && p == (p = p.next))
-                    current = p = q.first();
-                if (p != null && p.next != null) {
-                    Object[] a = new Object[n];
-                    int i = 0;
-                    do {
-                        if ((a[i] = p.item) != null)
-                            ++i;
-                        if (p == (p = p.next))
-                            p = q.first();
-                    } while (p != null && i < n);
-                    if ((current = p) == null)
-                        exhausted = true;
-                    if (i > 0) {
-                        batch = i;
-                        return Spliterators.spliterator
-                            (a, 0, i, (Spliterator.ORDERED |
-                                       Spliterator.NONNULL |
-                                       Spliterator.CONCURRENT));
-                    }
+            Node<E> p, q;
+            if ((p = current()) == null || (q = p.next) == null)
+                return null;
+            int i = 0, n = batch = Math.min(batch + 1, MAX_BATCH);
+            Object[] a = null;
+            do {
+                final E e;
+                if ((e = p.item) != null) {
+                    if (a == null)
+                        a = new Object[n];
+                    a[i++] = e;
                 }
-            }
-            return null;
+                if (p == (p = q))
+                    p = first();
+            } while (p != null && (q = p.next) != null && i < n);
+            setCurrent(p);
+            return (i == 0) ? null :
+                Spliterators.spliterator(a, 0, i, (Spliterator.ORDERED |
+                                                   Spliterator.NONNULL |
+                                                   Spliterator.CONCURRENT));
         }
 
         public void forEachRemaining(Consumer<? super E> action) {
+            Objects.requireNonNull(action);
             Node<E> p;
-            if (action == null) throw new NullPointerException();
-            final ConcurrentLinkedDeque<E> q = this.queue;
-            if (!exhausted &&
-                ((p = current) != null || (p = q.first()) != null)) {
+            if ((p = current()) != null) {
+                current = null;
                 exhausted = true;
                 do {
-                    E e = p.item;
-                    if (p == (p = p.next))
-                        p = q.first();
-                    if (e != null)
+                    final E e;
+                    if ((e = p.item) != null)
                         action.accept(e);
+                    if (p == (p = p.next))
+                        p = first();
                 } while (p != null);
             }
         }
 
         public boolean tryAdvance(Consumer<? super E> action) {
+            Objects.requireNonNull(action);
             Node<E> p;
-            if (action == null) throw new NullPointerException();
-            final ConcurrentLinkedDeque<E> q = this.queue;
-            if (!exhausted &&
-                ((p = current) != null || (p = q.first()) != null)) {
+            if ((p = current()) != null) {
                 E e;
                 do {
                     e = p.item;
                     if (p == (p = p.next))
-                        p = q.first();
+                        p = first();
                 } while (e == null && p != null);
-                if ((current = p) == null)
-                    exhausted = true;
+                setCurrent(p);
                 if (e != null) {
                     action.accept(e);
                     return true;
@@ -1518,11 +1506,24 @@
             return false;
         }
 
+        private void setCurrent(Node<E> p) {
+            if ((current = p) == null)
+                exhausted = true;
+        }
+
+        private Node<E> current() {
+            Node<E> p;
+            if ((p = current) == null && !exhausted)
+                setCurrent(p = first());
+            return p;
+        }
+
         public long estimateSize() { return Long.MAX_VALUE; }
 
         public int characteristics() {
-            return Spliterator.ORDERED | Spliterator.NONNULL |
-                Spliterator.CONCURRENT;
+            return (Spliterator.ORDERED |
+                    Spliterator.NONNULL |
+                    Spliterator.CONCURRENT);
         }
     }
 
@@ -1543,7 +1544,7 @@
      * @since 1.8
      */
     public Spliterator<E> spliterator() {
-        return new CLDSpliterator<E>(this);
+        return new CLDSpliterator();
     }
 
     /**
@@ -1562,8 +1563,8 @@
 
         // Write out all elements in the proper order.
         for (Node<E> p = first(); p != null; p = succ(p)) {
-            E item = p.item;
-            if (item != null)
+            final E item;
+            if ((item = p.item) != null)
                 s.writeObject(item);
         }
 
@@ -1586,43 +1587,91 @@
         Node<E> h = null, t = null;
         for (Object item; (item = s.readObject()) != null; ) {
             @SuppressWarnings("unchecked")
-            Node<E> newNode = new Node<E>((E) item);
+            Node<E> newNode = newNode((E) item);
             if (h == null)
                 h = t = newNode;
             else {
-                t.lazySetNext(newNode);
-                newNode.lazySetPrev(t);
+                NEXT.set(t, newNode);
+                PREV.set(newNode, t);
                 t = newNode;
             }
         }
         initHeadTail(h, t);
     }
 
-    private boolean casHead(Node<E> cmp, Node<E> val) {
-        return U.compareAndSwapObject(this, HEAD, cmp, val);
+    /**
+     * @throws NullPointerException {@inheritDoc}
+     */
+    public boolean removeIf(Predicate<? super E> filter) {
+        Objects.requireNonNull(filter);
+        return bulkRemove(filter);
     }
 
-    private boolean casTail(Node<E> cmp, Node<E> val) {
-        return U.compareAndSwapObject(this, TAIL, cmp, val);
+    /**
+     * @throws NullPointerException {@inheritDoc}
+     */
+    public boolean removeAll(Collection<?> c) {
+        Objects.requireNonNull(c);
+        return bulkRemove(e -> c.contains(e));
     }
 
-    // Unsafe mechanics
+    /**
+     * @throws NullPointerException {@inheritDoc}
+     */
+    public boolean retainAll(Collection<?> c) {
+        Objects.requireNonNull(c);
+        return bulkRemove(e -> !c.contains(e));
+    }
 
-    private static final sun.misc.Unsafe U = sun.misc.Unsafe.getUnsafe();
-    private static final long HEAD;
-    private static final long TAIL;
+    /** Implementation of bulk remove methods. */
+    private boolean bulkRemove(Predicate<? super E> filter) {
+        boolean removed = false;
+        for (Node<E> p = first(), succ; p != null; p = succ) {
+            succ = succ(p);
+            final E item;
+            if ((item = p.item) != null
+                && filter.test(item)
+                && ITEM.compareAndSet(p, item, null)) {
+                unlink(p);
+                removed = true;
+            }
+        }
+        return removed;
+    }
+
+    /**
+     * @throws NullPointerException {@inheritDoc}
+     */
+    public void forEach(Consumer<? super E> action) {
+        Objects.requireNonNull(action);
+        E item;
+        for (Node<E> p = first(); p != null; p = succ(p))
+            if ((item = p.item) != null)
+                action.accept(item);
+    }
+
+    // VarHandle mechanics
+    private static final VarHandle HEAD;
+    private static final VarHandle TAIL;
+    private static final VarHandle PREV;
+    private static final VarHandle NEXT;
+    private static final VarHandle ITEM;
     static {
         PREV_TERMINATOR = new Node<Object>();
         PREV_TERMINATOR.next = PREV_TERMINATOR;
         NEXT_TERMINATOR = new Node<Object>();
         NEXT_TERMINATOR.prev = NEXT_TERMINATOR;
         try {
-            HEAD = U.objectFieldOffset
-                (ConcurrentLinkedDeque.class.getDeclaredField("head"));
-            TAIL = U.objectFieldOffset
-                (ConcurrentLinkedDeque.class.getDeclaredField("tail"));
+            MethodHandles.Lookup l = MethodHandles.lookup();
+            HEAD = l.findVarHandle(ConcurrentLinkedDeque.class, "head",
+                                   Node.class);
+            TAIL = l.findVarHandle(ConcurrentLinkedDeque.class, "tail",
+                                   Node.class);
+            PREV = l.findVarHandle(Node.class, "prev", Node.class);
+            NEXT = l.findVarHandle(Node.class, "next", Node.class);
+            ITEM = l.findVarHandle(Node.class, "item", Object.class);
         } catch (ReflectiveOperationException e) {
-            throw new Error(e);
+            throw new ExceptionInInitializerError(e);
         }
     }
 }
diff --git a/ojluni/src/main/java/java/util/concurrent/ConcurrentLinkedQueue.java b/ojluni/src/main/java/java/util/concurrent/ConcurrentLinkedQueue.java
index 7997c60..eac8ee7 100644
--- a/ojluni/src/main/java/java/util/concurrent/ConcurrentLinkedQueue.java
+++ b/ojluni/src/main/java/java/util/concurrent/ConcurrentLinkedQueue.java
@@ -35,6 +35,8 @@
 
 package java.util.concurrent;
 
+import java.lang.invoke.MethodHandles;
+import java.lang.invoke.VarHandle;
 import java.util.AbstractQueue;
 import java.util.Arrays;
 import java.util.Collection;
@@ -45,10 +47,7 @@
 import java.util.Spliterator;
 import java.util.Spliterators;
 import java.util.function.Consumer;
-
-// BEGIN android-note
-// removed link to collections framework docs
-// END android-note
+import java.util.function.Predicate;
 
 /**
  * An unbounded thread-safe {@linkplain Queue queue} based on linked nodes.
@@ -82,12 +81,12 @@
  * asynchronous nature of these queues, determining the current number
  * of elements requires a traversal of the elements, and so may report
  * inaccurate results if this collection is modified during traversal.
- * Additionally, the bulk operations {@code addAll},
- * {@code removeAll}, {@code retainAll}, {@code containsAll},
- * {@code equals}, and {@code toArray} are <em>not</em> guaranteed
- * to be performed atomically. For example, an iterator operating
- * concurrently with an {@code addAll} operation might view only some
- * of the added elements.
+ *
+ * <p>Bulk operations that add, remove, or examine multiple elements,
+ * such as {@link #addAll}, {@link #removeIf} or {@link #forEach},
+ * are <em>not</em> guaranteed to be performed atomically.
+ * For example, a {@code forEach} traversal concurrent with an {@code
+ * addAll} operation might observe only some of the added elements.
  *
  * <p>This class and its iterator implement all of the <em>optional</em>
  * methods of the {@link Queue} and {@link Iterator} interfaces.
@@ -99,6 +98,10 @@
  * actions subsequent to the access or removal of that element from
  * the {@code ConcurrentLinkedQueue} in another thread.
  *
+ * <p>This class is a member of the
+ * <a href="{@docRoot}/java.base/java/util/package-summary.html#CollectionsFramework">
+ * Java Collections Framework</a>.
+ *
  * @since 1.5
  * @author Doug Lea
  * @param <E> the type of elements held in this queue
@@ -110,7 +113,7 @@
     /*
      * This is a modification of the Michael & Scott algorithm,
      * adapted for a garbage-collected environment, with support for
-     * interior node deletion (to support remove(Object)).  For
+     * interior node deletion (to support e.g. remove(Object)).  For
      * explanation, read the paper.
      *
      * Note that like most non-blocking algorithms in this package,
@@ -158,17 +161,17 @@
      * it is possible for tail to lag behind head (why not)?
      *
      * CASing a Node's item reference to null atomically removes the
-     * element from the queue.  Iterators skip over Nodes with null
-     * items.  Prior implementations of this class had a race between
-     * poll() and remove(Object) where the same element would appear
-     * to be successfully removed by two concurrent operations.  The
-     * method remove(Object) also lazily unlinks deleted Nodes, but
-     * this is merely an optimization.
+     * element from the queue, leaving a "dead" node that should later
+     * be unlinked (but unlinking is merely an optimization).
+     * Interior element removal methods (other than Iterator.remove())
+     * keep track of the predecessor node during traversal so that the
+     * node can be CAS-unlinked.  Some traversal methods try to unlink
+     * any deleted nodes encountered during traversal.  See comments
+     * in bulkRemove.
      *
      * When constructing a Node (before enqueuing it) we avoid paying
-     * for a volatile write to item by using Unsafe.putObject instead
-     * of a normal write.  This allows the cost of enqueue to be
-     * "one-and-a-half" CASes.
+     * for a volatile write to item.  This allows the cost of enqueue
+     * to be "one-and-a-half" CASes.
      *
      * Both head and tail may or may not point to a Node with a
      * non-null item.  If the queue is empty, all items must of course
@@ -178,31 +181,33 @@
      * optimization.
      */
 
-    private static class Node<E> {
+    static final class Node<E> {
         volatile E item;
         volatile Node<E> next;
-    }
 
-    /**
-     * Returns a new node holding item.  Uses relaxed write because item
-     * can only be seen after piggy-backing publication via casNext.
-     */
-    static <E> Node<E> newNode(E item) {
-        Node<E> node = new Node<E>();
-        U.putObject(node, ITEM, item);
-        return node;
-    }
+        /**
+         * Constructs a node holding item.  Uses relaxed write because
+         * item can only be seen after piggy-backing publication via CAS.
+         */
+        Node(E item) {
+            ITEM.set(this, item);
+        }
 
-    static <E> boolean casItem(Node<E> node, E cmp, E val) {
-        return U.compareAndSwapObject(node, ITEM, cmp, val);
-    }
+        /** Constructs a dead dummy node. */
+        Node() {}
 
-    static <E> void lazySetNext(Node<E> node, Node<E> val) {
-        U.putOrderedObject(node, NEXT, val);
-    }
+        void appendRelaxed(Node<E> next) {
+            // assert next != null;
+            // assert this.next == null;
+            NEXT.set(this, next);
+        }
 
-    static <E> boolean casNext(Node<E> node, Node<E> cmp, Node<E> val) {
-        return U.compareAndSwapObject(node, NEXT, cmp, val);
+        boolean casItem(E cmp, E val) {
+            // assert item == cmp || item == null;
+            // assert cmp != null;
+            // assert val == null;
+            return ITEM.compareAndSet(this, cmp, val);
+        }
     }
 
     /**
@@ -229,7 +234,7 @@
      * - tail.item may or may not be null.
      * - it is permitted for tail to lag behind head, that is, for tail
      *   to not be reachable from head!
-     * - tail.next may or may not be self-pointing to tail.
+     * - tail.next may or may not be self-linked.
      */
     private transient volatile Node<E> tail;
 
@@ -237,7 +242,7 @@
      * Creates a {@code ConcurrentLinkedQueue} that is initially empty.
      */
     public ConcurrentLinkedQueue() {
-        head = tail = newNode(null);
+        head = tail = new Node<E>();
     }
 
     /**
@@ -252,16 +257,14 @@
     public ConcurrentLinkedQueue(Collection<? extends E> c) {
         Node<E> h = null, t = null;
         for (E e : c) {
-            Node<E> newNode = newNode(Objects.requireNonNull(e));
+            Node<E> newNode = new Node<E>(Objects.requireNonNull(e));
             if (h == null)
                 h = t = newNode;
-            else {
-                lazySetNext(t, newNode);
-                t = newNode;
-            }
+            else
+                t.appendRelaxed(t = newNode);
         }
         if (h == null)
-            h = t = newNode(null);
+            h = t = new Node<E>();
         head = h;
         tail = t;
     }
@@ -286,8 +289,8 @@
      */
     final void updateHead(Node<E> h, Node<E> p) {
         // assert h != null && p != null && (h == p || h.item == null);
-        if (h != p && casHead(h, p))
-            lazySetNext(h, h);
+        if (h != p && HEAD.compareAndSet(this, h, p))
+            NEXT.setRelease(h, h);
     }
 
     /**
@@ -296,8 +299,49 @@
      * stale pointer that is now off the list.
      */
     final Node<E> succ(Node<E> p) {
-        Node<E> next = p.next;
-        return (p == next) ? head : next;
+        if (p == (p = p.next))
+            p = head;
+        return p;
+    }
+
+    /**
+     * Tries to CAS pred.next (or head, if pred is null) from c to p.
+     * Caller must ensure that we're not unlinking the trailing node.
+     */
+    private boolean tryCasSuccessor(Node<E> pred, Node<E> c, Node<E> p) {
+        // assert p != null;
+        // assert c.item == null;
+        // assert c != p;
+        if (pred != null)
+            return NEXT.compareAndSet(pred, c, p);
+        if (HEAD.compareAndSet(this, c, p)) {
+            NEXT.setRelease(c, c);
+            return true;
+        }
+        return false;
+    }
+
+    /**
+     * Collapse dead nodes between pred and q.
+     * @param pred the last known live node, or null if none
+     * @param c the first dead node
+     * @param p the last dead node
+     * @param q p.next: the next live node, or null if at end
+     * @return either old pred or p if pred dead or CAS failed
+     */
+    private Node<E> skipDeadNodes(Node<E> pred, Node<E> c, Node<E> p, Node<E> q) {
+        // assert pred != c;
+        // assert p != q;
+        // assert c.item == null;
+        // assert p.item == null;
+        if (q == null) {
+            // Never unlink trailing node.
+            if (c == p) return pred;
+            q = p;
+        }
+        return (tryCasSuccessor(pred, c, q)
+                && (pred == null || ITEM.get(pred) != null))
+            ? pred : p;
     }
 
     /**
@@ -308,18 +352,18 @@
      * @throws NullPointerException if the specified element is null
      */
     public boolean offer(E e) {
-        final Node<E> newNode = newNode(Objects.requireNonNull(e));
+        final Node<E> newNode = new Node<E>(Objects.requireNonNull(e));
 
         for (Node<E> t = tail, p = t;;) {
             Node<E> q = p.next;
             if (q == null) {
                 // p is last node
-                if (casNext(p, null, newNode)) {
+                if (NEXT.compareAndSet(p, null, newNode)) {
                     // Successful CAS is the linearization point
                     // for e to become an element of this queue,
                     // and for newNode to become "live".
-                    if (p != t) // hop two nodes at a time
-                        casTail(t, newNode);  // Failure is OK.
+                    if (p != t) // hop two nodes at a time; failure is OK
+                        TAIL.weakCompareAndSet(this, t, newNode);
                     return true;
                 }
                 // Lost CAS race to another thread; re-read next
@@ -337,12 +381,10 @@
     }
 
     public E poll() {
-        restartFromHead:
-        for (;;) {
-            for (Node<E> h = head, p = h, q;;) {
-                E item = p.item;
-
-                if (item != null && casItem(p, item, null)) {
+        restartFromHead: for (;;) {
+            for (Node<E> h = head, p = h, q;; p = q) {
+                final E item;
+                if ((item = p.item) != null && p.casItem(item, null)) {
                     // Successful CAS is the linearization point
                     // for item to be removed from this queue.
                     if (p != h) // hop two nodes at a time
@@ -355,25 +397,21 @@
                 }
                 else if (p == q)
                     continue restartFromHead;
-                else
-                    p = q;
             }
         }
     }
 
     public E peek() {
-        restartFromHead:
-        for (;;) {
-            for (Node<E> h = head, p = h, q;;) {
-                E item = p.item;
-                if (item != null || (q = p.next) == null) {
+        restartFromHead: for (;;) {
+            for (Node<E> h = head, p = h, q;; p = q) {
+                final E item;
+                if ((item = p.item) != null
+                    || (q = p.next) == null) {
                     updateHead(h, p);
                     return item;
                 }
                 else if (p == q)
                     continue restartFromHead;
-                else
-                    p = q;
             }
         }
     }
@@ -387,9 +425,8 @@
      * of losing a race to a concurrent poll().
      */
     Node<E> first() {
-        restartFromHead:
-        for (;;) {
-            for (Node<E> h = head, p = h, q;;) {
+        restartFromHead: for (;;) {
+            for (Node<E> h = head, p = h, q;; p = q) {
                 boolean hasItem = (p.item != null);
                 if (hasItem || (q = p.next) == null) {
                     updateHead(h, p);
@@ -397,8 +434,6 @@
                 }
                 else if (p == q)
                     continue restartFromHead;
-                else
-                    p = q;
             }
         }
     }
@@ -451,14 +486,25 @@
      * @return {@code true} if this queue contains the specified element
      */
     public boolean contains(Object o) {
-        if (o != null) {
-            for (Node<E> p = first(); p != null; p = succ(p)) {
-                E item = p.item;
-                if (item != null && o.equals(item))
-                    return true;
+        if (o == null) return false;
+        restartFromHead: for (;;) {
+            for (Node<E> p = head, pred = null; p != null; ) {
+                Node<E> q = p.next;
+                final E item;
+                if ((item = p.item) != null) {
+                    if (o.equals(item))
+                        return true;
+                    pred = p; p = q; continue;
+                }
+                for (Node<E> c = p;; q = p.next) {
+                    if (q == null || q.item != null) {
+                        pred = skipDeadNodes(pred, c, p, q); p = q; break;
+                    }
+                    if (p == (p = q)) continue restartFromHead;
+                }
             }
+            return false;
         }
-        return false;
     }
 
     /**
@@ -473,27 +519,27 @@
      * @return {@code true} if this queue changed as a result of the call
      */
     public boolean remove(Object o) {
-        if (o != null) {
-            Node<E> next, pred = null;
-            for (Node<E> p = first(); p != null; pred = p, p = next) {
-                boolean removed = false;
-                E item = p.item;
-                if (item != null) {
-                    if (!o.equals(item)) {
-                        next = succ(p);
-                        continue;
+        if (o == null) return false;
+        restartFromHead: for (;;) {
+            for (Node<E> p = head, pred = null; p != null; ) {
+                Node<E> q = p.next;
+                final E item;
+                if ((item = p.item) != null) {
+                    if (o.equals(item) && p.casItem(item, null)) {
+                        skipDeadNodes(pred, p, p, q);
+                        return true;
                     }
-                    removed = casItem(p, item, null);
+                    pred = p; p = q; continue;
                 }
-
-                next = succ(p);
-                if (pred != null && next != null) // unlink
-                    casNext(pred, p, next);
-                if (removed)
-                    return true;
+                for (Node<E> c = p;; q = p.next) {
+                    if (q == null || q.item != null) {
+                        pred = skipDeadNodes(pred, c, p, q); p = q; break;
+                    }
+                    if (p == (p = q)) continue restartFromHead;
+                }
             }
+            return false;
         }
-        return false;
     }
 
     /**
@@ -516,13 +562,11 @@
         // Copy c into a private chain of Nodes
         Node<E> beginningOfTheEnd = null, last = null;
         for (E e : c) {
-            Node<E> newNode = newNode(Objects.requireNonNull(e));
+            Node<E> newNode = new Node<E>(Objects.requireNonNull(e));
             if (beginningOfTheEnd == null)
                 beginningOfTheEnd = last = newNode;
-            else {
-                lazySetNext(last, newNode);
-                last = newNode;
-            }
+            else
+                last.appendRelaxed(last = newNode);
         }
         if (beginningOfTheEnd == null)
             return false;
@@ -532,15 +576,15 @@
             Node<E> q = p.next;
             if (q == null) {
                 // p is last node
-                if (casNext(p, null, beginningOfTheEnd)) {
+                if (NEXT.compareAndSet(p, null, beginningOfTheEnd)) {
                     // Successful CAS is the linearization point
                     // for all elements to be added to this queue.
-                    if (!casTail(t, last)) {
+                    if (!TAIL.weakCompareAndSet(this, t, last)) {
                         // Try a little harder to update tail,
                         // since we may be adding many elements.
                         t = tail;
                         if (last.next == null)
-                            casTail(t, last);
+                            TAIL.weakCompareAndSet(this, t, last);
                     }
                     return true;
                 }
@@ -564,8 +608,8 @@
             int charLength = 0;
             int size = 0;
             for (Node<E> p = first(); p != null;) {
-                E item = p.item;
-                if (item != null) {
+                final E item;
+                if ((item = p.item) != null) {
                     if (a == null)
                         a = new String[4];
                     else if (size == a.length)
@@ -590,8 +634,8 @@
         restartFromHead: for (;;) {
             int size = 0;
             for (Node<E> p = first(); p != null;) {
-                E item = p.item;
-                if (item != null) {
+                final E item;
+                if ((item = p.item) != null) {
                     if (x == null)
                         x = new Object[4];
                     else if (size == x.length)
@@ -668,7 +712,7 @@
      */
     @SuppressWarnings("unchecked")
     public <T> T[] toArray(T[] a) {
-        if (a == null) throw new NullPointerException();
+        Objects.requireNonNull(a);
         return (T[]) toArrayInternal(a);
     }
 
@@ -708,7 +752,7 @@
             restartFromHead: for (;;) {
                 Node<E> h, p, q;
                 for (p = h = head;; p = q) {
-                    E item;
+                    final E item;
                     if ((item = p.item) != null) {
                         nextNode = p;
                         nextItem = item;
@@ -744,10 +788,12 @@
                 }
                 // unlink deleted nodes
                 if ((q = succ(p)) != null)
-                    casNext(pred, p, q);
+                    NEXT.compareAndSet(pred, p, q);
             }
         }
 
+        // Default implementation of forEachRemaining is "good enough".
+
         public void remove() {
             Node<E> l = lastRet;
             if (l == null) throw new IllegalStateException();
@@ -773,8 +819,8 @@
 
         // Write out all elements in the proper order.
         for (Node<E> p = first(); p != null; p = succ(p)) {
-            Object item = p.item;
-            if (item != null)
+            final E item;
+            if ((item = p.item) != null)
                 s.writeObject(item);
         }
 
@@ -797,91 +843,69 @@
         Node<E> h = null, t = null;
         for (Object item; (item = s.readObject()) != null; ) {
             @SuppressWarnings("unchecked")
-            Node<E> newNode = newNode((E) item);
+            Node<E> newNode = new Node<E>((E) item);
             if (h == null)
                 h = t = newNode;
-            else {
-                lazySetNext(t, newNode);
-                t = newNode;
-            }
+            else
+                t.appendRelaxed(t = newNode);
         }
         if (h == null)
-            h = t = newNode(null);
+            h = t = new Node<E>();
         head = h;
         tail = t;
     }
 
     /** A customized variant of Spliterators.IteratorSpliterator */
-    static final class CLQSpliterator<E> implements Spliterator<E> {
+    final class CLQSpliterator implements Spliterator<E> {
         static final int MAX_BATCH = 1 << 25;  // max batch array size;
-        final ConcurrentLinkedQueue<E> queue;
         Node<E> current;    // current node; null until initialized
         int batch;          // batch size for splits
         boolean exhausted;  // true when no more nodes
-        CLQSpliterator(ConcurrentLinkedQueue<E> queue) {
-            this.queue = queue;
-        }
 
         public Spliterator<E> trySplit() {
-            Node<E> p;
-            final ConcurrentLinkedQueue<E> q = this.queue;
-            int b = batch;
-            int n = (b <= 0) ? 1 : (b >= MAX_BATCH) ? MAX_BATCH : b + 1;
-            if (!exhausted &&
-                ((p = current) != null || (p = q.first()) != null) &&
-                p.next != null) {
-                Object[] a = new Object[n];
-                int i = 0;
-                do {
-                    if ((a[i] = p.item) != null)
-                        ++i;
-                    if (p == (p = p.next))
-                        p = q.first();
-                } while (p != null && i < n);
-                if ((current = p) == null)
-                    exhausted = true;
-                if (i > 0) {
-                    batch = i;
-                    return Spliterators.spliterator
-                        (a, 0, i, (Spliterator.ORDERED |
-                                   Spliterator.NONNULL |
-                                   Spliterator.CONCURRENT));
+            Node<E> p, q;
+            if ((p = current()) == null || (q = p.next) == null)
+                return null;
+            int i = 0, n = batch = Math.min(batch + 1, MAX_BATCH);
+            Object[] a = null;
+            do {
+                final E e;
+                if ((e = p.item) != null) {
+                    if (a == null)
+                        a = new Object[n];
+                    a[i++] = e;
                 }
-            }
-            return null;
+                if (p == (p = q))
+                    p = first();
+            } while (p != null && (q = p.next) != null && i < n);
+            setCurrent(p);
+            return (i == 0) ? null :
+                Spliterators.spliterator(a, 0, i, (Spliterator.ORDERED |
+                                                   Spliterator.NONNULL |
+                                                   Spliterator.CONCURRENT));
         }
 
         public void forEachRemaining(Consumer<? super E> action) {
-            Node<E> p;
-            if (action == null) throw new NullPointerException();
-            final ConcurrentLinkedQueue<E> q = this.queue;
-            if (!exhausted &&
-                ((p = current) != null || (p = q.first()) != null)) {
+            Objects.requireNonNull(action);
+            final Node<E> p;
+            if ((p = current()) != null) {
+                current = null;
                 exhausted = true;
-                do {
-                    E e = p.item;
-                    if (p == (p = p.next))
-                        p = q.first();
-                    if (e != null)
-                        action.accept(e);
-                } while (p != null);
+                forEachFrom(action, p);
             }
         }
 
         public boolean tryAdvance(Consumer<? super E> action) {
+            Objects.requireNonNull(action);
             Node<E> p;
-            if (action == null) throw new NullPointerException();
-            final ConcurrentLinkedQueue<E> q = this.queue;
-            if (!exhausted &&
-                ((p = current) != null || (p = q.first()) != null)) {
+            if ((p = current()) != null) {
                 E e;
                 do {
                     e = p.item;
                     if (p == (p = p.next))
-                        p = q.first();
+                        p = first();
                 } while (e == null && p != null);
-                if ((current = p) == null)
-                    exhausted = true;
+                setCurrent(p);
                 if (e != null) {
                     action.accept(e);
                     return true;
@@ -890,11 +914,24 @@
             return false;
         }
 
+        private void setCurrent(Node<E> p) {
+            if ((current = p) == null)
+                exhausted = true;
+        }
+
+        private Node<E> current() {
+            Node<E> p;
+            if ((p = current) == null && !exhausted)
+                setCurrent(p = first());
+            return p;
+        }
+
         public long estimateSize() { return Long.MAX_VALUE; }
 
         public int characteristics() {
-            return Spliterator.ORDERED | Spliterator.NONNULL |
-                Spliterator.CONCURRENT;
+            return (Spliterator.ORDERED |
+                    Spliterator.NONNULL |
+                    Spliterator.CONCURRENT);
         }
     }
 
@@ -916,36 +953,123 @@
      */
     @Override
     public Spliterator<E> spliterator() {
-        return new CLQSpliterator<E>(this);
+        return new CLQSpliterator();
     }
 
-    private boolean casTail(Node<E> cmp, Node<E> val) {
-        return U.compareAndSwapObject(this, TAIL, cmp, val);
+    /**
+     * @throws NullPointerException {@inheritDoc}
+     */
+    public boolean removeIf(Predicate<? super E> filter) {
+        Objects.requireNonNull(filter);
+        return bulkRemove(filter);
     }
 
-    private boolean casHead(Node<E> cmp, Node<E> val) {
-        return U.compareAndSwapObject(this, HEAD, cmp, val);
+    /**
+     * @throws NullPointerException {@inheritDoc}
+     */
+    public boolean removeAll(Collection<?> c) {
+        Objects.requireNonNull(c);
+        return bulkRemove(e -> c.contains(e));
     }
 
-    // Unsafe mechanics
+    /**
+     * @throws NullPointerException {@inheritDoc}
+     */
+    public boolean retainAll(Collection<?> c) {
+        Objects.requireNonNull(c);
+        return bulkRemove(e -> !c.contains(e));
+    }
 
-    private static final sun.misc.Unsafe U = sun.misc.Unsafe.getUnsafe();
-    private static final long HEAD;
-    private static final long TAIL;
-    private static final long ITEM;
-    private static final long NEXT;
+    public void clear() {
+        bulkRemove(e -> true);
+    }
+
+    /**
+     * Tolerate this many consecutive dead nodes before CAS-collapsing.
+     * Amortized cost of clear() is (1 + 1/MAX_HOPS) CASes per element.
+     */
+    private static final int MAX_HOPS = 8;
+
+    /** Implementation of bulk remove methods. */
+    private boolean bulkRemove(Predicate<? super E> filter) {
+        boolean removed = false;
+        restartFromHead: for (;;) {
+            int hops = MAX_HOPS;
+            // c will be CASed to collapse intervening dead nodes between
+            // pred (or head if null) and p.
+            for (Node<E> p = head, c = p, pred = null, q; p != null; p = q) {
+                q = p.next;
+                final E item; boolean pAlive;
+                if (pAlive = ((item = p.item) != null)) {
+                    if (filter.test(item)) {
+                        if (p.casItem(item, null))
+                            removed = true;
+                        pAlive = false;
+                    }
+                }
+                if (pAlive || q == null || --hops == 0) {
+                    // p might already be self-linked here, but if so:
+                    // - CASing head will surely fail
+                    // - CASing pred's next will be useless but harmless.
+                    if ((c != p && !tryCasSuccessor(pred, c, c = p))
+                        || pAlive) {
+                        // if CAS failed or alive, abandon old pred
+                        hops = MAX_HOPS;
+                        pred = p;
+                        c = q;
+                    }
+                } else if (p == q)
+                    continue restartFromHead;
+            }
+            return removed;
+        }
+    }
+
+    /**
+     * Runs action on each element found during a traversal starting at p.
+     * If p is null, the action is not run.
+     */
+    void forEachFrom(Consumer<? super E> action, Node<E> p) {
+        for (Node<E> pred = null; p != null; ) {
+            Node<E> q = p.next;
+            final E item;
+            if ((item = p.item) != null) {
+                action.accept(item);
+                pred = p; p = q; continue;
+            }
+            for (Node<E> c = p;; q = p.next) {
+                if (q == null || q.item != null) {
+                    pred = skipDeadNodes(pred, c, p, q); p = q; break;
+                }
+                if (p == (p = q)) { pred = null; p = head; break; }
+            }
+        }
+    }
+
+    /**
+     * @throws NullPointerException {@inheritDoc}
+     */
+    public void forEach(Consumer<? super E> action) {
+        Objects.requireNonNull(action);
+        forEachFrom(action, head);
+    }
+
+    // VarHandle mechanics
+    private static final VarHandle HEAD;
+    private static final VarHandle TAIL;
+    static final VarHandle ITEM;
+    static final VarHandle NEXT;
     static {
         try {
-            HEAD = U.objectFieldOffset
-                (ConcurrentLinkedQueue.class.getDeclaredField("head"));
-            TAIL = U.objectFieldOffset
-                (ConcurrentLinkedQueue.class.getDeclaredField("tail"));
-            ITEM = U.objectFieldOffset
-                (Node.class.getDeclaredField("item"));
-            NEXT = U.objectFieldOffset
-                (Node.class.getDeclaredField("next"));
+            MethodHandles.Lookup l = MethodHandles.lookup();
+            HEAD = l.findVarHandle(ConcurrentLinkedQueue.class, "head",
+                                   Node.class);
+            TAIL = l.findVarHandle(ConcurrentLinkedQueue.class, "tail",
+                                   Node.class);
+            ITEM = l.findVarHandle(Node.class, "item", Object.class);
+            NEXT = l.findVarHandle(Node.class, "next", Node.class);
         } catch (ReflectiveOperationException e) {
-            throw new Error(e);
+            throw new ExceptionInInitializerError(e);
         }
     }
 }
diff --git a/ojluni/src/main/java/java/util/concurrent/ConcurrentMap.java b/ojluni/src/main/java/java/util/concurrent/ConcurrentMap.java
index 69dae6f..2037e31 100644
--- a/ojluni/src/main/java/java/util/concurrent/ConcurrentMap.java
+++ b/ojluni/src/main/java/java/util/concurrent/ConcurrentMap.java
@@ -41,14 +41,8 @@
 import java.util.function.BiFunction;
 import java.util.function.Function;
 
-// BEGIN android-note
-// removed link to collections framework docs
-// fixed framework docs link to "Collection#optional"
-// END android-note
-
 /**
- * A {@link java.util.Map} providing thread safety and atomicity
- * guarantees.
+ * A {@link Map} providing thread safety and atomicity guarantees.
  *
  * <p>To maintain the specified guarantees, default implementations of
  * methods including {@link #putIfAbsent} inherited from {@link Map}
@@ -65,6 +59,10 @@
  * actions subsequent to the access or removal of that object from
  * the {@code ConcurrentMap} in another thread.
  *
+ * <p>This interface is a member of the
+ * <a href="{@docRoot}/java.base/java/util/package-summary.html#CollectionsFramework">
+ * Java Collections Framework</a>.
+ *
  * @since 1.5
  * @author Doug Lea
  * @param <K> the type of keys maintained by this map
@@ -182,10 +180,10 @@
      *         is not supported by this map
      * @throws ClassCastException if the key or value is of an inappropriate
      *         type for this map
-     * (<a href="../Collection.html#optional-restrictions">optional</a>)
+     * (<a href="{@docRoot}/java.base/java/util/Collection.html#optional-restrictions">optional</a>)
      * @throws NullPointerException if the specified key or value is null,
      *         and this map does not permit null keys or values
-     * (<a href="../Collection.html#optional-restrictions">optional</a>)
+     * (<a href="{@docRoot}/java.base/java/util/Collection.html#optional-restrictions">optional</a>)
      */
     boolean remove(Object key, Object value);
 
diff --git a/ojluni/src/main/java/java/util/concurrent/ConcurrentNavigableMap.java b/ojluni/src/main/java/java/util/concurrent/ConcurrentNavigableMap.java
index 94a90cd..1666c8b 100644
--- a/ojluni/src/main/java/java/util/concurrent/ConcurrentNavigableMap.java
+++ b/ojluni/src/main/java/java/util/concurrent/ConcurrentNavigableMap.java
@@ -38,14 +38,14 @@
 import java.util.NavigableMap;
 import java.util.NavigableSet;
 
-// BEGIN android-note
-// removed link to collections framework docs
-// END android-note
-
 /**
  * A {@link ConcurrentMap} supporting {@link NavigableMap} operations,
  * and recursively so for its navigable sub-maps.
  *
+ * <p>This interface is a member of the
+ * <a href="{@docRoot}/java.base/java/util/package-summary.html#CollectionsFramework">
+ * Java Collections Framework</a>.
+ *
  * @author Doug Lea
  * @param <K> the type of keys maintained by this map
  * @param <V> the type of mapped values
diff --git a/ojluni/src/main/java/java/util/concurrent/ConcurrentSkipListMap.java b/ojluni/src/main/java/java/util/concurrent/ConcurrentSkipListMap.java
index 583244b..db14d4b 100644
--- a/ojluni/src/main/java/java/util/concurrent/ConcurrentSkipListMap.java
+++ b/ojluni/src/main/java/java/util/concurrent/ConcurrentSkipListMap.java
@@ -35,6 +35,8 @@
 
 package java.util.concurrent;
 
+import java.lang.invoke.MethodHandles;
+import java.lang.invoke.VarHandle;
 import java.io.Serializable;
 import java.util.AbstractCollection;
 import java.util.AbstractMap;
@@ -46,7 +48,6 @@
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
-import java.util.NavigableMap;
 import java.util.NavigableSet;
 import java.util.NoSuchElementException;
 import java.util.Set;
@@ -57,10 +58,7 @@
 import java.util.function.Consumer;
 import java.util.function.Function;
 import java.util.function.Predicate;
-
-// BEGIN android-note
-// removed link to collections framework docs
-// END android-note
+import java.util.concurrent.atomic.LongAdder;
 
 /**
  * A scalable concurrent {@link ConcurrentNavigableMap} implementation.
@@ -89,12 +87,7 @@
  * associated map using {@code put}, {@code putIfAbsent}, or
  * {@code replace}, depending on exactly which effect you need.)
  *
- * <p>Beware that, unlike in most collections, the {@code size}
- * method is <em>not</em> a constant-time operation. Because of the
- * asynchronous nature of these maps, determining the current number
- * of elements requires a traversal of the elements, and so may report
- * inaccurate results if this collection is modified during traversal.
- * Additionally, the bulk operations {@code putAll}, {@code equals},
+ * <p>Beware that bulk operations {@code putAll}, {@code equals},
  * {@code toArray}, {@code containsValue}, and {@code clear} are
  * <em>not</em> guaranteed to be performed atomically. For example, an
  * iterator operating concurrently with a {@code putAll} operation
@@ -107,6 +100,10 @@
  * null return values cannot be reliably distinguished from the absence of
  * elements.
  *
+ * <p>This class is a member of the
+ * <a href="{@docRoot}/java.base/java/util/package-summary.html#CollectionsFramework">
+ * Java Collections Framework</a>.
+ *
  * @author Doug Lea
  * @param <K> the type of keys maintained by this map
  * @param <V> the type of mapped values
@@ -157,42 +154,35 @@
      * be slow and space-intensive using AtomicMarkedReference), nodes
      * use direct CAS'able next pointers.  On deletion, instead of
      * marking a pointer, they splice in another node that can be
-     * thought of as standing for a marked pointer (indicating this by
-     * using otherwise impossible field values).  Using plain nodes
-     * acts roughly like "boxed" implementations of marked pointers,
-     * but uses new nodes only when nodes are deleted, not for every
-     * link.  This requires less space and supports faster
-     * traversal. Even if marked references were better supported by
-     * JVMs, traversal using this technique might still be faster
-     * because any search need only read ahead one more node than
-     * otherwise required (to check for trailing marker) rather than
-     * unmasking mark bits or whatever on each read.
+     * thought of as standing for a marked pointer (see method
+     * unlinkNode).  Using plain nodes acts roughly like "boxed"
+     * implementations of marked pointers, but uses new nodes only
+     * when nodes are deleted, not for every link.  This requires less
+     * space and supports faster traversal. Even if marked references
+     * were better supported by JVMs, traversal using this technique
+     * might still be faster because any search need only read ahead
+     * one more node than otherwise required (to check for trailing
+     * marker) rather than unmasking mark bits or whatever on each
+     * read.
      *
      * This approach maintains the essential property needed in the HM
      * algorithm of changing the next-pointer of a deleted node so
      * that any other CAS of it will fail, but implements the idea by
-     * changing the pointer to point to a different node, not by
-     * marking it.  While it would be possible to further squeeze
-     * space by defining marker nodes not to have key/value fields, it
-     * isn't worth the extra type-testing overhead.  The deletion
-     * markers are rarely encountered during traversal and are
-     * normally quickly garbage collected. (Note that this technique
-     * would not work well in systems without garbage collection.)
+     * changing the pointer to point to a different node (with
+     * otherwise illegal null fields), not by marking it.  While it
+     * would be possible to further squeeze space by defining marker
+     * nodes not to have key/value fields, it isn't worth the extra
+     * type-testing overhead.  The deletion markers are rarely
+     * encountered during traversal, are easily detected via null
+     * checks that are needed anyway, and are normally quickly garbage
+     * collected. (Note that this technique would not work well in
+     * systems without garbage collection.)
      *
      * In addition to using deletion markers, the lists also use
      * nullness of value fields to indicate deletion, in a style
      * similar to typical lazy-deletion schemes.  If a node's value is
      * null, then it is considered logically deleted and ignored even
-     * though it is still reachable. This maintains proper control of
-     * concurrent replace vs delete operations -- an attempted replace
-     * must fail if a delete beat it by nulling field, and a delete
-     * must return the last non-null value held in the field. (Note:
-     * Null, rather than some special marker, is used for value fields
-     * here because it just so happens to mesh with the Map API
-     * requirement that method get returns null if there is no
-     * mapping, which allows nodes to remain concurrently readable
-     * even when deleted. Using any other marker value here would be
-     * messy at best.)
+     * though it is still reachable.
      *
      * Here's the sequence of events for a deletion of node n with
      * predecessor b and successor f, initially:
@@ -202,9 +192,8 @@
      *        +------+       +------+      +------+
      *
      * 1. CAS n's value field from non-null to null.
-     *    From this point on, no public operations encountering
-     *    the node consider this mapping to exist. However, other
-     *    ongoing insertions and deletions might still modify
+     *    Traversals encountering a node with null value ignore it.
+     *    However, ongoing insertions and deletions might still modify
      *    n's next pointer.
      *
      * 2. CAS n's next pointer to point to a new marker node.
@@ -227,12 +216,7 @@
      * thread noticed during a traversal a node with null value and
      * helped out by marking and/or unlinking.  This helping-out
      * ensures that no thread can become stuck waiting for progress of
-     * the deleting thread.  The use of marker nodes slightly
-     * complicates helping-out code because traversals must track
-     * consistent reads of up to four nodes (b, n, marker, f), not
-     * just (b, n, f), although the next field of a marker is
-     * immutable, and once a next field is CAS'ed to point to a
-     * marker, it never again changes, so this requires less care.
+     * the deleting thread.
      *
      * Skip lists add indexing to this scheme, so that the base-level
      * traversals start close to the locations being found, inserted
@@ -242,113 +226,101 @@
      * b) that are not (structurally) deleted, otherwise retrying
      * after processing the deletion.
      *
-     * Index levels are maintained as lists with volatile next fields,
-     * using CAS to link and unlink.  Races are allowed in index-list
-     * operations that can (rarely) fail to link in a new index node
-     * or delete one. (We can't do this of course for data nodes.)
-     * However, even when this happens, the index lists remain sorted,
-     * so correctly serve as indices.  This can impact performance,
-     * but since skip lists are probabilistic anyway, the net result
-     * is that under contention, the effective "p" value may be lower
-     * than its nominal value. And race windows are kept small enough
-     * that in practice these failures are rare, even under a lot of
-     * contention.
+     * Index levels are maintained using CAS to link and unlink
+     * successors ("right" fields).  Races are allowed in index-list
+     * operations that can (rarely) fail to link in a new index node.
+     * (We can't do this of course for data nodes.)  However, even
+     * when this happens, the index lists correctly guide search.
+     * This can impact performance, but since skip lists are
+     * probabilistic anyway, the net result is that under contention,
+     * the effective "p" value may be lower than its nominal value.
      *
-     * The fact that retries (for both base and index lists) are
-     * relatively cheap due to indexing allows some minor
-     * simplifications of retry logic. Traversal restarts are
-     * performed after most "helping-out" CASes. This isn't always
-     * strictly necessary, but the implicit backoffs tend to help
-     * reduce other downstream failed CAS's enough to outweigh restart
-     * cost.  This worsens the worst case, but seems to improve even
-     * highly contended cases.
-     *
-     * Unlike most skip-list implementations, index insertion and
-     * deletion here require a separate traversal pass occurring after
-     * the base-level action, to add or remove index nodes.  This adds
-     * to single-threaded overhead, but improves contended
-     * multithreaded performance by narrowing interference windows,
-     * and allows deletion to ensure that all index nodes will be made
-     * unreachable upon return from a public remove operation, thus
-     * avoiding unwanted garbage retention. This is more important
-     * here than in some other data structures because we cannot null
-     * out node fields referencing user keys since they might still be
-     * read by other ongoing traversals.
+     * Index insertion and deletion sometimes require a separate
+     * traversal pass occurring after the base-level action, to add or
+     * remove index nodes.  This adds to single-threaded overhead, but
+     * improves contended multithreaded performance by narrowing
+     * interference windows, and allows deletion to ensure that all
+     * index nodes will be made unreachable upon return from a public
+     * remove operation, thus avoiding unwanted garbage retention.
      *
      * Indexing uses skip list parameters that maintain good search
      * performance while using sparser-than-usual indices: The
-     * hardwired parameters k=1, p=0.5 (see method doPut) mean
-     * that about one-quarter of the nodes have indices. Of those that
-     * do, half have one level, a quarter have two, and so on (see
-     * Pugh's Skip List Cookbook, sec 3.4).  The expected total space
-     * requirement for a map is slightly less than for the current
-     * implementation of java.util.TreeMap.
+     * hardwired parameters k=1, p=0.5 (see method doPut) mean that
+     * about one-quarter of the nodes have indices. Of those that do,
+     * half have one level, a quarter have two, and so on (see Pugh's
+     * Skip List Cookbook, sec 3.4), up to a maximum of 62 levels
+     * (appropriate for up to 2^63 elements).  The expected total
+     * space requirement for a map is slightly less than for the
+     * current implementation of java.util.TreeMap.
      *
      * Changing the level of the index (i.e, the height of the
-     * tree-like structure) also uses CAS. The head index has initial
-     * level/height of one. Creation of an index with height greater
-     * than the current level adds a level to the head index by
-     * CAS'ing on a new top-most head. To maintain good performance
-     * after a lot of removals, deletion methods heuristically try to
-     * reduce the height if the topmost levels appear to be empty.
-     * This may encounter races in which it possible (but rare) to
-     * reduce and "lose" a level just as it is about to contain an
-     * index (that will then never be encountered). This does no
-     * structural harm, and in practice appears to be a better option
-     * than allowing unrestrained growth of levels.
+     * tree-like structure) also uses CAS.  Creation of an index with
+     * height greater than the current level adds a level to the head
+     * index by CAS'ing on a new top-most head. To maintain good
+     * performance after a lot of removals, deletion methods
+     * heuristically try to reduce the height if the topmost levels
+     * appear to be empty.  This may encounter races in which it is
+     * possible (but rare) to reduce and "lose" a level just as it is
+     * about to contain an index (that will then never be
+     * encountered). This does no structural harm, and in practice
+     * appears to be a better option than allowing unrestrained growth
+     * of levels.
      *
-     * The code for all this is more verbose than you'd like. Most
-     * operations entail locating an element (or position to insert an
-     * element). The code to do this can't be nicely factored out
-     * because subsequent uses require a snapshot of predecessor
-     * and/or successor and/or value fields which can't be returned
-     * all at once, at least not without creating yet another object
-     * to hold them -- creating such little objects is an especially
-     * bad idea for basic internal search operations because it adds
-     * to GC overhead.  (This is one of the few times I've wished Java
-     * had macros.) Instead, some traversal code is interleaved within
-     * insertion and removal operations.  The control logic to handle
-     * all the retry conditions is sometimes twisty. Most search is
-     * broken into 2 parts. findPredecessor() searches index nodes
-     * only, returning a base-level predecessor of the key. findNode()
-     * finishes out the base-level search. Even with this factoring,
-     * there is a fair amount of near-duplication of code to handle
-     * variants.
+     * This class provides concurrent-reader-style memory consistency,
+     * ensuring that read-only methods report status and/or values no
+     * staler than those holding at method entry. This is done by
+     * performing all publication and structural updates using
+     * (volatile) CAS, placing an acquireFence in a few access
+     * methods, and ensuring that linked objects are transitively
+     * acquired via dependent reads (normally once) unless performing
+     * a volatile-mode CAS operation (that also acts as an acquire and
+     * release).  This form of fence-hoisting is similar to RCU and
+     * related techniques (see McKenney's online book
+     * https://www.kernel.org/pub/linux/kernel/people/paulmck/perfbook/perfbook.html)
+     * It minimizes overhead that may otherwise occur when using so
+     * many volatile-mode reads. Using explicit acquireFences is
+     * logistically easier than targeting particular fields to be read
+     * in acquire mode: fences are just hoisted up as far as possible,
+     * to the entry points or loop headers of a few methods. A
+     * potential disadvantage is that these few remaining fences are
+     * not easily optimized away by compilers under exclusively
+     * single-thread use.  It requires some care to avoid volatile
+     * mode reads of other fields. (Note that the memory semantics of
+     * a reference dependently read in plain mode exactly once are
+     * equivalent to those for atomic opaque mode.)  Iterators and
+     * other traversals encounter each node and value exactly once.
+     * Other operations locate an element (or position to insert an
+     * element) via a sequence of dereferences. This search is broken
+     * into two parts. Method findPredecessor (and its specialized
+     * embeddings) searches index nodes only, returning a base-level
+     * predecessor of the key. Callers carry out the base-level
+     * search, restarting if encountering a marker preventing link
+     * modification.  In some cases, it is possible to encounter a
+     * node multiple times while descending levels. For mutative
+     * operations, the reported value is validated using CAS (else
+     * retrying), preserving linearizability with respect to each
+     * other. Others may return any (non-null) value holding in the
+     * course of the method call.  (Search-based methods also include
+     * some useless-looking explicit null checks designed to allow
+     * more fields to be nulled out upon removal, to reduce floating
+     * garbage, but which is not currently done, pending discovery of
+     * a way to do this with less impact on other operations.)
      *
      * To produce random values without interference across threads,
      * we use within-JDK thread local random support (via the
      * "secondary seed", to avoid interference with user-level
      * ThreadLocalRandom.)
      *
-     * A previous version of this class wrapped non-comparable keys
-     * with their comparators to emulate Comparables when using
-     * comparators vs Comparables.  However, JVMs now appear to better
-     * handle infusing comparator-vs-comparable choice into search
-     * loops. Static method cpr(comparator, x, y) is used for all
-     * comparisons, which works well as long as the comparator
-     * argument is set up outside of loops (thus sometimes passed as
-     * an argument to internal methods) to avoid field re-reads.
-     *
      * For explanation of algorithms sharing at least a couple of
      * features with this one, see Mikhail Fomitchev's thesis
      * (http://www.cs.yorku.ca/~mikhail/), Keir Fraser's thesis
      * (http://www.cl.cam.ac.uk/users/kaf24/), and Hakan Sundell's
      * thesis (http://www.cs.chalmers.se/~phs/).
      *
-     * Given the use of tree-like index nodes, you might wonder why
-     * this doesn't use some kind of search tree instead, which would
-     * support somewhat faster search operations. The reason is that
-     * there are no known efficient lock-free insertion and deletion
-     * algorithms for search trees. The immutability of the "down"
-     * links of index nodes (as opposed to mutable "left" fields in
-     * true trees) makes this tractable using only CAS operations.
-     *
      * Notation guide for local variables
-     * Node:         b, n, f    for  predecessor, node, successor
+     * Node:         b, n, f, p for  predecessor, node, successor, aux
      * Index:        q, r, d    for index node, right, down.
-     *               t          for another index node
      * Head:         h
-     * Levels:       j
      * Keys:         k, key
      * Values:       v, value
      * Comparisons:  c
@@ -357,16 +329,6 @@
     private static final long serialVersionUID = -8627078645895051609L;
 
     /**
-     * Special value used to identify base-level header.
-     */
-    static final Object BASE_HEADER = new Object();
-
-    /**
-     * The topmost head index of the skiplist.
-     */
-    private transient volatile HeadIndex<K,V> head;
-
-    /**
      * The comparator used to maintain order in this map, or null if
      * using natural ordering.  (Non-private to simplify access in
      * nested classes.)
@@ -374,316 +336,152 @@
      */
     final Comparator<? super K> comparator;
 
+    /** Lazily initialized topmost index of the skiplist. */
+    private transient Index<K,V> head;
+    /** Lazily initialized element count */
+    private transient LongAdder adder;
     /** Lazily initialized key set */
     private transient KeySet<K,V> keySet;
-    /** Lazily initialized entry set */
-    private transient EntrySet<K,V> entrySet;
     /** Lazily initialized values collection */
     private transient Values<K,V> values;
-    /** Lazily initialized descending key set */
-    private transient ConcurrentNavigableMap<K,V> descendingMap;
-
-    /**
-     * Initializes or resets state. Needed by constructors, clone,
-     * clear, readObject. and ConcurrentSkipListSet.clone.
-     * (Note that comparator must be separately initialized.)
-     */
-    private void initialize() {
-        keySet = null;
-        entrySet = null;
-        values = null;
-        descendingMap = null;
-        head = new HeadIndex<K,V>(new Node<K,V>(null, BASE_HEADER, null),
-                                  null, null, 1);
-    }
-
-    /**
-     * compareAndSet head node.
-     */
-    private boolean casHead(HeadIndex<K,V> cmp, HeadIndex<K,V> val) {
-        return U.compareAndSwapObject(this, HEAD, cmp, val);
-    }
-
-    /* ---------------- Nodes -------------- */
+    /** Lazily initialized entry set */
+    private transient EntrySet<K,V> entrySet;
+    /** Lazily initialized descending map */
+    private transient SubMap<K,V> descendingMap;
 
     /**
      * Nodes hold keys and values, and are singly linked in sorted
      * order, possibly with some intervening marker nodes. The list is
-     * headed by a dummy node accessible as head.node. The value field
-     * is declared only as Object because it takes special non-V
-     * values for marker and header nodes.
+     * headed by a header node accessible as head.node. Headers and
+     * marker nodes have null keys. The val field (but currently not
+     * the key field) is nulled out upon deletion.
      */
     static final class Node<K,V> {
-        final K key;
-        volatile Object value;
-        volatile Node<K,V> next;
-
-        /**
-         * Creates a new regular node.
-         */
-        Node(K key, Object value, Node<K,V> next) {
+        final K key; // currently, never detached
+        V val;
+        Node<K,V> next;
+        Node(K key, V value, Node<K,V> next) {
             this.key = key;
-            this.value = value;
+            this.val = value;
             this.next = next;
         }
-
-        /**
-         * Creates a new marker node. A marker is distinguished by
-         * having its value field point to itself.  Marker nodes also
-         * have null keys, a fact that is exploited in a few places,
-         * but this doesn't distinguish markers from the base-level
-         * header node (head.node), which also has a null key.
-         */
-        Node(Node<K,V> next) {
-            this.key = null;
-            this.value = this;
-            this.next = next;
-        }
-
-        /**
-         * compareAndSet value field.
-         */
-        boolean casValue(Object cmp, Object val) {
-            return U.compareAndSwapObject(this, VALUE, cmp, val);
-        }
-
-        /**
-         * compareAndSet next field.
-         */
-        boolean casNext(Node<K,V> cmp, Node<K,V> val) {
-            return U.compareAndSwapObject(this, NEXT, cmp, val);
-        }
-
-        /**
-         * Returns true if this node is a marker. This method isn't
-         * actually called in any current code checking for markers
-         * because callers will have already read value field and need
-         * to use that read (not another done here) and so directly
-         * test if value points to node.
-         *
-         * @return true if this node is a marker node
-         */
-        boolean isMarker() {
-            return value == this;
-        }
-
-        /**
-         * Returns true if this node is the header of base-level list.
-         * @return true if this node is header node
-         */
-        boolean isBaseHeader() {
-            return value == BASE_HEADER;
-        }
-
-        /**
-         * Tries to append a deletion marker to this node.
-         * @param f the assumed current successor of this node
-         * @return true if successful
-         */
-        boolean appendMarker(Node<K,V> f) {
-            return casNext(f, new Node<K,V>(f));
-        }
-
-        /**
-         * Helps out a deletion by appending marker or unlinking from
-         * predecessor. This is called during traversals when value
-         * field seen to be null.
-         * @param b predecessor
-         * @param f successor
-         */
-        void helpDelete(Node<K,V> b, Node<K,V> f) {
-            /*
-             * Rechecking links and then doing only one of the
-             * help-out stages per call tends to minimize CAS
-             * interference among helping threads.
-             */
-            if (f == next && this == b.next) {
-                if (f == null || f.value != f) // not already marked
-                    casNext(f, new Node<K,V>(f));
-                else
-                    b.casNext(this, f.next);
-            }
-        }
-
-        /**
-         * Returns value if this node contains a valid key-value pair,
-         * else null.
-         * @return this node's value if it isn't a marker or header or
-         * is deleted, else null
-         */
-        V getValidValue() {
-            Object v = value;
-            if (v == this || v == BASE_HEADER)
-                return null;
-            @SuppressWarnings("unchecked") V vv = (V)v;
-            return vv;
-        }
-
-        /**
-         * Creates and returns a new SimpleImmutableEntry holding current
-         * mapping if this node holds a valid value, else null.
-         * @return new entry or null
-         */
-        AbstractMap.SimpleImmutableEntry<K,V> createSnapshot() {
-            Object v = value;
-            if (v == null || v == this || v == BASE_HEADER)
-                return null;
-            @SuppressWarnings("unchecked") V vv = (V)v;
-            return new AbstractMap.SimpleImmutableEntry<K,V>(key, vv);
-        }
-
-        // Unsafe mechanics
-
-        private static final sun.misc.Unsafe U = sun.misc.Unsafe.getUnsafe();
-        private static final long VALUE;
-        private static final long NEXT;
-
-        static {
-            try {
-                VALUE = U.objectFieldOffset
-                    (Node.class.getDeclaredField("value"));
-                NEXT = U.objectFieldOffset
-                    (Node.class.getDeclaredField("next"));
-            } catch (ReflectiveOperationException e) {
-                throw new Error(e);
-            }
-        }
     }
 
-    /* ---------------- Indexing -------------- */
-
     /**
-     * Index nodes represent the levels of the skip list.  Note that
-     * even though both Nodes and Indexes have forward-pointing
-     * fields, they have different types and are handled in different
-     * ways, that can't nicely be captured by placing field in a
-     * shared abstract class.
+     * Index nodes represent the levels of the skip list.
      */
-    static class Index<K,V> {
-        final Node<K,V> node;
+    static final class Index<K,V> {
+        final Node<K,V> node;  // currently, never detached
         final Index<K,V> down;
-        volatile Index<K,V> right;
-
-        /**
-         * Creates index node with given values.
-         */
+        Index<K,V> right;
         Index(Node<K,V> node, Index<K,V> down, Index<K,V> right) {
             this.node = node;
             this.down = down;
             this.right = right;
         }
-
-        /**
-         * compareAndSet right field.
-         */
-        final boolean casRight(Index<K,V> cmp, Index<K,V> val) {
-            return U.compareAndSwapObject(this, RIGHT, cmp, val);
-        }
-
-        /**
-         * Returns true if the node this indexes has been deleted.
-         * @return true if indexed node is known to be deleted
-         */
-        final boolean indexesDeletedNode() {
-            return node.value == null;
-        }
-
-        /**
-         * Tries to CAS newSucc as successor.  To minimize races with
-         * unlink that may lose this index node, if the node being
-         * indexed is known to be deleted, it doesn't try to link in.
-         * @param succ the expected current successor
-         * @param newSucc the new successor
-         * @return true if successful
-         */
-        final boolean link(Index<K,V> succ, Index<K,V> newSucc) {
-            Node<K,V> n = node;
-            newSucc.right = succ;
-            return n.value != null && casRight(succ, newSucc);
-        }
-
-        /**
-         * Tries to CAS right field to skip over apparent successor
-         * succ.  Fails (forcing a retraversal by caller) if this node
-         * is known to be deleted.
-         * @param succ the expected current successor
-         * @return true if successful
-         */
-        final boolean unlink(Index<K,V> succ) {
-            return node.value != null && casRight(succ, succ.right);
-        }
-
-        // Unsafe mechanics
-        private static final sun.misc.Unsafe U = sun.misc.Unsafe.getUnsafe();
-        private static final long RIGHT;
-        static {
-            try {
-                RIGHT = U.objectFieldOffset
-                    (Index.class.getDeclaredField("right"));
-            } catch (ReflectiveOperationException e) {
-                throw new Error(e);
-            }
-        }
     }
 
-    /* ---------------- Head nodes -------------- */
-
-    /**
-     * Nodes heading each level keep track of their level.
-     */
-    static final class HeadIndex<K,V> extends Index<K,V> {
-        final int level;
-        HeadIndex(Node<K,V> node, Index<K,V> down, Index<K,V> right, int level) {
-            super(node, down, right);
-            this.level = level;
-        }
-    }
-
-    /* ---------------- Comparison utilities -------------- */
+    /* ----------------  Utilities -------------- */
 
     /**
      * Compares using comparator or natural ordering if null.
      * Called only by methods that have performed required type checks.
      */
     @SuppressWarnings({"unchecked", "rawtypes"})
-    static final int cpr(Comparator c, Object x, Object y) {
+    static int cpr(Comparator c, Object x, Object y) {
         return (c != null) ? c.compare(x, y) : ((Comparable)x).compareTo(y);
     }
 
+    /**
+     * Returns the header for base node list, or null if uninitialized
+     */
+    final Node<K,V> baseHead() {
+        Index<K,V> h;
+        VarHandle.acquireFence();
+        return ((h = head) == null) ? null : h.node;
+    }
+
+    /**
+     * Tries to unlink deleted node n from predecessor b (if both
+     * exist), by first splicing in a marker if not already present.
+     * Upon return, node n is sure to be unlinked from b, possibly
+     * via the actions of some other thread.
+     *
+     * @param b if nonnull, predecessor
+     * @param n if nonnull, node known to be deleted
+     */
+    static <K,V> void unlinkNode(Node<K,V> b, Node<K,V> n) {
+        if (b != null && n != null) {
+            Node<K,V> f, p;
+            for (;;) {
+                if ((f = n.next) != null && f.key == null) {
+                    p = f.next;               // already marked
+                    break;
+                }
+                else if (NEXT.compareAndSet(n, f,
+                                            new Node<K,V>(null, null, f))) {
+                    p = f;                    // add marker
+                    break;
+                }
+            }
+            NEXT.compareAndSet(b, n, p);
+        }
+    }
+
+    /**
+     * Adds to element count, initializing adder if necessary
+     *
+     * @param c count to add
+     */
+    private void addCount(long c) {
+        LongAdder a;
+        do {} while ((a = adder) == null &&
+                     !ADDER.compareAndSet(this, null, a = new LongAdder()));
+        a.add(c);
+    }
+
+    /**
+     * Returns element count, initializing adder if necessary.
+     */
+    final long getAdderCount() {
+        LongAdder a; long c;
+        do {} while ((a = adder) == null &&
+                     !ADDER.compareAndSet(this, null, a = new LongAdder()));
+        return ((c = a.sum()) <= 0L) ? 0L : c; // ignore transient negatives
+    }
+
     /* ---------------- Traversal -------------- */
 
     /**
-     * Returns a base-level node with key strictly less than given key,
-     * or the base-level header if there is no such node.  Also
-     * unlinks indexes to deleted nodes found along the way.  Callers
-     * rely on this side-effect of clearing indices to deleted nodes.
-     * @param key the key
-     * @return a predecessor of key
+     * Returns an index node with key strictly less than given key.
+     * Also unlinks indexes to deleted nodes found along the way.
+     * Callers rely on this side-effect of clearing indices to deleted
+     * nodes.
+     *
+     * @param key if nonnull the key
+     * @return a predecessor node of key, or null if uninitialized or null key
      */
     private Node<K,V> findPredecessor(Object key, Comparator<? super K> cmp) {
-        if (key == null)
-            throw new NullPointerException(); // don't postpone errors
-        for (;;) {
-            for (Index<K,V> q = head, r = q.right, d;;) {
-                if (r != null) {
-                    Node<K,V> n = r.node;
-                    K k = n.key;
-                    if (n.value == null) {
-                        if (!q.unlink(r))
-                            break;           // restart
-                        r = q.right;         // reread r
-                        continue;
-                    }
-                    if (cpr(cmp, key, k) > 0) {
+        Index<K,V> q;
+        VarHandle.acquireFence();
+        if ((q = head) == null || key == null)
+            return null;
+        else {
+            for (Index<K,V> r, d;;) {
+                while ((r = q.right) != null) {
+                    Node<K,V> p; K k;
+                    if ((p = r.node) == null || (k = p.key) == null ||
+                        p.val == null)  // unlink index to deleted node
+                        RIGHT.compareAndSet(q, r, r.right);
+                    else if (cpr(cmp, key, k) > 0)
                         q = r;
-                        r = r.right;
-                        continue;
-                    }
+                    else
+                        break;
                 }
-                if ((d = q.down) == null)
+                if ((d = q.down) != null)
+                    q = d;
+                else
                     return q.node;
-                q = d;
-                r = d.right;
             }
         }
     }
@@ -693,41 +491,11 @@
      * deleted nodes seen along the way.  Repeatedly traverses at
      * base-level looking for key starting at predecessor returned
      * from findPredecessor, processing base-level deletions as
-     * encountered. Some callers rely on this side-effect of clearing
-     * deleted nodes.
-     *
-     * Restarts occur, at traversal step centered on node n, if:
-     *
-     *   (1) After reading n's next field, n is no longer assumed
-     *       predecessor b's current successor, which means that
-     *       we don't have a consistent 3-node snapshot and so cannot
-     *       unlink any subsequent deleted nodes encountered.
-     *
-     *   (2) n's value field is null, indicating n is deleted, in
-     *       which case we help out an ongoing structural deletion
-     *       before retrying.  Even though there are cases where such
-     *       unlinking doesn't require restart, they aren't sorted out
-     *       here because doing so would not usually outweigh cost of
-     *       restarting.
-     *
-     *   (3) n is a marker or n's predecessor's value field is null,
-     *       indicating (among other possibilities) that
-     *       findPredecessor returned a deleted node. We can't unlink
-     *       the node because we don't know its predecessor, so rely
-     *       on another call to findPredecessor to notice and return
-     *       some earlier predecessor, which it will do. This check is
-     *       only strictly needed at beginning of loop, (and the
-     *       b.value check isn't strictly needed at all) but is done
-     *       each iteration to help avoid contention with other
-     *       threads by callers that will fail to be able to change
-     *       links, and so will retry anyway.
-     *
-     * The traversal loops in doPut, doRemove, and findNear all
-     * include the same three kinds of checks. And specialized
-     * versions appear in findFirst, and findLast and their variants.
-     * They can't easily share code because each uses the reads of
-     * fields held in locals occurring in the orders they were
-     * performed.
+     * encountered. Restarts occur, at traversal step encountering
+     * node n, if n's key field is null, indicating it is a marker, so
+     * its predecessor is deleted before continuing, which we help do
+     * by re-finding a valid predecessor.  The traversal loops in
+     * doPut, doRemove, and findNear all include the same checks.
      *
      * @param key the key
      * @return node holding key, or null if no such
@@ -736,67 +504,81 @@
         if (key == null)
             throw new NullPointerException(); // don't postpone errors
         Comparator<? super K> cmp = comparator;
-        outer: for (;;) {
-            for (Node<K,V> b = findPredecessor(key, cmp), n = b.next;;) {
-                Object v; int c;
-                if (n == null)
-                    break outer;
-                Node<K,V> f = n.next;
-                if (n != b.next)                // inconsistent read
-                    break;
-                if ((v = n.value) == null) {    // n is deleted
-                    n.helpDelete(b, f);
-                    break;
-                }
-                if (b.value == null || v == n)  // b is deleted
-                    break;
-                if ((c = cpr(cmp, key, n.key)) == 0)
+        Node<K,V> b;
+        outer: while ((b = findPredecessor(key, cmp)) != null) {
+            for (;;) {
+                Node<K,V> n; K k; V v; int c;
+                if ((n = b.next) == null)
+                    break outer;               // empty
+                else if ((k = n.key) == null)
+                    break;                     // b is deleted
+                else if ((v = n.val) == null)
+                    unlinkNode(b, n);          // n is deleted
+                else if ((c = cpr(cmp, key, k)) > 0)
+                    b = n;
+                else if (c == 0)
                     return n;
-                if (c < 0)
+                else
                     break outer;
-                b = n;
-                n = f;
             }
         }
         return null;
     }
 
     /**
-     * Gets value for key. Almost the same as findNode, but returns
-     * the found value (to avoid retries during re-reads)
+     * Gets value for key. Same idea as findNode, except skips over
+     * deletions and markers, and returns first encountered value to
+     * avoid possibly inconsistent rereads.
      *
      * @param key the key
      * @return the value, or null if absent
      */
     private V doGet(Object key) {
+        Index<K,V> q;
+        VarHandle.acquireFence();
         if (key == null)
             throw new NullPointerException();
         Comparator<? super K> cmp = comparator;
-        outer: for (;;) {
-            for (Node<K,V> b = findPredecessor(key, cmp), n = b.next;;) {
-                Object v; int c;
-                if (n == null)
-                    break outer;
-                Node<K,V> f = n.next;
-                if (n != b.next)                // inconsistent read
-                    break;
-                if ((v = n.value) == null) {    // n is deleted
-                    n.helpDelete(b, f);
+        V result = null;
+        if ((q = head) != null) {
+            outer: for (Index<K,V> r, d;;) {
+                while ((r = q.right) != null) {
+                    Node<K,V> p; K k; V v; int c;
+                    if ((p = r.node) == null || (k = p.key) == null ||
+                        (v = p.val) == null)
+                        RIGHT.compareAndSet(q, r, r.right);
+                    else if ((c = cpr(cmp, key, k)) > 0)
+                        q = r;
+                    else if (c == 0) {
+                        result = v;
+                        break outer;
+                    }
+                    else
+                        break;
+                }
+                if ((d = q.down) != null)
+                    q = d;
+                else {
+                    Node<K,V> b, n;
+                    if ((b = q.node) != null) {
+                        while ((n = b.next) != null) {
+                            V v; int c;
+                            K k = n.key;
+                            if ((v = n.val) == null || k == null ||
+                                (c = cpr(cmp, key, k)) > 0)
+                                b = n;
+                            else {
+                                if (c == 0)
+                                    result = v;
+                                break;
+                            }
+                        }
+                    }
                     break;
                 }
-                if (b.value == null || v == n)  // b is deleted
-                    break;
-                if ((c = cpr(cmp, key, n.key)) == 0) {
-                    @SuppressWarnings("unchecked") V vv = (V)v;
-                    return vv;
-                }
-                if (c < 0)
-                    break outer;
-                b = n;
-                n = f;
             }
         }
-        return null;
+        return result;
     }
 
     /* ---------------- Insertion -------------- */
@@ -804,126 +586,160 @@
     /**
      * Main insertion method.  Adds element if not present, or
      * replaces value if present and onlyIfAbsent is false.
+     *
      * @param key the key
      * @param value the value that must be associated with key
      * @param onlyIfAbsent if should not insert if already present
      * @return the old value, or null if newly inserted
      */
     private V doPut(K key, V value, boolean onlyIfAbsent) {
-        Node<K,V> z;             // added node
         if (key == null)
             throw new NullPointerException();
         Comparator<? super K> cmp = comparator;
-        outer: for (;;) {
-            for (Node<K,V> b = findPredecessor(key, cmp), n = b.next;;) {
-                if (n != null) {
-                    Object v; int c;
-                    Node<K,V> f = n.next;
-                    if (n != b.next)               // inconsistent read
-                        break;
-                    if ((v = n.value) == null) {   // n is deleted
-                        n.helpDelete(b, f);
-                        break;
-                    }
-                    if (b.value == null || v == n) // b is deleted
-                        break;
-                    if ((c = cpr(cmp, key, n.key)) > 0) {
-                        b = n;
-                        n = f;
-                        continue;
-                    }
-                    if (c == 0) {
-                        if (onlyIfAbsent || n.casValue(v, value)) {
-                            @SuppressWarnings("unchecked") V vv = (V)v;
-                            return vv;
-                        }
-                        break; // restart if lost race to replace value
-                    }
-                    // else c < 0; fall through
-                }
-
-                z = new Node<K,V>(key, value, n);
-                if (!b.casNext(n, z))
-                    break;         // restart if lost race to append to b
-                break outer;
+        for (;;) {
+            Index<K,V> h; Node<K,V> b;
+            VarHandle.acquireFence();
+            int levels = 0;                    // number of levels descended
+            if ((h = head) == null) {          // try to initialize
+                Node<K,V> base = new Node<K,V>(null, null, null);
+                h = new Index<K,V>(base, null, null);
+                b = (HEAD.compareAndSet(this, null, h)) ? base : null;
             }
-        }
-
-        int rnd = ThreadLocalRandom.nextSecondarySeed();
-        if ((rnd & 0x80000001) == 0) { // test highest and lowest bits
-            int level = 1, max;
-            while (((rnd >>>= 1) & 1) != 0)
-                ++level;
-            Index<K,V> idx = null;
-            HeadIndex<K,V> h = head;
-            if (level <= (max = h.level)) {
-                for (int i = 1; i <= level; ++i)
-                    idx = new Index<K,V>(z, idx, null);
-            }
-            else { // try to grow by one level
-                level = max + 1; // hold in array and later pick the one to use
-                @SuppressWarnings("unchecked")Index<K,V>[] idxs =
-                    (Index<K,V>[])new Index<?,?>[level+1];
-                for (int i = 1; i <= level; ++i)
-                    idxs[i] = idx = new Index<K,V>(z, idx, null);
-                for (;;) {
-                    h = head;
-                    int oldLevel = h.level;
-                    if (level <= oldLevel) // lost race to add level
-                        break;
-                    HeadIndex<K,V> newh = h;
-                    Node<K,V> oldbase = h.node;
-                    for (int j = oldLevel+1; j <= level; ++j)
-                        newh = new HeadIndex<K,V>(oldbase, newh, idxs[j], j);
-                    if (casHead(h, newh)) {
-                        h = newh;
-                        idx = idxs[level = oldLevel];
-                        break;
-                    }
-                }
-            }
-            // find insertion points and splice in
-            splice: for (int insertionLevel = level;;) {
-                int j = h.level;
-                for (Index<K,V> q = h, r = q.right, t = idx;;) {
-                    if (q == null || t == null)
-                        break splice;
-                    if (r != null) {
-                        Node<K,V> n = r.node;
-                        // compare before deletion check avoids needing recheck
-                        int c = cpr(cmp, key, n.key);
-                        if (n.value == null) {
-                            if (!q.unlink(r))
-                                break;
-                            r = q.right;
-                            continue;
-                        }
-                        if (c > 0) {
+            else {
+                for (Index<K,V> q = h, r, d;;) { // count while descending
+                    while ((r = q.right) != null) {
+                        Node<K,V> p; K k;
+                        if ((p = r.node) == null || (k = p.key) == null ||
+                            p.val == null)
+                            RIGHT.compareAndSet(q, r, r.right);
+                        else if (cpr(cmp, key, k) > 0)
                             q = r;
-                            r = r.right;
-                            continue;
-                        }
+                        else
+                            break;
                     }
-
-                    if (j == insertionLevel) {
-                        if (!q.link(r, t))
-                            break; // restart
-                        if (t.node.value == null) {
-                            findNode(key);
-                            break splice;
-                        }
-                        if (--insertionLevel == 0)
-                            break splice;
+                    if ((d = q.down) != null) {
+                        ++levels;
+                        q = d;
                     }
+                    else {
+                        b = q.node;
+                        break;
+                    }
+                }
+            }
+            if (b != null) {
+                Node<K,V> z = null;              // new node, if inserted
+                for (;;) {                       // find insertion point
+                    Node<K,V> n, p; K k; V v; int c;
+                    if ((n = b.next) == null) {
+                        if (b.key == null)       // if empty, type check key now
+                            cpr(cmp, key, key);
+                        c = -1;
+                    }
+                    else if ((k = n.key) == null)
+                        break;                   // can't append; restart
+                    else if ((v = n.val) == null) {
+                        unlinkNode(b, n);
+                        c = 1;
+                    }
+                    else if ((c = cpr(cmp, key, k)) > 0)
+                        b = n;
+                    else if (c == 0 &&
+                             (onlyIfAbsent || VAL.compareAndSet(n, v, value)))
+                        return v;
 
-                    if (--j >= insertionLevel && j < level)
-                        t = t.down;
-                    q = q.down;
-                    r = q.right;
+                    if (c < 0 &&
+                        NEXT.compareAndSet(b, n,
+                                           p = new Node<K,V>(key, value, n))) {
+                        z = p;
+                        break;
+                    }
+                }
+
+                if (z != null) {
+                    int lr = ThreadLocalRandom.nextSecondarySeed();
+                    if ((lr & 0x3) == 0) {       // add indices with 1/4 prob
+                        int hr = ThreadLocalRandom.nextSecondarySeed();
+                        long rnd = ((long)hr << 32) | ((long)lr & 0xffffffffL);
+                        int skips = levels;      // levels to descend before add
+                        Index<K,V> x = null;
+                        for (;;) {               // create at most 62 indices
+                            x = new Index<K,V>(z, x, null);
+                            if (rnd >= 0L || --skips < 0)
+                                break;
+                            else
+                                rnd <<= 1;
+                        }
+                        if (addIndices(h, skips, x, cmp) && skips < 0 &&
+                            head == h) {         // try to add new level
+                            Index<K,V> hx = new Index<K,V>(z, x, null);
+                            Index<K,V> nh = new Index<K,V>(h.node, h, hx);
+                            HEAD.compareAndSet(this, h, nh);
+                        }
+                        if (z.val == null)       // deleted while adding indices
+                            findPredecessor(key, cmp); // clean
+                    }
+                    addCount(1L);
+                    return null;
                 }
             }
         }
-        return null;
+    }
+
+    /**
+     * Add indices after an insertion. Descends iteratively to the
+     * highest level of insertion, then recursively, to chain index
+     * nodes to lower ones. Returns null on (staleness) failure,
+     * disabling higher-level insertions. Recursion depths are
+     * exponentially less probable.
+     *
+     * @param q starting index for current level
+     * @param skips levels to skip before inserting
+     * @param x index for this insertion
+     * @param cmp comparator
+     */
+    static <K,V> boolean addIndices(Index<K,V> q, int skips, Index<K,V> x,
+                                    Comparator<? super K> cmp) {
+        Node<K,V> z; K key;
+        if (x != null && (z = x.node) != null && (key = z.key) != null &&
+            q != null) {                            // hoist checks
+            boolean retrying = false;
+            for (;;) {                              // find splice point
+                Index<K,V> r, d; int c;
+                if ((r = q.right) != null) {
+                    Node<K,V> p; K k;
+                    if ((p = r.node) == null || (k = p.key) == null ||
+                        p.val == null) {
+                        RIGHT.compareAndSet(q, r, r.right);
+                        c = 0;
+                    }
+                    else if ((c = cpr(cmp, key, k)) > 0)
+                        q = r;
+                    else if (c == 0)
+                        break;                      // stale
+                }
+                else
+                    c = -1;
+
+                if (c < 0) {
+                    if ((d = q.down) != null && skips > 0) {
+                        --skips;
+                        q = d;
+                    }
+                    else if (d != null && !retrying &&
+                             !addIndices(d, 0, x.down, cmp))
+                        break;
+                    else {
+                        x.right = r;
+                        if (RIGHT.compareAndSet(q, r, x))
+                            return true;
+                        else
+                            retrying = true;         // re-find splice point
+                    }
+                }
+            }
+        }
+        return false;
     }
 
     /* ---------------- Deletion -------------- */
@@ -933,15 +749,6 @@
      * deletion marker, unlinks predecessor, removes associated index
      * nodes, and possibly reduces head index level.
      *
-     * Index nodes are cleared out simply by calling findPredecessor.
-     * which unlinks indexes to deleted nodes found along path to key,
-     * which will include the indexes to this node.  This is done
-     * unconditionally. We can't check beforehand whether there are
-     * index nodes because it might be the case that some or all
-     * indexes hadn't been inserted yet for this node during initial
-     * search for it, and we'd like to ensure lack of garbage
-     * retention, so must call to be sure.
-     *
      * @param key the key
      * @param value if non-null, the value that must be
      * associated with key
@@ -951,43 +758,36 @@
         if (key == null)
             throw new NullPointerException();
         Comparator<? super K> cmp = comparator;
-        outer: for (;;) {
-            for (Node<K,V> b = findPredecessor(key, cmp), n = b.next;;) {
-                Object v; int c;
-                if (n == null)
+        V result = null;
+        Node<K,V> b;
+        outer: while ((b = findPredecessor(key, cmp)) != null &&
+                      result == null) {
+            for (;;) {
+                Node<K,V> n; K k; V v; int c;
+                if ((n = b.next) == null)
                     break outer;
-                Node<K,V> f = n.next;
-                if (n != b.next)                    // inconsistent read
+                else if ((k = n.key) == null)
                     break;
-                if ((v = n.value) == null) {        // n is deleted
-                    n.helpDelete(b, f);
-                    break;
-                }
-                if (b.value == null || v == n)      // b is deleted
-                    break;
-                if ((c = cpr(cmp, key, n.key)) < 0)
-                    break outer;
-                if (c > 0) {
+                else if ((v = n.val) == null)
+                    unlinkNode(b, n);
+                else if ((c = cpr(cmp, key, k)) > 0)
                     b = n;
-                    n = f;
-                    continue;
-                }
-                if (value != null && !value.equals(v))
+                else if (c < 0)
                     break outer;
-                if (!n.casValue(v, null))
-                    break;
-                if (!n.appendMarker(f) || !b.casNext(n, f))
-                    findNode(key);                  // retry via findNode
-                else {
-                    findPredecessor(key, cmp);      // clean index
-                    if (head.right == null)
-                        tryReduceLevel();
+                else if (value != null && !value.equals(v))
+                    break outer;
+                else if (VAL.compareAndSet(n, v, null)) {
+                    result = v;
+                    unlinkNode(b, n);
+                    break; // loop to clean up
                 }
-                @SuppressWarnings("unchecked") V vv = (V)v;
-                return vv;
             }
         }
-        return null;
+        if (result != null) {
+            tryReduceLevel();
+            addCount(-1L);
+        }
+        return result;
     }
 
     /**
@@ -1011,125 +811,71 @@
      * reduction.
      */
     private void tryReduceLevel() {
-        HeadIndex<K,V> h = head;
-        HeadIndex<K,V> d;
-        HeadIndex<K,V> e;
-        if (h.level > 3 &&
-            (d = (HeadIndex<K,V>)h.down) != null &&
-            (e = (HeadIndex<K,V>)d.down) != null &&
-            e.right == null &&
-            d.right == null &&
-            h.right == null &&
-            casHead(h, d) && // try to set
-            h.right != null) // recheck
-            casHead(d, h);   // try to backout
+        Index<K,V> h, d, e;
+        if ((h = head) != null && h.right == null &&
+            (d = h.down) != null && d.right == null &&
+            (e = d.down) != null && e.right == null &&
+            HEAD.compareAndSet(this, h, d) &&
+            h.right != null)   // recheck
+            HEAD.compareAndSet(this, d, h);  // try to backout
     }
 
     /* ---------------- Finding and removing first element -------------- */
 
     /**
-     * Specialized variant of findNode to get first valid node.
+     * Gets first valid node, unlinking deleted nodes if encountered.
      * @return first node or null if empty
      */
     final Node<K,V> findFirst() {
-        for (Node<K,V> b, n;;) {
-            if ((n = (b = head.node).next) == null)
-                return null;
-            if (n.value != null)
-                return n;
-            n.helpDelete(b, n.next);
+        Node<K,V> b, n;
+        if ((b = baseHead()) != null) {
+            while ((n = b.next) != null) {
+                if (n.val == null)
+                    unlinkNode(b, n);
+                else
+                    return n;
+            }
         }
+        return null;
+    }
+
+    /**
+     * Entry snapshot version of findFirst
+     */
+    final AbstractMap.SimpleImmutableEntry<K,V> findFirstEntry() {
+        Node<K,V> b, n; V v;
+        if ((b = baseHead()) != null) {
+            while ((n = b.next) != null) {
+                if ((v = n.val) == null)
+                    unlinkNode(b, n);
+                else
+                    return new AbstractMap.SimpleImmutableEntry<K,V>(n.key, v);
+            }
+        }
+        return null;
     }
 
     /**
      * Removes first entry; returns its snapshot.
      * @return null if empty, else snapshot of first entry
      */
-    private Map.Entry<K,V> doRemoveFirstEntry() {
-        for (Node<K,V> b, n;;) {
-            if ((n = (b = head.node).next) == null)
-                return null;
-            Node<K,V> f = n.next;
-            if (n != b.next)
-                continue;
-            Object v = n.value;
-            if (v == null) {
-                n.helpDelete(b, f);
-                continue;
-            }
-            if (!n.casValue(v, null))
-                continue;
-            if (!n.appendMarker(f) || !b.casNext(n, f))
-                findFirst(); // retry
-            clearIndexToFirst();
-            @SuppressWarnings("unchecked") V vv = (V)v;
-            return new AbstractMap.SimpleImmutableEntry<K,V>(n.key, vv);
-        }
-    }
-
-    /**
-     * Clears out index nodes associated with deleted first entry.
-     */
-    private void clearIndexToFirst() {
-        for (;;) {
-            for (Index<K,V> q = head;;) {
-                Index<K,V> r = q.right;
-                if (r != null && r.indexesDeletedNode() && !q.unlink(r))
-                    break;
-                if ((q = q.down) == null) {
-                    if (head.right == null)
+    private AbstractMap.SimpleImmutableEntry<K,V> doRemoveFirstEntry() {
+        Node<K,V> b, n; V v;
+        if ((b = baseHead()) != null) {
+            while ((n = b.next) != null) {
+                if ((v = n.val) == null || VAL.compareAndSet(n, v, null)) {
+                    K k = n.key;
+                    unlinkNode(b, n);
+                    if (v != null) {
                         tryReduceLevel();
-                    return;
+                        findPredecessor(k, comparator); // clean index
+                        addCount(-1L);
+                        return new AbstractMap.SimpleImmutableEntry<K,V>(k, v);
+                    }
                 }
             }
         }
-    }
-
-    /**
-     * Removes last entry; returns its snapshot.
-     * Specialized variant of doRemove.
-     * @return null if empty, else snapshot of last entry
-     */
-    private Map.Entry<K,V> doRemoveLastEntry() {
-        for (;;) {
-            Node<K,V> b = findPredecessorOfLast();
-            Node<K,V> n = b.next;
-            if (n == null) {
-                if (b.isBaseHeader())               // empty
-                    return null;
-                else
-                    continue; // all b's successors are deleted; retry
-            }
-            for (;;) {
-                Node<K,V> f = n.next;
-                if (n != b.next)                    // inconsistent read
-                    break;
-                Object v = n.value;
-                if (v == null) {                    // n is deleted
-                    n.helpDelete(b, f);
-                    break;
-                }
-                if (b.value == null || v == n)      // b is deleted
-                    break;
-                if (f != null) {
-                    b = n;
-                    n = f;
-                    continue;
-                }
-                if (!n.casValue(v, null))
-                    break;
-                K key = n.key;
-                if (!n.appendMarker(f) || !b.casNext(n, f))
-                    findNode(key);                  // retry via findNode
-                else {                              // clean index
-                    findPredecessor(key, comparator);
-                    if (head.right == null)
-                        tryReduceLevel();
-                }
-                @SuppressWarnings("unchecked") V vv = (V)v;
-                return new AbstractMap.SimpleImmutableEntry<K,V>(key, vv);
-            }
-        }
+        return null;
     }
 
     /* ---------------- Finding and removing last element -------------- */
@@ -1139,73 +885,115 @@
      * @return last node or null if empty
      */
     final Node<K,V> findLast() {
-        /*
-         * findPredecessor can't be used to traverse index level
-         * because this doesn't use comparisons.  So traversals of
-         * both levels are folded together.
-         */
-        Index<K,V> q = head;
-        for (;;) {
-            Index<K,V> d, r;
-            if ((r = q.right) != null) {
-                if (r.indexesDeletedNode()) {
-                    q.unlink(r);
-                    q = head; // restart
+        outer: for (;;) {
+            Index<K,V> q; Node<K,V> b;
+            VarHandle.acquireFence();
+            if ((q = head) == null)
+                break;
+            for (Index<K,V> r, d;;) {
+                while ((r = q.right) != null) {
+                    Node<K,V> p;
+                    if ((p = r.node) == null || p.val == null)
+                        RIGHT.compareAndSet(q, r, r.right);
+                    else
+                        q = r;
                 }
-                else
-                    q = r;
-            } else if ((d = q.down) != null) {
-                q = d;
-            } else {
-                for (Node<K,V> b = q.node, n = b.next;;) {
-                    if (n == null)
-                        return b.isBaseHeader() ? null : b;
-                    Node<K,V> f = n.next;            // inconsistent read
-                    if (n != b.next)
-                        break;
-                    Object v = n.value;
-                    if (v == null) {                 // n is deleted
-                        n.helpDelete(b, f);
-                        break;
-                    }
-                    if (b.value == null || v == n)      // b is deleted
-                        break;
-                    b = n;
-                    n = f;
+                if ((d = q.down) != null)
+                    q = d;
+                else {
+                    b = q.node;
+                    break;
                 }
-                q = head; // restart
             }
+            if (b != null) {
+                for (;;) {
+                    Node<K,V> n;
+                    if ((n = b.next) == null) {
+                        if (b.key == null) // empty
+                            break outer;
+                        else
+                            return b;
+                    }
+                    else if (n.key == null)
+                        break;
+                    else if (n.val == null)
+                        unlinkNode(b, n);
+                    else
+                        b = n;
+                }
+            }
+        }
+        return null;
+    }
+
+    /**
+     * Entry version of findLast
+     * @return Entry for last node or null if empty
+     */
+    final AbstractMap.SimpleImmutableEntry<K,V> findLastEntry() {
+        for (;;) {
+            Node<K,V> n; V v;
+            if ((n = findLast()) == null)
+                return null;
+            if ((v = n.val) != null)
+                return new AbstractMap.SimpleImmutableEntry<K,V>(n.key, v);
         }
     }
 
     /**
-     * Specialized variant of findPredecessor to get predecessor of last
-     * valid node.  Needed when removing the last entry.  It is possible
-     * that all successors of returned node will have been deleted upon
-     * return, in which case this method can be retried.
-     * @return likely predecessor of last node
+     * Removes last entry; returns its snapshot.
+     * Specialized variant of doRemove.
+     * @return null if empty, else snapshot of last entry
      */
-    private Node<K,V> findPredecessorOfLast() {
-        for (;;) {
-            for (Index<K,V> q = head;;) {
-                Index<K,V> d, r;
-                if ((r = q.right) != null) {
-                    if (r.indexesDeletedNode()) {
-                        q.unlink(r);
-                        break;    // must restart
-                    }
-                    // proceed as far across as possible without overshooting
-                    if (r.node.next != null) {
-                        q = r;
-                        continue;
-                    }
+    private Map.Entry<K,V> doRemoveLastEntry() {
+        outer: for (;;) {
+            Index<K,V> q; Node<K,V> b;
+            VarHandle.acquireFence();
+            if ((q = head) == null)
+                break;
+            for (;;) {
+                Index<K,V> d, r; Node<K,V> p;
+                while ((r = q.right) != null) {
+                    if ((p = r.node) == null || p.val == null)
+                        RIGHT.compareAndSet(q, r, r.right);
+                    else if (p.next != null)
+                        q = r;  // continue only if a successor
+                    else
+                        break;
                 }
                 if ((d = q.down) != null)
                     q = d;
-                else
-                    return q.node;
+                else {
+                    b = q.node;
+                    break;
+                }
+            }
+            if (b != null) {
+                for (;;) {
+                    Node<K,V> n; K k; V v;
+                    if ((n = b.next) == null) {
+                        if (b.key == null) // empty
+                            break outer;
+                        else
+                            break; // retry
+                    }
+                    else if ((k = n.key) == null)
+                        break;
+                    else if ((v = n.val) == null)
+                        unlinkNode(b, n);
+                    else if (n.next != null)
+                        b = n;
+                    else if (VAL.compareAndSet(n, v, null)) {
+                        unlinkNode(b, n);
+                        tryReduceLevel();
+                        findPredecessor(k, comparator); // clean index
+                        addCount(-1L);
+                        return new AbstractMap.SimpleImmutableEntry<K,V>(k, v);
+                    }
+                }
             }
         }
+        return null;
     }
 
     /* ---------------- Relational operations -------------- */
@@ -1225,47 +1013,52 @@
     final Node<K,V> findNear(K key, int rel, Comparator<? super K> cmp) {
         if (key == null)
             throw new NullPointerException();
-        for (;;) {
-            for (Node<K,V> b = findPredecessor(key, cmp), n = b.next;;) {
-                Object v;
-                if (n == null)
-                    return ((rel & LT) == 0 || b.isBaseHeader()) ? null : b;
-                Node<K,V> f = n.next;
-                if (n != b.next)                  // inconsistent read
-                    break;
-                if ((v = n.value) == null) {      // n is deleted
-                    n.helpDelete(b, f);
-                    break;
+        Node<K,V> result;
+        outer: for (Node<K,V> b;;) {
+            if ((b = findPredecessor(key, cmp)) == null) {
+                result = null;
+                break;                   // empty
+            }
+            for (;;) {
+                Node<K,V> n; K k; int c;
+                if ((n = b.next) == null) {
+                    result = ((rel & LT) != 0 && b.key != null) ? b : null;
+                    break outer;
                 }
-                if (b.value == null || v == n)      // b is deleted
+                else if ((k = n.key) == null)
                     break;
-                int c = cpr(cmp, key, n.key);
-                if ((c == 0 && (rel & EQ) != 0) ||
-                    (c <  0 && (rel & LT) == 0))
-                    return n;
-                if ( c <= 0 && (rel & LT) != 0)
-                    return b.isBaseHeader() ? null : b;
-                b = n;
-                n = f;
+                else if (n.val == null)
+                    unlinkNode(b, n);
+                else if (((c = cpr(cmp, key, k)) == 0 && (rel & EQ) != 0) ||
+                         (c < 0 && (rel & LT) == 0)) {
+                    result = n;
+                    break outer;
+                }
+                else if (c <= 0 && (rel & LT) != 0) {
+                    result = (b.key != null) ? b : null;
+                    break outer;
+                }
+                else
+                    b = n;
             }
         }
+        return result;
     }
 
     /**
-     * Returns SimpleImmutableEntry for results of findNear.
+     * Variant of findNear returning SimpleImmutableEntry
      * @param key the key
      * @param rel the relation -- OR'ed combination of EQ, LT, GT
      * @return Entry fitting relation, or null if no such
      */
-    final AbstractMap.SimpleImmutableEntry<K,V> getNear(K key, int rel) {
-        Comparator<? super K> cmp = comparator;
+    final AbstractMap.SimpleImmutableEntry<K,V> findNearEntry(K key, int rel,
+                                                              Comparator<? super K> cmp) {
         for (;;) {
-            Node<K,V> n = findNear(key, rel, cmp);
-            if (n == null)
+            Node<K,V> n; V v;
+            if ((n = findNear(key, rel, cmp)) == null)
                 return null;
-            AbstractMap.SimpleImmutableEntry<K,V> e = n.createSnapshot();
-            if (e != null)
-                return e;
+            if ((v = n.val) != null)
+                return new AbstractMap.SimpleImmutableEntry<K,V>(n.key, v);
         }
     }
 
@@ -1277,7 +1070,6 @@
      */
     public ConcurrentSkipListMap() {
         this.comparator = null;
-        initialize();
     }
 
     /**
@@ -1290,7 +1082,6 @@
      */
     public ConcurrentSkipListMap(Comparator<? super K> comparator) {
         this.comparator = comparator;
-        initialize();
     }
 
     /**
@@ -1306,7 +1097,6 @@
      */
     public ConcurrentSkipListMap(Map<? extends K, ? extends V> m) {
         this.comparator = null;
-        initialize();
         putAll(m);
     }
 
@@ -1321,8 +1111,7 @@
      */
     public ConcurrentSkipListMap(SortedMap<K, ? extends V> m) {
         this.comparator = m.comparator();
-        initialize();
-        buildFromSorted(m);
+        buildFromSorted(m); // initializes transients
     }
 
     /**
@@ -1336,7 +1125,10 @@
             @SuppressWarnings("unchecked")
             ConcurrentSkipListMap<K,V> clone =
                 (ConcurrentSkipListMap<K,V>) super.clone();
-            clone.initialize();
+            clone.keySet = null;
+            clone.entrySet = null;
+            clone.values = null;
+            clone.descendingMap = null;
             clone.buildFromSorted(this);
             return clone;
         } catch (CloneNotSupportedException e) {
@@ -1352,58 +1144,49 @@
     private void buildFromSorted(SortedMap<K, ? extends V> map) {
         if (map == null)
             throw new NullPointerException();
-
-        HeadIndex<K,V> h = head;
-        Node<K,V> basepred = h.node;
-
-        // Track the current rightmost node at each level. Uses an
-        // ArrayList to avoid committing to initial or maximum level.
-        ArrayList<Index<K,V>> preds = new ArrayList<>();
-
-        // initialize
-        for (int i = 0; i <= h.level; ++i)
-            preds.add(null);
-        Index<K,V> q = h;
-        for (int i = h.level; i > 0; --i) {
-            preds.set(i, q);
-            q = q.down;
-        }
-
         Iterator<? extends Map.Entry<? extends K, ? extends V>> it =
             map.entrySet().iterator();
+
+        /*
+         * Add equally spaced indices at log intervals, using the bits
+         * of count during insertion. The maximum possible resulting
+         * level is less than the number of bits in a long (64). The
+         * preds array tracks the current rightmost node at each
+         * level.
+         */
+        @SuppressWarnings("unchecked")
+        Index<K,V>[] preds = (Index<K,V>[])new Index<?,?>[64];
+        Node<K,V> bp = new Node<K,V>(null, null, null);
+        Index<K,V> h = preds[0] = new Index<K,V>(bp, null, null);
+        long count = 0;
+
         while (it.hasNext()) {
             Map.Entry<? extends K, ? extends V> e = it.next();
-            int rnd = ThreadLocalRandom.current().nextInt();
-            int j = 0;
-            if ((rnd & 0x80000001) == 0) {
-                do {
-                    ++j;
-                } while (((rnd >>>= 1) & 1) != 0);
-                if (j > h.level) j = h.level + 1;
-            }
             K k = e.getKey();
             V v = e.getValue();
             if (k == null || v == null)
                 throw new NullPointerException();
             Node<K,V> z = new Node<K,V>(k, v, null);
-            basepred.next = z;
-            basepred = z;
-            if (j > 0) {
-                Index<K,V> idx = null;
-                for (int i = 1; i <= j; ++i) {
+            bp = bp.next = z;
+            if ((++count & 3L) == 0L) {
+                long m = count >>> 2;
+                int i = 0;
+                Index<K,V> idx = null, q;
+                do {
                     idx = new Index<K,V>(z, idx, null);
-                    if (i > h.level)
-                        h = new HeadIndex<K,V>(h.node, h, idx, i);
-
-                    if (i < preds.size()) {
-                        preds.get(i).right = idx;
-                        preds.set(i, idx);
-                    } else
-                        preds.add(idx);
-                }
+                    if ((q = preds[i]) == null)
+                        preds[i] = h = new Index<K,V>(h.node, h, idx);
+                    else
+                        preds[i] = q.right = idx;
+                } while (++i < preds.length && ((m >>>= 1) & 1L) != 0L);
             }
         }
-        head = h;
+        if (count != 0L) {
+            VarHandle.releaseFence(); // emulate volatile stores
+            addCount(count);
+            head = h;
+            VarHandle.fullFence();
+        }
     }
 
     /* ---------------- Serialization -------------- */
@@ -1425,11 +1208,14 @@
         s.defaultWriteObject();
 
         // Write out keys and values (alternating)
-        for (Node<K,V> n = findFirst(); n != null; n = n.next) {
-            V v = n.getValidValue();
-            if (v != null) {
-                s.writeObject(n.key);
-                s.writeObject(v);
+        Node<K,V> b, n; V v;
+        if ((b = baseHead()) != null) {
+            while ((n = b.next) != null) {
+                if ((v = n.val) != null) {
+                    s.writeObject(n.key);
+                    s.writeObject(v);
+                }
+                b = n;
             }
         }
         s.writeObject(null);
@@ -1447,64 +1233,47 @@
         throws java.io.IOException, ClassNotFoundException {
         // Read in the Comparator and any hidden stuff
         s.defaultReadObject();
-        // Reset transients
-        initialize();
 
-        /*
-         * This is nearly identical to buildFromSorted, but is
-         * distinct because readObject calls can't be nicely adapted
-         * as the kind of iterator needed by buildFromSorted. (They
-         * can be, but doing so requires type cheats and/or creation
-         * of adapter classes.) It is simpler to just adapt the code.
-         */
-
-        HeadIndex<K,V> h = head;
-        Node<K,V> basepred = h.node;
-        ArrayList<Index<K,V>> preds = new ArrayList<>();
-        for (int i = 0; i <= h.level; ++i)
-            preds.add(null);
-        Index<K,V> q = h;
-        for (int i = h.level; i > 0; --i) {
-            preds.set(i, q);
-            q = q.down;
-        }
+        // Same idea as buildFromSorted
+        @SuppressWarnings("unchecked")
+        Index<K,V>[] preds = (Index<K,V>[])new Index<?,?>[64];
+        Node<K,V> bp = new Node<K,V>(null, null, null);
+        Index<K,V> h = preds[0] = new Index<K,V>(bp, null, null);
+        Comparator<? super K> cmp = comparator;
+        K prevKey = null;
+        long count = 0;
 
         for (;;) {
-            Object k = s.readObject();
+            K k = (K)s.readObject();
             if (k == null)
                 break;
-            Object v = s.readObject();
+            V v = (V)s.readObject();
             if (v == null)
                 throw new NullPointerException();
-            K key = (K) k;
-            V val = (V) v;
-            int rnd = ThreadLocalRandom.current().nextInt();
-            int j = 0;
-            if ((rnd & 0x80000001) == 0) {
+            if (prevKey != null && cpr(cmp, prevKey, k) > 0)
+                throw new IllegalStateException("out of order");
+            prevKey = k;
+            Node<K,V> z = new Node<K,V>(k, v, null);
+            bp = bp.next = z;
+            if ((++count & 3L) == 0L) {
+                long m = count >>> 2;
+                int i = 0;
+                Index<K,V> idx = null, q;
                 do {
-                    ++j;
-                } while (((rnd >>>= 1) & 1) != 0);
-                if (j > h.level) j = h.level + 1;
-            }
-            Node<K,V> z = new Node<K,V>(key, val, null);
-            basepred.next = z;
-            basepred = z;
-            if (j > 0) {
-                Index<K,V> idx = null;
-                for (int i = 1; i <= j; ++i) {
                     idx = new Index<K,V>(z, idx, null);
-                    if (i > h.level)
-                        h = new HeadIndex<K,V>(h.node, h, idx, i);
-
-                    if (i < preds.size()) {
-                        preds.get(i).right = idx;
-                        preds.set(i, idx);
-                    } else
-                        preds.add(idx);
-                }
+                    if ((q = preds[i]) == null)
+                        preds[i] = h = new Index<K,V>(h.node, h, idx);
+                    else
+                        preds[i] = q.right = idx;
+                } while (++i < preds.length && ((m >>>= 1) & 1L) != 0L);
             }
         }
-        head = h;
+        if (count != 0L) {
+            VarHandle.releaseFence();
+            addCount(count);
+            head = h;
+            VarHandle.fullFence();
+        }
     }
 
     /* ------ Map API methods ------ */
@@ -1605,42 +1374,30 @@
     public boolean containsValue(Object value) {
         if (value == null)
             throw new NullPointerException();
-        for (Node<K,V> n = findFirst(); n != null; n = n.next) {
-            V v = n.getValidValue();
-            if (v != null && value.equals(v))
-                return true;
+        Node<K,V> b, n; V v;
+        if ((b = baseHead()) != null) {
+            while ((n = b.next) != null) {
+                if ((v = n.val) != null && value.equals(v))
+                    return true;
+                else
+                    b = n;
+            }
         }
         return false;
     }
 
     /**
-     * Returns the number of key-value mappings in this map.  If this map
-     * contains more than {@code Integer.MAX_VALUE} elements, it
-     * returns {@code Integer.MAX_VALUE}.
-     *
-     * <p>Beware that, unlike in most collections, this method is
-     * <em>NOT</em> a constant-time operation. Because of the
-     * asynchronous nature of these maps, determining the current
-     * number of elements requires traversing them all to count them.
-     * Additionally, it is possible for the size to change during
-     * execution of this method, in which case the returned result
-     * will be inaccurate. Thus, this method is typically not very
-     * useful in concurrent applications.
-     *
-     * @return the number of elements in this map
+     * {@inheritDoc}
      */
     public int size() {
-        long count = 0;
-        for (Node<K,V> n = findFirst(); n != null; n = n.next) {
-            if (n.getValidValue() != null)
-                ++count;
-        }
-        return (count >= Integer.MAX_VALUE) ? Integer.MAX_VALUE : (int) count;
+        long c;
+        return ((baseHead() == null) ? 0 :
+                ((c = getAdderCount()) >= Integer.MAX_VALUE) ?
+                Integer.MAX_VALUE : (int) c);
     }
 
     /**
-     * Returns {@code true} if this map contains no key-value mappings.
-     * @return {@code true} if this map contains no key-value mappings
+     * {@inheritDoc}
      */
     public boolean isEmpty() {
         return findFirst() == null;
@@ -1650,7 +1407,33 @@
      * Removes all of the mappings from this map.
      */
     public void clear() {
-        initialize();
+        Index<K,V> h, r, d; Node<K,V> b;
+        VarHandle.acquireFence();
+        while ((h = head) != null) {
+            if ((r = h.right) != null)        // remove indices
+                RIGHT.compareAndSet(h, r, null);
+            else if ((d = h.down) != null)    // remove levels
+                HEAD.compareAndSet(this, h, d);
+            else {
+                long count = 0L;
+                if ((b = h.node) != null) {    // remove nodes
+                    Node<K,V> n; V v;
+                    while ((n = b.next) != null) {
+                        if ((v = n.val) != null &&
+                            VAL.compareAndSet(n, v, null)) {
+                            --count;
+                            v = null;
+                        }
+                        if (v == null)
+                            unlinkNode(b, n);
+                    }
+                }
+                if (count != 0L)
+                    addCount(count);
+                else
+                    break;
+            }
+        }
     }
 
     /**
@@ -1696,16 +1479,15 @@
                               BiFunction<? super K, ? super V, ? extends V> remappingFunction) {
         if (key == null || remappingFunction == null)
             throw new NullPointerException();
-        Node<K,V> n; Object v;
+        Node<K,V> n; V v;
         while ((n = findNode(key)) != null) {
-            if ((v = n.value) != null) {
-                @SuppressWarnings("unchecked") V vv = (V) v;
-                V r = remappingFunction.apply(key, vv);
+            if ((v = n.val) != null) {
+                V r = remappingFunction.apply(key, v);
                 if (r != null) {
-                    if (n.casValue(vv, r))
+                    if (VAL.compareAndSet(n, v, r))
                         return r;
                 }
-                else if (doRemove(key, vv) != null)
+                else if (doRemove(key, v) != null)
                     break;
             }
         }
@@ -1730,20 +1512,19 @@
         if (key == null || remappingFunction == null)
             throw new NullPointerException();
         for (;;) {
-            Node<K,V> n; Object v; V r;
+            Node<K,V> n; V v; V r;
             if ((n = findNode(key)) == null) {
                 if ((r = remappingFunction.apply(key, null)) == null)
                     break;
                 if (doPut(key, r, true) == null)
                     return r;
             }
-            else if ((v = n.value) != null) {
-                @SuppressWarnings("unchecked") V vv = (V) v;
-                if ((r = remappingFunction.apply(key, vv)) != null) {
-                    if (n.casValue(vv, r))
+            else if ((v = n.val) != null) {
+                if ((r = remappingFunction.apply(key, v)) != null) {
+                    if (VAL.compareAndSet(n, v, r))
                         return r;
                 }
-                else if (doRemove(key, vv) != null)
+                else if (doRemove(key, v) != null)
                     break;
             }
         }
@@ -1770,18 +1551,17 @@
         if (key == null || value == null || remappingFunction == null)
             throw new NullPointerException();
         for (;;) {
-            Node<K,V> n; Object v; V r;
+            Node<K,V> n; V v; V r;
             if ((n = findNode(key)) == null) {
                 if (doPut(key, value, true) == null)
                     return value;
             }
-            else if ((v = n.value) != null) {
-                @SuppressWarnings("unchecked") V vv = (V) v;
-                if ((r = remappingFunction.apply(vv, value)) != null) {
-                    if (n.casValue(vv, r))
+            else if ((v = n.val) != null) {
+                if ((r = remappingFunction.apply(v, value)) != null) {
+                    if (VAL.compareAndSet(n, v, r))
                         return r;
                 }
-                else if (doRemove(key, vv) != null)
+                else if (doRemove(key, v) != null)
                     return null;
             }
         }
@@ -1805,9 +1585,11 @@
      * The set's spliterator additionally reports {@link Spliterator#CONCURRENT},
      * {@link Spliterator#NONNULL}, {@link Spliterator#SORTED} and
      * {@link Spliterator#ORDERED}, with an encounter order that is ascending
-     * key order.  The spliterator's comparator (see
-     * {@link java.util.Spliterator#getComparator()}) is {@code null} if
-     * the map's comparator (see {@link #comparator()}) is {@code null}.
+     * key order.
+     *
+     * <p>The {@linkplain Spliterator#getComparator() spliterator's comparator}
+     * is {@code null} if the {@linkplain #comparator() map's comparator}
+     * is {@code null}.
      * Otherwise, the spliterator's comparator is the same as or imposes the
      * same total ordering as the map's comparator.
      *
@@ -1827,13 +1609,15 @@
      * @return a navigable set view of the keys in this map
      */
     public NavigableSet<K> keySet() {
-        KeySet<K,V> ks = keySet;
-        return (ks != null) ? ks : (keySet = new KeySet<>(this));
+        KeySet<K,V> ks;
+        if ((ks = keySet) != null) return ks;
+        return keySet = new KeySet<>(this);
     }
 
     public NavigableSet<K> navigableKeySet() {
-        KeySet<K,V> ks = keySet;
-        return (ks != null) ? ks : (keySet = new KeySet<>(this));
+        KeySet<K,V> ks;
+        if ((ks = keySet) != null) return ks;
+        return keySet = new KeySet<>(this);
     }
 
     /**
@@ -1856,8 +1640,9 @@
      * <a href="package-summary.html#Weakly"><i>weakly consistent</i></a>.
      */
     public Collection<V> values() {
-        Values<K,V> vs = values;
-        return (vs != null) ? vs : (values = new Values<>(this));
+        Values<K,V> vs;
+        if ((vs = values) != null) return vs;
+        return values = new Values<>(this);
     }
 
     /**
@@ -1888,14 +1673,16 @@
      *         sorted in ascending key order
      */
     public Set<Map.Entry<K,V>> entrySet() {
-        EntrySet<K,V> es = entrySet;
-        return (es != null) ? es : (entrySet = new EntrySet<K,V>(this));
+        EntrySet<K,V> es;
+        if ((es = entrySet) != null) return es;
+        return entrySet = new EntrySet<K,V>(this);
     }
 
     public ConcurrentNavigableMap<K,V> descendingMap() {
-        ConcurrentNavigableMap<K,V> dm = descendingMap;
-        return (dm != null) ? dm : (descendingMap = new SubMap<K,V>
-                                    (this, null, false, null, false, true));
+        ConcurrentNavigableMap<K,V> dm;
+        if ((dm = descendingMap) != null) return dm;
+        return descendingMap =
+            new SubMap<K,V>(this, null, false, null, false, true);
     }
 
     public NavigableSet<K> descendingKeySet() {
@@ -1923,19 +1710,61 @@
             return false;
         Map<?,?> m = (Map<?,?>) o;
         try {
-            for (Map.Entry<K,V> e : this.entrySet())
-                if (! e.getValue().equals(m.get(e.getKey())))
-                    return false;
-            for (Map.Entry<?,?> e : m.entrySet()) {
-                Object k = e.getKey();
-                Object v = e.getValue();
-                if (k == null || v == null || !v.equals(get(k)))
-                    return false;
+            Comparator<? super K> cmp = comparator;
+            @SuppressWarnings("unchecked")
+            Iterator<Map.Entry<?,?>> it =
+                (Iterator<Map.Entry<?,?>>)m.entrySet().iterator();
+            if (m instanceof SortedMap &&
+                ((SortedMap<?,?>)m).comparator() == cmp) {
+                Node<K,V> b, n;
+                if ((b = baseHead()) != null) {
+                    while ((n = b.next) != null) {
+                        K k; V v;
+                        if ((v = n.val) != null && (k = n.key) != null) {
+                            if (!it.hasNext())
+                                return false;
+                            Map.Entry<?,?> e = it.next();
+                            Object mk = e.getKey();
+                            Object mv = e.getValue();
+                            if (mk == null || mv == null)
+                                return false;
+                            try {
+                                if (cpr(cmp, k, mk) != 0)
+                                    return false;
+                            } catch (ClassCastException cce) {
+                                return false;
+                            }
+                            if (!mv.equals(v))
+                                return false;
+                        }
+                        b = n;
+                    }
+                }
+                return !it.hasNext();
             }
-            return true;
-        } catch (ClassCastException unused) {
-            return false;
-        } catch (NullPointerException unused) {
+            else {
+                while (it.hasNext()) {
+                    V v;
+                    Map.Entry<?,?> e = it.next();
+                    Object mk = e.getKey();
+                    Object mv = e.getValue();
+                    if (mk == null || mv == null ||
+                        (v = get(mk)) == null || !v.equals(mv))
+                        return false;
+                }
+                Node<K,V> b, n;
+                if ((b = baseHead()) != null) {
+                    K k; V v; Object mv;
+                    while ((n = b.next) != null) {
+                        if ((v = n.val) != null && (k = n.key) != null &&
+                            ((mv = m.get(k)) == null || !mv.equals(v)))
+                            return false;
+                        b = n;
+                    }
+                }
+                return true;
+            }
+        } catch (ClassCastException | NullPointerException unused) {
             return false;
         }
     }
@@ -1981,13 +1810,13 @@
         if (key == null || oldValue == null || newValue == null)
             throw new NullPointerException();
         for (;;) {
-            Node<K,V> n; Object v;
+            Node<K,V> n; V v;
             if ((n = findNode(key)) == null)
                 return false;
-            if ((v = n.value) != null) {
+            if ((v = n.val) != null) {
                 if (!oldValue.equals(v))
                     return false;
-                if (n.casValue(v, newValue))
+                if (VAL.compareAndSet(n, v, newValue))
                     return true;
             }
         }
@@ -2006,13 +1835,11 @@
         if (key == null || value == null)
             throw new NullPointerException();
         for (;;) {
-            Node<K,V> n; Object v;
+            Node<K,V> n; V v;
             if ((n = findNode(key)) == null)
                 return null;
-            if ((v = n.value) != null && n.casValue(v, value)) {
-                @SuppressWarnings("unchecked") V vv = (V)v;
-                return vv;
-            }
+            if ((v = n.val) != null && VAL.compareAndSet(n, v, value))
+                return v;
         }
     }
 
@@ -2122,7 +1949,7 @@
      * @throws NullPointerException if the specified key is null
      */
     public Map.Entry<K,V> lowerEntry(K key) {
-        return getNear(key, LT);
+        return findNearEntry(key, LT, comparator);
     }
 
     /**
@@ -2145,7 +1972,7 @@
      * @throws NullPointerException if the specified key is null
      */
     public Map.Entry<K,V> floorEntry(K key) {
-        return getNear(key, LT|EQ);
+        return findNearEntry(key, LT|EQ, comparator);
     }
 
     /**
@@ -2168,7 +1995,7 @@
      * @throws NullPointerException if the specified key is null
      */
     public Map.Entry<K,V> ceilingEntry(K key) {
-        return getNear(key, GT|EQ);
+        return findNearEntry(key, GT|EQ, comparator);
     }
 
     /**
@@ -2191,7 +2018,7 @@
      * @throws NullPointerException if the specified key is null
      */
     public Map.Entry<K,V> higherEntry(K key) {
-        return getNear(key, GT);
+        return findNearEntry(key, GT, comparator);
     }
 
     /**
@@ -2211,14 +2038,7 @@
      * the {@code Entry.setValue} method.
      */
     public Map.Entry<K,V> firstEntry() {
-        for (;;) {
-            Node<K,V> n = findFirst();
-            if (n == null)
-                return null;
-            AbstractMap.SimpleImmutableEntry<K,V> e = n.createSnapshot();
-            if (e != null)
-                return e;
-        }
+        return findFirstEntry();
     }
 
     /**
@@ -2228,14 +2048,7 @@
      * the {@code Entry.setValue} method.
      */
     public Map.Entry<K,V> lastEntry() {
-        for (;;) {
-            Node<K,V> n = findLast();
-            if (n == null)
-                return null;
-            AbstractMap.SimpleImmutableEntry<K,V> e = n.createSnapshot();
-            if (e != null)
-                return e;
-        }
+        return findLastEntry();
     }
 
     /**
@@ -2258,11 +2071,10 @@
         return doRemoveLastEntry();
     }
 
-
     /* ---------------- Iterators -------------- */
 
     /**
-     * Base of iterator classes:
+     * Base of iterator classes
      */
     abstract class Iter<T> implements Iterator<T> {
         /** the last node returned by next() */
@@ -2274,14 +2086,7 @@
 
         /** Initializes ascending iterator for entire range. */
         Iter() {
-            while ((next = findFirst()) != null) {
-                Object x = next.value;
-                if (x != null && x != next) {
-                    @SuppressWarnings("unchecked") V vv = (V)x;
-                    nextValue = vv;
-                    break;
-                }
-            }
+            advance(baseHead());
         }
 
         public final boolean hasNext() {
@@ -2289,54 +2094,58 @@
         }
 
         /** Advances next to higher entry. */
-        final void advance() {
-            if (next == null)
-                throw new NoSuchElementException();
-            lastReturned = next;
-            while ((next = next.next) != null) {
-                Object x = next.value;
-                if (x != null && x != next) {
-                    @SuppressWarnings("unchecked") V vv = (V)x;
-                    nextValue = vv;
-                    break;
-                }
+        final void advance(Node<K,V> b) {
+            Node<K,V> n = null;
+            V v = null;
+            if ((lastReturned = b) != null) {
+                while ((n = b.next) != null && (v = n.val) == null)
+                    b = n;
             }
+            nextValue = v;
+            next = n;
         }
 
-        public void remove() {
-            Node<K,V> l = lastReturned;
-            if (l == null)
+        public final void remove() {
+            Node<K,V> n; K k;
+            if ((n = lastReturned) == null || (k = n.key) == null)
                 throw new IllegalStateException();
             // It would not be worth all of the overhead to directly
             // unlink from here. Using remove is fast enough.
-            ConcurrentSkipListMap.this.remove(l.key);
+            ConcurrentSkipListMap.this.remove(k);
             lastReturned = null;
         }
-
     }
 
     final class ValueIterator extends Iter<V> {
         public V next() {
-            V v = nextValue;
-            advance();
+            V v;
+            if ((v = nextValue) == null)
+                throw new NoSuchElementException();
+            advance(next);
             return v;
         }
     }
 
     final class KeyIterator extends Iter<K> {
         public K next() {
-            Node<K,V> n = next;
-            advance();
-            return n.key;
+            Node<K,V> n;
+            if ((n = next) == null)
+                throw new NoSuchElementException();
+            K k = n.key;
+            advance(n);
+            return k;
         }
     }
 
     final class EntryIterator extends Iter<Map.Entry<K,V>> {
         public Map.Entry<K,V> next() {
-            Node<K,V> n = next;
+            Node<K,V> n;
+            if ((n = next) == null)
+                throw new NoSuchElementException();
+            K k = n.key;
             V v = nextValue;
-            advance();
-            return new AbstractMap.SimpleImmutableEntry<K,V>(n.key, v);
+            advance(n);
+            return new AbstractMap.SimpleImmutableEntry<K,V>(k, v);
         }
     }
 
@@ -2393,9 +2202,7 @@
             Collection<?> c = (Collection<?>) o;
             try {
                 return containsAll(c) && c.containsAll(this);
-            } catch (ClassCastException unused) {
-                return false;
-            } catch (NullPointerException unused) {
+            } catch (ClassCastException | NullPointerException unused) {
                 return false;
             }
         }
@@ -2520,9 +2327,7 @@
             Collection<?> c = (Collection<?>) o;
             try {
                 return containsAll(c) && c.containsAll(this);
-            } catch (ClassCastException unused) {
-                return false;
-            } catch (NullPointerException unused) {
+            } catch (ClassCastException | NullPointerException unused) {
                 return false;
             }
         }
@@ -2564,7 +2369,7 @@
      * @serial include
      */
     static final class SubMap<K,V> extends AbstractMap<K,V>
-        implements ConcurrentNavigableMap<K,V>, Cloneable, Serializable {
+        implements ConcurrentNavigableMap<K,V>, Serializable {
         private static final long serialVersionUID = -7647078645895051609L;
 
         /** Underlying map */
@@ -2582,8 +2387,8 @@
 
         // Lazily initialized view holders
         private transient KeySet<K,V> keySetView;
-        private transient Set<Map.Entry<K,V>> entrySetView;
-        private transient Collection<V> valuesView;
+        private transient Values<K,V> valuesView;
+        private transient EntrySet<K,V> entrySetView;
 
         /**
          * Creates a new submap, initializing all fields.
@@ -2642,9 +2447,7 @@
             if (k == null) // pass by markers and headers
                 return true;
             int c = cpr(cmp, k, hi);
-            if (c > 0 || (c == 0 && !hiInclusive))
-                return false;
-            return true;
+            return c < 0 || (c == 0 && hiInclusive);
         }
 
         /**
@@ -2702,38 +2505,34 @@
         Map.Entry<K,V> lowestEntry() {
             Comparator<? super K> cmp = m.comparator;
             for (;;) {
-                ConcurrentSkipListMap.Node<K,V> n = loNode(cmp);
-                if (!isBeforeEnd(n, cmp))
+                ConcurrentSkipListMap.Node<K,V> n; V v;
+                if ((n = loNode(cmp)) == null || !isBeforeEnd(n, cmp))
                     return null;
-                Map.Entry<K,V> e = n.createSnapshot();
-                if (e != null)
-                    return e;
+                else if ((v = n.val) != null)
+                    return new AbstractMap.SimpleImmutableEntry<K,V>(n.key, v);
             }
         }
 
         Map.Entry<K,V> highestEntry() {
             Comparator<? super K> cmp = m.comparator;
             for (;;) {
-                ConcurrentSkipListMap.Node<K,V> n = hiNode(cmp);
-                if (n == null || !inBounds(n.key, cmp))
+                ConcurrentSkipListMap.Node<K,V> n; V v;
+                if ((n = hiNode(cmp)) == null || !inBounds(n.key, cmp))
                     return null;
-                Map.Entry<K,V> e = n.createSnapshot();
-                if (e != null)
-                    return e;
+                else if ((v = n.val) != null)
+                    return new AbstractMap.SimpleImmutableEntry<K,V>(n.key, v);
             }
         }
 
         Map.Entry<K,V> removeLowest() {
             Comparator<? super K> cmp = m.comparator;
             for (;;) {
-                Node<K,V> n = loNode(cmp);
-                if (n == null)
+                ConcurrentSkipListMap.Node<K,V> n; K k; V v;
+                if ((n = loNode(cmp)) == null)
                     return null;
-                K k = n.key;
-                if (!inBounds(k, cmp))
+                else if (!inBounds((k = n.key), cmp))
                     return null;
-                V v = m.doRemove(k, null);
-                if (v != null)
+                else if ((v = m.doRemove(k, null)) != null)
                     return new AbstractMap.SimpleImmutableEntry<K,V>(k, v);
             }
         }
@@ -2741,20 +2540,18 @@
         Map.Entry<K,V> removeHighest() {
             Comparator<? super K> cmp = m.comparator;
             for (;;) {
-                Node<K,V> n = hiNode(cmp);
-                if (n == null)
+                ConcurrentSkipListMap.Node<K,V> n; K k; V v;
+                if ((n = hiNode(cmp)) == null)
                     return null;
-                K k = n.key;
-                if (!inBounds(k, cmp))
+                else if (!inBounds((k = n.key), cmp))
                     return null;
-                V v = m.doRemove(k, null);
-                if (v != null)
+                else if ((v = m.doRemove(k, null)) != null)
                     return new AbstractMap.SimpleImmutableEntry<K,V>(k, v);
             }
         }
 
         /**
-         * Submap version of ConcurrentSkipListMap.getNearEntry.
+         * Submap version of ConcurrentSkipListMap.findNearEntry.
          */
         Map.Entry<K,V> getNearEntry(K key, int rel) {
             Comparator<? super K> cmp = m.comparator;
@@ -2768,15 +2565,12 @@
                 return ((rel & LT) != 0) ? null : lowestEntry();
             if (tooHigh(key, cmp))
                 return ((rel & LT) != 0) ? highestEntry() : null;
-            for (;;) {
-                Node<K,V> n = m.findNear(key, rel, cmp);
-                if (n == null || !inBounds(n.key, cmp))
-                    return null;
-                K k = n.key;
-                V v = n.getValidValue();
-                if (v != null)
-                    return new AbstractMap.SimpleImmutableEntry<K,V>(k, v);
-            }
+            AbstractMap.SimpleImmutableEntry<K,V> e =
+                m.findNearEntry(key, rel, cmp);
+            if (e == null || !inBounds(e.getKey(), cmp))
+                return null;
+            else
+                return e;
         }
 
         // Almost the same as getNearEntry, except for keys
@@ -2811,10 +2605,8 @@
                 Node<K,V> n = m.findNear(key, rel, cmp);
                 if (n == null || !inBounds(n.key, cmp))
                     return null;
-                K k = n.key;
-                V v = n.getValidValue();
-                if (v != null)
-                    return k;
+                if (n.val != null)
+                    return n.key;
             }
         }
 
@@ -2845,7 +2637,7 @@
             for (ConcurrentSkipListMap.Node<K,V> n = loNode(cmp);
                  isBeforeEnd(n, cmp);
                  n = n.next) {
-                if (n.getValidValue() != null)
+                if (n.val != null)
                     ++count;
             }
             return count >= Integer.MAX_VALUE ? Integer.MAX_VALUE : (int)count;
@@ -2863,7 +2655,7 @@
             for (ConcurrentSkipListMap.Node<K,V> n = loNode(cmp);
                  isBeforeEnd(n, cmp);
                  n = n.next) {
-                V v = n.getValidValue();
+                V v = n.val;
                 if (v != null && value.equals(v))
                     return true;
             }
@@ -2875,7 +2667,7 @@
             for (ConcurrentSkipListMap.Node<K,V> n = loNode(cmp);
                  isBeforeEnd(n, cmp);
                  n = n.next) {
-                if (n.getValidValue() != null)
+                if (n.val != null)
                     m.remove(n.key);
             }
         }
@@ -3049,23 +2841,27 @@
         /* ---------------- Submap Views -------------- */
 
         public NavigableSet<K> keySet() {
-            KeySet<K,V> ks = keySetView;
-            return (ks != null) ? ks : (keySetView = new KeySet<>(this));
+            KeySet<K,V> ks;
+            if ((ks = keySetView) != null) return ks;
+            return keySetView = new KeySet<>(this);
         }
 
         public NavigableSet<K> navigableKeySet() {
-            KeySet<K,V> ks = keySetView;
-            return (ks != null) ? ks : (keySetView = new KeySet<>(this));
+            KeySet<K,V> ks;
+            if ((ks = keySetView) != null) return ks;
+            return keySetView = new KeySet<>(this);
         }
 
         public Collection<V> values() {
-            Collection<V> vs = valuesView;
-            return (vs != null) ? vs : (valuesView = new Values<>(this));
+            Values<K,V> vs;
+            if ((vs = valuesView) != null) return vs;
+            return valuesView = new Values<>(this);
         }
 
         public Set<Map.Entry<K,V>> entrySet() {
-            Set<Map.Entry<K,V>> es = entrySetView;
-            return (es != null) ? es : (entrySetView = new EntrySet<K,V>(this));
+            EntrySet<K,V> es;
+            if ((es = entrySetView) != null) return es;
+            return entrySetView = new EntrySet<K,V>(this);
         }
 
         public NavigableSet<K> descendingKeySet() {
@@ -3085,19 +2881,18 @@
             V nextValue;
 
             SubMapIter() {
+                VarHandle.acquireFence();
                 Comparator<? super K> cmp = m.comparator;
                 for (;;) {
                     next = isDescending ? hiNode(cmp) : loNode(cmp);
                     if (next == null)
                         break;
-                    Object x = next.value;
-                    if (x != null && x != next) {
+                    V x = next.val;
+                    if (x != null) {
                         if (! inBounds(next.key, cmp))
                             next = null;
-                        else {
-                            @SuppressWarnings("unchecked") V vv = (V)x;
-                            nextValue = vv;
-                        }
+                        else
+                            nextValue = x;
                         break;
                     }
                 }
@@ -3123,14 +2918,12 @@
                     next = next.next;
                     if (next == null)
                         break;
-                    Object x = next.value;
-                    if (x != null && x != next) {
+                    V x = next.val;
+                    if (x != null) {
                         if (tooHigh(next.key, cmp))
                             next = null;
-                        else {
-                            @SuppressWarnings("unchecked") V vv = (V)x;
-                            nextValue = vv;
-                        }
+                        else
+                            nextValue = x;
                         break;
                     }
                 }
@@ -3142,14 +2935,12 @@
                     next = m.findNear(lastReturned.key, LT, cmp);
                     if (next == null)
                         break;
-                    Object x = next.value;
-                    if (x != null && x != next) {
+                    V x = next.val;
+                    if (x != null) {
                         if (tooLow(next.key, cmp))
                             next = null;
-                        else {
-                            @SuppressWarnings("unchecked") V vv = (V)x;
-                            nextValue = vv;
-                        }
+                        else
+                            nextValue = x;
                         break;
                     }
                 }
@@ -3229,22 +3020,28 @@
 
     public void forEach(BiConsumer<? super K, ? super V> action) {
         if (action == null) throw new NullPointerException();
-        V v;
-        for (Node<K,V> n = findFirst(); n != null; n = n.next) {
-            if ((v = n.getValidValue()) != null)
-                action.accept(n.key, v);
+        Node<K,V> b, n; V v;
+        if ((b = baseHead()) != null) {
+            while ((n = b.next) != null) {
+                if ((v = n.val) != null)
+                    action.accept(n.key, v);
+                b = n;
+            }
         }
     }
 
     public void replaceAll(BiFunction<? super K, ? super V, ? extends V> function) {
         if (function == null) throw new NullPointerException();
-        V v;
-        for (Node<K,V> n = findFirst(); n != null; n = n.next) {
-            while ((v = n.getValidValue()) != null) {
-                V r = function.apply(n.key, v);
-                if (r == null) throw new NullPointerException();
-                if (n.casValue(v, r))
-                    break;
+        Node<K,V> b, n; V v;
+        if ((b = baseHead()) != null) {
+            while ((n = b.next) != null) {
+                while ((v = n.val) != null) {
+                    V r = function.apply(n.key, v);
+                    if (r == null) throw new NullPointerException();
+                    if (VAL.compareAndSet(n, v, r))
+                        break;
+                }
+                b = n;
             }
         }
     }
@@ -3255,13 +3052,16 @@
     boolean removeEntryIf(Predicate<? super Entry<K,V>> function) {
         if (function == null) throw new NullPointerException();
         boolean removed = false;
-        for (Node<K,V> n = findFirst(); n != null; n = n.next) {
-            V v;
-            if ((v = n.getValidValue()) != null) {
-                K k = n.key;
-                Map.Entry<K,V> e = new AbstractMap.SimpleImmutableEntry<>(k, v);
-                if (function.test(e) && remove(k, v))
-                    removed = true;
+        Node<K,V> b, n; V v;
+        if ((b = baseHead()) != null) {
+            while ((n = b.next) != null) {
+                if ((v = n.val) != null) {
+                    K k = n.key;
+                    Map.Entry<K,V> e = new AbstractMap.SimpleImmutableEntry<>(k, v);
+                    if (function.test(e) && remove(k, v))
+                        removed = true;
+                }
+                b = n;
             }
         }
         return removed;
@@ -3273,12 +3073,12 @@
     boolean removeValueIf(Predicate<? super V> function) {
         if (function == null) throw new NullPointerException();
         boolean removed = false;
-        for (Node<K,V> n = findFirst(); n != null; n = n.next) {
-            V v;
-            if ((v = n.getValidValue()) != null) {
-                K k = n.key;
-                if (function.test(v) && remove(k, v))
+        Node<K,V> b, n; V v;
+        if ((b = baseHead()) != null) {
+            while ((n = b.next) != null) {
+                if ((v = n.val) != null && function.test(v) && remove(n.key, v))
                     removed = true;
+                b = n;
             }
         }
         return removed;
@@ -3296,30 +3096,27 @@
      * off, or the end of row is encountered. Control of the number of
      * splits relies on some statistical estimation: The expected
      * remaining number of elements of a skip list when advancing
-     * either across or down decreases by about 25%. To make this
-     * observation useful, we need to know initial size, which we
-     * don't. But we can just use Integer.MAX_VALUE so that we
-     * don't prematurely zero out while splitting.
+     * either across or down decreases by about 25%.
      */
     abstract static class CSLMSpliterator<K,V> {
         final Comparator<? super K> comparator;
         final K fence;     // exclusive upper bound for keys, or null if to end
         Index<K,V> row;    // the level to split out
         Node<K,V> current; // current traversal node; initialize at origin
-        int est;           // pseudo-size estimate
+        long est;          // size estimate
         CSLMSpliterator(Comparator<? super K> comparator, Index<K,V> row,
-                        Node<K,V> origin, K fence, int est) {
+                        Node<K,V> origin, K fence, long est) {
             this.comparator = comparator; this.row = row;
             this.current = origin; this.fence = fence; this.est = est;
         }
 
-        public final long estimateSize() { return (long)est; }
+        public final long estimateSize() { return est; }
     }
 
     static final class KeySpliterator<K,V> extends CSLMSpliterator<K,V>
         implements Spliterator<K> {
         KeySpliterator(Comparator<? super K> comparator, Index<K,V> row,
-                       Node<K,V> origin, K fence, int est) {
+                       Node<K,V> origin, K fence, long est) {
             super(comparator, row, origin, fence, est);
         }
 
@@ -3331,7 +3128,7 @@
                 for (Index<K,V> q = row; q != null; q = row = q.down) {
                     Index<K,V> s; Node<K,V> b, n; K sk;
                     if ((s = q.right) != null && (b = s.node) != null &&
-                        (n = b.next) != null && n.value != null &&
+                        (n = b.next) != null && n.val != null &&
                         (sk = n.key) != null && cpr(cmp, sk, ek) > 0 &&
                         (f == null || cpr(cmp, sk, f) < 0)) {
                         current = n;
@@ -3352,10 +3149,10 @@
             Node<K,V> e = current;
             current = null;
             for (; e != null; e = e.next) {
-                K k; Object v;
+                K k;
                 if ((k = e.key) != null && f != null && cpr(cmp, f, k) <= 0)
                     break;
-                if ((v = e.value) != null && v != e)
+                if (e.val != null)
                     action.accept(k);
             }
         }
@@ -3366,12 +3163,12 @@
             K f = fence;
             Node<K,V> e = current;
             for (; e != null; e = e.next) {
-                K k; Object v;
+                K k;
                 if ((k = e.key) != null && f != null && cpr(cmp, f, k) <= 0) {
                     e = null;
                     break;
                 }
-                if ((v = e.value) != null && v != e) {
+                if (e.val != null) {
                     current = e.next;
                     action.accept(k);
                     return true;
@@ -3393,21 +3190,23 @@
     }
     // factory method for KeySpliterator
     final KeySpliterator<K,V> keySpliterator() {
-        Comparator<? super K> cmp = comparator;
-        for (;;) { // ensure h corresponds to origin p
-            HeadIndex<K,V> h; Node<K,V> p;
-            Node<K,V> b = (h = head).node;
-            if ((p = b.next) == null || p.value != null)
-                return new KeySpliterator<K,V>(cmp, h, p, null, (p == null) ?
-                                               0 : Integer.MAX_VALUE);
-            p.helpDelete(b, p.next);
+        Index<K,V> h; Node<K,V> n; long est;
+        VarHandle.acquireFence();
+        if ((h = head) == null) {
+            n = null;
+            est = 0L;
         }
+        else {
+            n = h.node;
+            est = getAdderCount();
+        }
+        return new KeySpliterator<K,V>(comparator, h, n, null, est);
     }
 
     static final class ValueSpliterator<K,V> extends CSLMSpliterator<K,V>
         implements Spliterator<V> {
         ValueSpliterator(Comparator<? super K> comparator, Index<K,V> row,
-                       Node<K,V> origin, K fence, int est) {
+                       Node<K,V> origin, K fence, long est) {
             super(comparator, row, origin, fence, est);
         }
 
@@ -3419,7 +3218,7 @@
                 for (Index<K,V> q = row; q != null; q = row = q.down) {
                     Index<K,V> s; Node<K,V> b, n; K sk;
                     if ((s = q.right) != null && (b = s.node) != null &&
-                        (n = b.next) != null && n.value != null &&
+                        (n = b.next) != null && n.val != null &&
                         (sk = n.key) != null && cpr(cmp, sk, ek) > 0 &&
                         (f == null || cpr(cmp, sk, f) < 0)) {
                         current = n;
@@ -3440,13 +3239,11 @@
             Node<K,V> e = current;
             current = null;
             for (; e != null; e = e.next) {
-                K k; Object v;
+                K k; V v;
                 if ((k = e.key) != null && f != null && cpr(cmp, f, k) <= 0)
                     break;
-                if ((v = e.value) != null && v != e) {
-                    @SuppressWarnings("unchecked") V vv = (V)v;
-                    action.accept(vv);
-                }
+                if ((v = e.val) != null)
+                    action.accept(v);
             }
         }
 
@@ -3456,15 +3253,14 @@
             K f = fence;
             Node<K,V> e = current;
             for (; e != null; e = e.next) {
-                K k; Object v;
+                K k; V v;
                 if ((k = e.key) != null && f != null && cpr(cmp, f, k) <= 0) {
                     e = null;
                     break;
                 }
-                if ((v = e.value) != null && v != e) {
+                if ((v = e.val) != null) {
                     current = e.next;
-                    @SuppressWarnings("unchecked") V vv = (V)v;
-                    action.accept(vv);
+                    action.accept(v);
                     return true;
                 }
             }
@@ -3480,21 +3276,23 @@
 
     // Almost the same as keySpliterator()
     final ValueSpliterator<K,V> valueSpliterator() {
-        Comparator<? super K> cmp = comparator;
-        for (;;) {
-            HeadIndex<K,V> h; Node<K,V> p;
-            Node<K,V> b = (h = head).node;
-            if ((p = b.next) == null || p.value != null)
-                return new ValueSpliterator<K,V>(cmp, h, p, null, (p == null) ?
-                                                 0 : Integer.MAX_VALUE);
-            p.helpDelete(b, p.next);
+        Index<K,V> h; Node<K,V> n; long est;
+        VarHandle.acquireFence();
+        if ((h = head) == null) {
+            n = null;
+            est = 0L;
         }
+        else {
+            n = h.node;
+            est = getAdderCount();
+        }
+        return new ValueSpliterator<K,V>(comparator, h, n, null, est);
     }
 
     static final class EntrySpliterator<K,V> extends CSLMSpliterator<K,V>
         implements Spliterator<Map.Entry<K,V>> {
         EntrySpliterator(Comparator<? super K> comparator, Index<K,V> row,
-                         Node<K,V> origin, K fence, int est) {
+                         Node<K,V> origin, K fence, long est) {
             super(comparator, row, origin, fence, est);
         }
 
@@ -3506,7 +3304,7 @@
                 for (Index<K,V> q = row; q != null; q = row = q.down) {
                     Index<K,V> s; Node<K,V> b, n; K sk;
                     if ((s = q.right) != null && (b = s.node) != null &&
-                        (n = b.next) != null && n.value != null &&
+                        (n = b.next) != null && n.val != null &&
                         (sk = n.key) != null && cpr(cmp, sk, ek) > 0 &&
                         (f == null || cpr(cmp, sk, f) < 0)) {
                         current = n;
@@ -3527,13 +3325,12 @@
             Node<K,V> e = current;
             current = null;
             for (; e != null; e = e.next) {
-                K k; Object v;
+                K k; V v;
                 if ((k = e.key) != null && f != null && cpr(cmp, f, k) <= 0)
                     break;
-                if ((v = e.value) != null && v != e) {
-                    @SuppressWarnings("unchecked") V vv = (V)v;
+                if ((v = e.val) != null) {
                     action.accept
-                        (new AbstractMap.SimpleImmutableEntry<K,V>(k, vv));
+                        (new AbstractMap.SimpleImmutableEntry<K,V>(k, v));
                 }
             }
         }
@@ -3544,16 +3341,15 @@
             K f = fence;
             Node<K,V> e = current;
             for (; e != null; e = e.next) {
-                K k; Object v;
+                K k; V v;
                 if ((k = e.key) != null && f != null && cpr(cmp, f, k) <= 0) {
                     e = null;
                     break;
                 }
-                if ((v = e.value) != null && v != e) {
+                if ((v = e.val) != null) {
                     current = e.next;
-                    @SuppressWarnings("unchecked") V vv = (V)v;
                     action.accept
-                        (new AbstractMap.SimpleImmutableEntry<K,V>(k, vv));
+                        (new AbstractMap.SimpleImmutableEntry<K,V>(k, v));
                     return true;
                 }
             }
@@ -3584,26 +3380,37 @@
 
     // Almost the same as keySpliterator()
     final EntrySpliterator<K,V> entrySpliterator() {
-        Comparator<? super K> cmp = comparator;
-        for (;;) { // almost same as key version
-            HeadIndex<K,V> h; Node<K,V> p;
-            Node<K,V> b = (h = head).node;
-            if ((p = b.next) == null || p.value != null)
-                return new EntrySpliterator<K,V>(cmp, h, p, null, (p == null) ?
-                                                 0 : Integer.MAX_VALUE);
-            p.helpDelete(b, p.next);
+        Index<K,V> h; Node<K,V> n; long est;
+        VarHandle.acquireFence();
+        if ((h = head) == null) {
+            n = null;
+            est = 0L;
         }
+        else {
+            n = h.node;
+            est = getAdderCount();
+        }
+        return new EntrySpliterator<K,V>(comparator, h, n, null, est);
     }
 
-    // Unsafe mechanics
-    private static final sun.misc.Unsafe U = sun.misc.Unsafe.getUnsafe();
-    private static final long HEAD;
+    // VarHandle mechanics
+    private static final VarHandle HEAD;
+    private static final VarHandle ADDER;
+    private static final VarHandle NEXT;
+    private static final VarHandle VAL;
+    private static final VarHandle RIGHT;
     static {
         try {
-            HEAD = U.objectFieldOffset
-                (ConcurrentSkipListMap.class.getDeclaredField("head"));
+            MethodHandles.Lookup l = MethodHandles.lookup();
+            HEAD = l.findVarHandle(ConcurrentSkipListMap.class, "head",
+                                   Index.class);
+            ADDER = l.findVarHandle(ConcurrentSkipListMap.class, "adder",
+                                    LongAdder.class);
+            NEXT = l.findVarHandle(Node.class, "next", Node.class);
+            VAL = l.findVarHandle(Node.class, "val", Object.class);
+            RIGHT = l.findVarHandle(Index.class, "right", Index.class);
         } catch (ReflectiveOperationException e) {
-            throw new Error(e);
+            throw new ExceptionInInitializerError(e);
         }
     }
 }
diff --git a/ojluni/src/main/java/java/util/concurrent/ConcurrentSkipListSet.java b/ojluni/src/main/java/java/util/concurrent/ConcurrentSkipListSet.java
index 2e11b17..140bde4 100644
--- a/ojluni/src/main/java/java/util/concurrent/ConcurrentSkipListSet.java
+++ b/ojluni/src/main/java/java/util/concurrent/ConcurrentSkipListSet.java
@@ -35,23 +35,18 @@
 
 package java.util.concurrent;
 
+import java.lang.reflect.Field;
 import java.util.AbstractSet;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.Comparator;
 import java.util.Iterator;
 import java.util.Map;
-import java.util.NavigableMap;
 import java.util.NavigableSet;
 import java.util.Set;
 import java.util.SortedSet;
 import java.util.Spliterator;
 
-// BEGIN android-note
-// removed link to collections framework docs
-// fixed framework docs link to "Collection#optional"
-// END android-note
-
 /**
  * A scalable concurrent {@link NavigableSet} implementation based on
  * a {@link ConcurrentSkipListMap}.  The elements of the set are kept
@@ -75,12 +70,12 @@
  * asynchronous nature of these sets, determining the current number
  * of elements requires a traversal of the elements, and so may report
  * inaccurate results if this collection is modified during traversal.
- * Additionally, the bulk operations {@code addAll},
- * {@code removeAll}, {@code retainAll}, {@code containsAll},
- * {@code equals}, and {@code toArray} are <em>not</em> guaranteed
- * to be performed atomically. For example, an iterator operating
- * concurrently with an {@code addAll} operation might view only some
- * of the added elements.
+ *
+ * <p>Bulk operations that add, remove, or examine multiple elements,
+ * such as {@link #addAll}, {@link #removeIf} or {@link #forEach},
+ * are <em>not</em> guaranteed to be performed atomically.
+ * For example, a {@code forEach} traversal concurrent with an {@code
+ * addAll} operation might observe only some of the added elements.
  *
  * <p>This class and its iterators implement all of the
  * <em>optional</em> methods of the {@link Set} and {@link Iterator}
@@ -89,6 +84,10 @@
  * because {@code null} arguments and return values cannot be reliably
  * distinguished from the absence of elements.
  *
+ * <p>This class is a member of the
+ * <a href="{@docRoot}/java.base/java/util/package-summary.html#CollectionsFramework">
+ * Java Collections Framework</a>.
+ *
  * @author Doug Lea
  * @param <E> the type of elements maintained by this set
  * @since 1.6
@@ -310,9 +309,7 @@
         Collection<?> c = (Collection<?>) o;
         try {
             return containsAll(c) && c.containsAll(this);
-        } catch (ClassCastException unused) {
-            return false;
-        } catch (NullPointerException unused) {
+        } catch (ClassCastException | NullPointerException unused) {
             return false;
         }
     }
@@ -327,7 +324,7 @@
      * @return {@code true} if this set changed as a result of the call
      * @throws ClassCastException if the class of an element of this set
      *         is incompatible with the specified collection
-     * (<a href="../Collection.html#optional-restrictions">optional</a>)
+     * (<a href="{@docRoot}/java.base/java/util/Collection.html#optional-restrictions">optional</a>)
      * @throws NullPointerException if the specified collection or any
      *         of its elements are null
      */
@@ -491,9 +488,9 @@
      * encounter order that is ascending order.  Overriding implementations
      * should document the reporting of additional characteristic values.
      *
-     * <p>The spliterator's comparator (see
-     * {@link java.util.Spliterator#getComparator()}) is {@code null} if
-     * the set's comparator (see {@link #comparator()}) is {@code null}.
+     * <p>The {@linkplain Spliterator#getComparator() spliterator's comparator}
+     * is {@code null} if the {@linkplain #comparator() set's comparator}
+     * is {@code null}.
      * Otherwise, the spliterator's comparator is the same as or imposes the
      * same total ordering as the set's comparator.
      *
@@ -506,18 +503,21 @@
             : ((ConcurrentSkipListMap.SubMap<E,?>)m).new SubMapKeyIterator();
     }
 
-    // Support for resetting map in clone
+    /** Initializes map field; for use in clone. */
     private void setMap(ConcurrentNavigableMap<E,Object> map) {
-        U.putObjectVolatile(this, MAP, map);
-    }
-
-    private static final sun.misc.Unsafe U = sun.misc.Unsafe.getUnsafe();
-    private static final long MAP;
-    static {
+        Field mapField = java.security.AccessController.doPrivileged(
+            (java.security.PrivilegedAction<Field>) () -> {
+                try {
+                    Field f = ConcurrentSkipListSet.class
+                        .getDeclaredField("m");
+                    f.setAccessible(true);
+                    return f;
+                } catch (ReflectiveOperationException e) {
+                    throw new Error(e);
+                }});
         try {
-            MAP = U.objectFieldOffset
-                (ConcurrentSkipListSet.class.getDeclaredField("m"));
-        } catch (ReflectiveOperationException e) {
+            mapField.set(this, map);
+        } catch (IllegalAccessException e) {
             throw new Error(e);
         }
     }
diff --git a/ojluni/src/main/java/java/util/concurrent/CopyOnWriteArrayList.java b/ojluni/src/main/java/java/util/concurrent/CopyOnWriteArrayList.java
index ebcbbef..912204f 100644
--- a/ojluni/src/main/java/java/util/concurrent/CopyOnWriteArrayList.java
+++ b/ojluni/src/main/java/java/util/concurrent/CopyOnWriteArrayList.java
@@ -34,7 +34,8 @@
 
 package java.util.concurrent;
 
-import java.util.AbstractList;
+import java.lang.invoke.VarHandle;
+import java.lang.reflect.Field;
 import java.util.Arrays;
 import java.util.Collection;
 import java.util.Comparator;
@@ -50,6 +51,7 @@
 import java.util.function.Consumer;
 import java.util.function.Predicate;
 import java.util.function.UnaryOperator;
+import jdk.internal.misc.SharedSecrets;
 
 // Android-changed: Removed javadoc link to collections framework docs
 /**
@@ -81,6 +83,10 @@
  * actions subsequent to the access or removal of that element from
  * the {@code CopyOnWriteArrayList} in another thread.
  *
+ * <p>This class is a member of the
+ * <a href="{@docRoot}/java.base/java/util/package-summary.html#CollectionsFramework">
+ * Java Collections Framework</a>.
+ *
  * @since 1.5
  * @author Doug Lea
  * @param <E> the type of elements held in this list
@@ -129,17 +135,17 @@
      * @throws NullPointerException if the specified collection is null
      */
     public CopyOnWriteArrayList(Collection<? extends E> c) {
-        Object[] elements;
+        Object[] es;
         if (c.getClass() == CopyOnWriteArrayList.class)
-            elements = ((CopyOnWriteArrayList<?>)c).getArray();
+            es = ((CopyOnWriteArrayList<?>)c).getArray();
         else {
-            elements = c.toArray();
+            es = c.toArray();
             // defend against c.toArray (incorrectly) not returning Object[]
             // (see e.g. https://bugs.openjdk.java.net/browse/JDK-6260652)
-            if (elements.getClass() != Object[].class)
-                elements = Arrays.copyOf(elements, elements.length, Object[].class);
+            if (es.getClass() != Object[].class)
+                es = Arrays.copyOf(es, es.length, Object[].class);
         }
-        setArray(elements);
+        setArray(es);
     }
 
     /**
@@ -175,20 +181,19 @@
      * static version of indexOf, to allow repeated calls without
      * needing to re-acquire array each time.
      * @param o element to search for
-     * @param elements the array
-     * @param index first index to search
-     * @param fence one past last index to search
+     * @param es the array
+     * @param from first index to search
+     * @param to one past last index to search
      * @return index of element, or -1 if absent
      */
-    private static int indexOf(Object o, Object[] elements,
-                               int index, int fence) {
+    private static int indexOfRange(Object o, Object[] es, int from, int to) {
         if (o == null) {
-            for (int i = index; i < fence; i++)
-                if (elements[i] == null)
+            for (int i = from; i < to; i++)
+                if (es[i] == null)
                     return i;
         } else {
-            for (int i = index; i < fence; i++)
-                if (o.equals(elements[i]))
+            for (int i = from; i < to; i++)
+                if (o.equals(es[i]))
                     return i;
         }
         return -1;
@@ -197,18 +202,19 @@
     /**
      * static version of lastIndexOf.
      * @param o element to search for
-     * @param elements the array
-     * @param index first index to search
+     * @param es the array
+     * @param from index of first element of range, last element to search
+     * @param to one past last element of range, first element to search
      * @return index of element, or -1 if absent
      */
-    private static int lastIndexOf(Object o, Object[] elements, int index) {
+    private static int lastIndexOfRange(Object o, Object[] es, int from, int to) {
         if (o == null) {
-            for (int i = index; i >= 0; i--)
-                if (elements[i] == null)
+            for (int i = to - 1; i >= from; i--)
+                if (es[i] == null)
                     return i;
         } else {
-            for (int i = index; i >= 0; i--)
-                if (o.equals(elements[i]))
+            for (int i = to - 1; i >= from; i--)
+                if (o.equals(es[i]))
                     return i;
         }
         return -1;
@@ -223,16 +229,15 @@
      * @return {@code true} if this list contains the specified element
      */
     public boolean contains(Object o) {
-        Object[] elements = getArray();
-        return indexOf(o, elements, 0, elements.length) >= 0;
+        return indexOf(o) >= 0;
     }
 
     /**
      * {@inheritDoc}
      */
     public int indexOf(Object o) {
-        Object[] elements = getArray();
-        return indexOf(o, elements, 0, elements.length);
+        Object[] es = getArray();
+        return indexOfRange(o, es, 0, es.length);
     }
 
     /**
@@ -251,16 +256,16 @@
      * @throws IndexOutOfBoundsException if the specified index is negative
      */
     public int indexOf(E e, int index) {
-        Object[] elements = getArray();
-        return indexOf(e, elements, index, elements.length);
+        Object[] es = getArray();
+        return indexOfRange(e, es, index, es.length);
     }
 
     /**
      * {@inheritDoc}
      */
     public int lastIndexOf(Object o) {
-        Object[] elements = getArray();
-        return lastIndexOf(o, elements, elements.length - 1);
+        Object[] es = getArray();
+        return lastIndexOfRange(o, es, 0, es.length);
     }
 
     /**
@@ -280,8 +285,8 @@
      *         than or equal to the current size of this list
      */
     public int lastIndexOf(E e, int index) {
-        Object[] elements = getArray();
-        return lastIndexOf(e, elements, index);
+        Object[] es = getArray();
+        return lastIndexOfRange(e, es, 0, index + 1);
     }
 
     /**
@@ -296,6 +301,9 @@
             CopyOnWriteArrayList<E> clone =
                 (CopyOnWriteArrayList<E>) super.clone();
             clone.resetLock();
+            // Unlike in readObject, here we cannot visibility-piggyback on the
+            // volatile write in setArray().
+            VarHandle.releaseFence();
             return clone;
         } catch (CloneNotSupportedException e) {
             // this shouldn't happen, since we are Cloneable
@@ -317,8 +325,7 @@
      * @return an array containing all the elements in this list
      */
     public Object[] toArray() {
-        Object[] elements = getArray();
-        return Arrays.copyOf(elements, elements.length);
+        return getArray().clone();
     }
 
     /**
@@ -361,12 +368,12 @@
      */
     @SuppressWarnings("unchecked")
     public <T> T[] toArray(T[] a) {
-        Object[] elements = getArray();
-        int len = elements.length;
+        Object[] es = getArray();
+        int len = es.length;
         if (a.length < len)
-            return (T[]) Arrays.copyOf(elements, len, a.getClass());
+            return (T[]) Arrays.copyOf(es, len, a.getClass());
         else {
-            System.arraycopy(elements, 0, a, 0, len);
+            System.arraycopy(es, 0, a, 0, len);
             if (a.length > len)
                 a[len] = null;
             return a;
@@ -376,7 +383,7 @@
     // Positional Access Operations
 
     @SuppressWarnings("unchecked")
-    private E get(Object[] a, int index) {
+    static <E> E elementAt(Object[] a, int index) {
         return (E) a[index];
     }
 
@@ -390,7 +397,7 @@
      * @throws IndexOutOfBoundsException {@inheritDoc}
      */
     public E get(int index) {
-        return get(getArray(), index);
+        return elementAt(getArray(), index);
     }
 
     /**
@@ -401,17 +408,13 @@
      */
     public E set(int index, E element) {
         synchronized (lock) {
-            Object[] elements = getArray();
-            E oldValue = get(elements, index);
+            Object[] es = getArray();
+            E oldValue = elementAt(es, index);
 
             if (oldValue != element) {
-                int len = elements.length;
-                Object[] newElements = Arrays.copyOf(elements, len);
-                newElements[index] = element;
-                setArray(newElements);
-            } else {
-                // Not quite a no-op; ensures volatile write semantics
-                setArray(elements);
+                es = es.clone();
+                es[index] = element;
+                setArray(es);
             }
             return oldValue;
         }
@@ -425,11 +428,11 @@
      */
     public boolean add(E e) {
         synchronized (lock) {
-            Object[] elements = getArray();
-            int len = elements.length;
-            Object[] newElements = Arrays.copyOf(elements, len + 1);
-            newElements[len] = e;
-            setArray(newElements);
+            Object[] es = getArray();
+            int len = es.length;
+            es = Arrays.copyOf(es, len + 1);
+            es[len] = e;
+            setArray(es);
             return true;
         }
     }
@@ -443,18 +446,18 @@
      */
     public void add(int index, E element) {
         synchronized (lock) {
-            Object[] elements = getArray();
-            int len = elements.length;
+            Object[] es = getArray();
+            int len = es.length;
             if (index > len || index < 0)
                 throw new IndexOutOfBoundsException(outOfBounds(index, len));
             Object[] newElements;
             int numMoved = len - index;
             if (numMoved == 0)
-                newElements = Arrays.copyOf(elements, len + 1);
+                newElements = Arrays.copyOf(es, len + 1);
             else {
                 newElements = new Object[len + 1];
-                System.arraycopy(elements, 0, newElements, 0, index);
-                System.arraycopy(elements, index, newElements, index + 1,
+                System.arraycopy(es, 0, newElements, 0, index);
+                System.arraycopy(es, index, newElements, index + 1,
                                  numMoved);
             }
             newElements[index] = element;
@@ -471,19 +474,20 @@
      */
     public E remove(int index) {
         synchronized (lock) {
-            Object[] elements = getArray();
-            int len = elements.length;
-            E oldValue = get(elements, index);
+            Object[] es = getArray();
+            int len = es.length;
+            E oldValue = elementAt(es, index);
             int numMoved = len - index - 1;
+            Object[] newElements;
             if (numMoved == 0)
-                setArray(Arrays.copyOf(elements, len - 1));
+                newElements = Arrays.copyOf(es, len - 1);
             else {
-                Object[] newElements = new Object[len - 1];
-                System.arraycopy(elements, 0, newElements, 0, index);
-                System.arraycopy(elements, index + 1, newElements, index,
+                newElements = new Object[len - 1];
+                System.arraycopy(es, 0, newElements, 0, index);
+                System.arraycopy(es, index + 1, newElements, index,
                                  numMoved);
-                setArray(newElements);
             }
+            setArray(newElements);
             return oldValue;
         }
     }
@@ -502,8 +506,8 @@
      */
     public boolean remove(Object o) {
         Object[] snapshot = getArray();
-        int index = indexOf(o, snapshot, 0, snapshot.length);
-        return (index < 0) ? false : remove(o, snapshot, index);
+        int index = indexOfRange(o, snapshot, 0, snapshot.length);
+        return index >= 0 && remove(o, snapshot, index);
     }
 
     /**
@@ -527,7 +531,7 @@
                     return false;
                 if (current[index] == o)
                     break findIndex;
-                index = indexOf(o, current, index, len);
+                index = indexOfRange(o, current, index, len);
                 if (index < 0)
                     return false;
             }
@@ -555,19 +559,19 @@
      */
     void removeRange(int fromIndex, int toIndex) {
         synchronized (lock) {
-            Object[] elements = getArray();
-            int len = elements.length;
+            Object[] es = getArray();
+            int len = es.length;
 
             if (fromIndex < 0 || toIndex > len || toIndex < fromIndex)
                 throw new IndexOutOfBoundsException();
             int newlen = len - (toIndex - fromIndex);
             int numMoved = len - toIndex;
             if (numMoved == 0)
-                setArray(Arrays.copyOf(elements, newlen));
+                setArray(Arrays.copyOf(es, newlen));
             else {
                 Object[] newElements = new Object[newlen];
-                System.arraycopy(elements, 0, newElements, 0, fromIndex);
-                System.arraycopy(elements, toIndex, newElements,
+                System.arraycopy(es, 0, newElements, 0, fromIndex);
+                System.arraycopy(es, toIndex, newElements,
                                  fromIndex, numMoved);
                 setArray(newElements);
             }
@@ -582,8 +586,8 @@
      */
     public boolean addIfAbsent(E e) {
         Object[] snapshot = getArray();
-        return indexOf(e, snapshot, 0, snapshot.length) >= 0 ? false :
-            addIfAbsent(e, snapshot);
+        return indexOfRange(e, snapshot, 0, snapshot.length) < 0
+            && addIfAbsent(e, snapshot);
     }
 
     /**
@@ -601,7 +605,7 @@
                     if (current[i] != snapshot[i]
                         && Objects.equals(e, current[i]))
                         return false;
-                if (indexOf(e, current, common, len) >= 0)
+                if (indexOfRange(e, current, common, len) >= 0)
                         return false;
             }
             Object[] newElements = Arrays.copyOf(current, len + 1);
@@ -622,10 +626,10 @@
      * @see #contains(Object)
      */
     public boolean containsAll(Collection<?> c) {
-        Object[] elements = getArray();
-        int len = elements.length;
+        Object[] es = getArray();
+        int len = es.length;
         for (Object e : c) {
-            if (indexOf(e, elements, 0, len) < 0)
+            if (indexOfRange(e, es, 0, len) < 0)
                 return false;
         }
         return true;
@@ -640,34 +644,16 @@
      * @return {@code true} if this list changed as a result of the call
      * @throws ClassCastException if the class of an element of this list
      *         is incompatible with the specified collection
-     * (<a href="../Collection.html#optional-restrictions">optional</a>)
+     * (<a href="{@docRoot}/java.base/java/util/Collection.html#optional-restrictions">optional</a>)
      * @throws NullPointerException if this list contains a null element and the
      *         specified collection does not permit null elements
-     * (<a href="../Collection.html#optional-restrictions">optional</a>),
+     * (<a href="{@docRoot}/java.base/java/util/Collection.html#optional-restrictions">optional</a>),
      *         or if the specified collection is null
      * @see #remove(Object)
      */
     public boolean removeAll(Collection<?> c) {
-        if (c == null) throw new NullPointerException();
-        synchronized (lock) {
-            Object[] elements = getArray();
-            int len = elements.length;
-            if (len != 0) {
-                // temp array holds those elements we know we want to keep
-                int newlen = 0;
-                Object[] temp = new Object[len];
-                for (int i = 0; i < len; ++i) {
-                    Object element = elements[i];
-                    if (!c.contains(element))
-                        temp[newlen++] = element;
-                }
-                if (newlen != len) {
-                    setArray(Arrays.copyOf(temp, newlen));
-                    return true;
-                }
-            }
-            return false;
-        }
+        Objects.requireNonNull(c);
+        return bulkRemove(e -> c.contains(e));
     }
 
     /**
@@ -679,34 +665,16 @@
      * @return {@code true} if this list changed as a result of the call
      * @throws ClassCastException if the class of an element of this list
      *         is incompatible with the specified collection
-     * (<a href="{@docRoot}/../api/java/util/Collection.html#optional-restrictions">optional</a>)
+     * (<a href="{@docRoot}/java.base/java/util/Collection.html#optional-restrictions">optional</a>)
      * @throws NullPointerException if this list contains a null element and the
      *         specified collection does not permit null elements
-     * (<a href="{@docRoot}/../api/java/util/Collection.html#optional-restrictions">optional</a>),
+     * (<a href="{@docRoot}/java.base/java/util/Collection.html#optional-restrictions">optional</a>),
      *         or if the specified collection is null
      * @see #remove(Object)
      */
     public boolean retainAll(Collection<?> c) {
-        if (c == null) throw new NullPointerException();
-        synchronized (lock) {
-            Object[] elements = getArray();
-            int len = elements.length;
-            if (len != 0) {
-                // temp array holds those elements we know we want to keep
-                int newlen = 0;
-                Object[] temp = new Object[len];
-                for (int i = 0; i < len; ++i) {
-                    Object element = elements[i];
-                    if (c.contains(element))
-                        temp[newlen++] = element;
-                }
-                if (newlen != len) {
-                    setArray(Arrays.copyOf(temp, newlen));
-                    return true;
-                }
-            }
-            return false;
-        }
+        Objects.requireNonNull(c);
+        return bulkRemove(e -> !c.contains(e));
     }
 
     /**
@@ -725,18 +693,18 @@
         if (cs.length == 0)
             return 0;
         synchronized (lock) {
-            Object[] elements = getArray();
-            int len = elements.length;
+            Object[] es = getArray();
+            int len = es.length;
             int added = 0;
             // uniquify and compact elements in cs
             for (int i = 0; i < cs.length; ++i) {
                 Object e = cs[i];
-                if (indexOf(e, elements, 0, len) < 0 &&
-                    indexOf(e, cs, 0, added) < 0)
+                if (indexOfRange(e, es, 0, len) < 0 &&
+                    indexOfRange(e, cs, 0, added) < 0)
                     cs[added++] = e;
             }
             if (added > 0) {
-                Object[] newElements = Arrays.copyOf(elements, len + added);
+                Object[] newElements = Arrays.copyOf(es, len + added);
                 System.arraycopy(cs, 0, newElements, len, added);
                 setArray(newElements);
             }
@@ -770,15 +738,16 @@
         if (cs.length == 0)
             return false;
         synchronized (lock) {
-            Object[] elements = getArray();
-            int len = elements.length;
+            Object[] es = getArray();
+            int len = es.length;
+            Object[] newElements;
             if (len == 0 && cs.getClass() == Object[].class)
-                setArray(cs);
+                newElements = cs;
             else {
-                Object[] newElements = Arrays.copyOf(elements, len + cs.length);
+                newElements = Arrays.copyOf(es, len + cs.length);
                 System.arraycopy(cs, 0, newElements, len, cs.length);
-                setArray(newElements);
             }
+            setArray(newElements);
             return true;
         }
     }
@@ -802,8 +771,8 @@
     public boolean addAll(int index, Collection<? extends E> c) {
         Object[] cs = c.toArray();
         synchronized (lock) {
-            Object[] elements = getArray();
-            int len = elements.length;
+            Object[] es = getArray();
+            int len = es.length;
             if (index > len || index < 0)
                 throw new IndexOutOfBoundsException(outOfBounds(index, len));
             if (cs.length == 0)
@@ -811,11 +780,11 @@
             int numMoved = len - index;
             Object[] newElements;
             if (numMoved == 0)
-                newElements = Arrays.copyOf(elements, len + cs.length);
+                newElements = Arrays.copyOf(es, len + cs.length);
             else {
                 newElements = new Object[len + cs.length];
-                System.arraycopy(elements, 0, newElements, 0, index);
-                System.arraycopy(elements, index,
+                System.arraycopy(es, 0, newElements, 0, index);
+                System.arraycopy(es, index,
                                  newElements, index + cs.length,
                                  numMoved);
             }
@@ -825,65 +794,106 @@
         }
     }
 
+    /**
+     * @throws NullPointerException {@inheritDoc}
+     */
     public void forEach(Consumer<? super E> action) {
-        if (action == null) throw new NullPointerException();
+        Objects.requireNonNull(action);
         for (Object x : getArray()) {
             @SuppressWarnings("unchecked") E e = (E) x;
             action.accept(e);
         }
     }
 
+    /**
+     * @throws NullPointerException {@inheritDoc}
+     */
     public boolean removeIf(Predicate<? super E> filter) {
-        if (filter == null) throw new NullPointerException();
+        Objects.requireNonNull(filter);
+        return bulkRemove(filter);
+    }
+
+    // A tiny bit set implementation
+
+    private static long[] nBits(int n) {
+        return new long[((n - 1) >> 6) + 1];
+    }
+    private static void setBit(long[] bits, int i) {
+        bits[i >> 6] |= 1L << i;
+    }
+    private static boolean isClear(long[] bits, int i) {
+        return (bits[i >> 6] & (1L << i)) == 0;
+    }
+
+    private boolean bulkRemove(Predicate<? super E> filter) {
         synchronized (lock) {
-            final Object[] elements = getArray();
-            final int len = elements.length;
-            int i;
-            for (i = 0; i < len; i++) {
-                @SuppressWarnings("unchecked") E e = (E) elements[i];
-                if (filter.test(e)) {
-                    int newlen = i;
-                    final Object[] newElements = new Object[len - 1];
-                    System.arraycopy(elements, 0, newElements, 0, newlen);
-                    for (i++; i < len; i++) {
-                        @SuppressWarnings("unchecked") E x = (E) elements[i];
-                        if (!filter.test(x))
-                            newElements[newlen++] = x;
-                    }
-                    setArray((newlen == len - 1)
-                             ? newElements // one match => one copy
-                             : Arrays.copyOf(newElements, newlen));
-                    return true;
+            return bulkRemove(filter, 0, getArray().length);
+        }
+    }
+
+    boolean bulkRemove(Predicate<? super E> filter, int i, int end) {
+        // assert Thread.holdsLock(lock);
+        final Object[] es = getArray();
+        // Optimize for initial run of survivors
+        for (; i < end && !filter.test(elementAt(es, i)); i++)
+            ;
+        if (i < end) {
+            final int beg = i;
+            final long[] deathRow = nBits(end - beg);
+            int deleted = 1;
+            deathRow[0] = 1L;   // set bit 0
+            for (i = beg + 1; i < end; i++)
+                if (filter.test(elementAt(es, i))) {
+                    setBit(deathRow, i - beg);
+                    deleted++;
                 }
-            }
-            return false;       // zero matches => zero copies
+            // Did filter reentrantly modify the list?
+            if (es != getArray())
+                throw new ConcurrentModificationException();
+            final Object[] newElts = Arrays.copyOf(es, es.length - deleted);
+            int w = beg;
+            for (i = beg; i < end; i++)
+                if (isClear(deathRow, i - beg))
+                    newElts[w++] = es[i];
+            System.arraycopy(es, i, newElts, w, es.length - i);
+            setArray(newElts);
+            return true;
+        } else {
+            if (es != getArray())
+                throw new ConcurrentModificationException();
+            return false;
         }
     }
 
     public void replaceAll(UnaryOperator<E> operator) {
-        if (operator == null) throw new NullPointerException();
         synchronized (lock) {
-            Object[] elements = getArray();
-            int len = elements.length;
-            Object[] newElements = Arrays.copyOf(elements, len);
-            for (int i = 0; i < len; ++i) {
-                @SuppressWarnings("unchecked") E e = (E) elements[i];
-                newElements[i] = operator.apply(e);
-            }
-            setArray(newElements);
+            replaceAllRange(operator, 0, getArray().length);
         }
     }
 
+    void replaceAllRange(UnaryOperator<E> operator, int i, int end) {
+        // assert Thread.holdsLock(lock);
+        Objects.requireNonNull(operator);
+        final Object[] es = getArray().clone();
+        for (; i < end; i++)
+            es[i] = operator.apply(elementAt(es, i));
+        setArray(es);
+    }
+
     public void sort(Comparator<? super E> c) {
         synchronized (lock) {
-            Object[] elements = getArray();
-            Object[] newElements = Arrays.copyOf(elements, elements.length);
-            @SuppressWarnings("unchecked") E[] es = (E[])newElements;
-            Arrays.sort(es, c);
-            setArray(newElements);
+            sortRange(c, 0, getArray().length);
         }
     }
 
+    @SuppressWarnings("unchecked")
+    void sortRange(Comparator<? super E> c, int i, int end) {
+        // assert Thread.holdsLock(lock);
+        final Object[] es = getArray().clone();
+        Arrays.sort(es, i, end, (Comparator<Object>)c);
+        setArray(es);
+    }
+
     /**
      * Saves this list to a stream (that is, serializes it).
      *
@@ -898,12 +908,12 @@
 
         s.defaultWriteObject();
 
-        Object[] elements = getArray();
+        Object[] es = getArray();
         // Write out array length
-        s.writeInt(elements.length);
+        s.writeInt(es.length);
 
         // Write out all elements in the proper order.
-        for (Object element : elements)
+        for (Object element : es)
             s.writeObject(element);
     }
 
@@ -924,12 +934,13 @@
 
         // Read in array length and allocate array
         int len = s.readInt();
-        Object[] elements = new Object[len];
+        SharedSecrets.getJavaObjectInputStreamAccess().checkArray(s, Object[].class, len);
+        Object[] es = new Object[len];
 
         // Read in all elements in the proper order.
         for (int i = 0; i < len; i++)
-            elements[i] = s.readObject();
-        setArray(elements);
+            es[i] = s.readObject();
+        setArray(es);
     }
 
     /**
@@ -969,13 +980,19 @@
 
         List<?> list = (List<?>)o;
         Iterator<?> it = list.iterator();
-        Object[] elements = getArray();
-        for (int i = 0, len = elements.length; i < len; i++)
-            if (!it.hasNext() || !Objects.equals(elements[i], it.next()))
+        for (Object element : getArray())
+            if (!it.hasNext() || !Objects.equals(element, it.next()))
                 return false;
-        if (it.hasNext())
-            return false;
-        return true;
+        return !it.hasNext();
+    }
+
+    private static int hashCodeOfRange(Object[] es, int from, int to) {
+        int hashCode = 1;
+        for (int i = from; i < to; i++) {
+            Object x = es[i];
+            hashCode = 31 * hashCode + (x == null ? 0 : x.hashCode());
+        }
+        return hashCode;
     }
 
     /**
@@ -986,10 +1003,8 @@
      * @return the hash code value for this list
      */
     public int hashCode() {
-        int hashCode = 1;
-        for (Object x : getArray())
-            hashCode = 31 * hashCode + (x == null ? 0 : x.hashCode());
-        return hashCode;
+        Object[] es = getArray();
+        return hashCodeOfRange(es, 0, es.length);
     }
 
     /**
@@ -1029,12 +1044,12 @@
      * @throws IndexOutOfBoundsException {@inheritDoc}
      */
     public ListIterator<E> listIterator(int index) {
-        Object[] elements = getArray();
-        int len = elements.length;
+        Object[] es = getArray();
+        int len = es.length;
         if (index < 0 || index > len)
             throw new IndexOutOfBoundsException(outOfBounds(index, len));
 
-        return new COWIterator<E>(elements, index);
+        return new COWIterator<E>(es, index);
     }
 
     /**
@@ -1062,9 +1077,9 @@
         /** Index of element to be returned by subsequent call to next.  */
         private int cursor;
 
-        COWIterator(Object[] elements, int initialCursor) {
+        COWIterator(Object[] es, int initialCursor) {
             cursor = initialCursor;
-            snapshot = elements;
+            snapshot = es;
         }
 
         public boolean hasNext() {
@@ -1094,7 +1109,7 @@
         }
 
         public int previousIndex() {
-            return cursor-1;
+            return cursor - 1;
         }
 
         /**
@@ -1125,14 +1140,13 @@
         }
 
         @Override
-        @SuppressWarnings("unchecked")
         public void forEachRemaining(Consumer<? super E> action) {
             Objects.requireNonNull(action);
             final int size = snapshot.length;
-            for (int i = cursor; i < size; i++) {
-                action.accept((E) snapshot[i]);
-            }
+            int i = cursor;
             cursor = size;
+            for (; i < size; i++)
+                action.accept(elementAt(snapshot, i));
         }
     }
 
@@ -1153,324 +1167,358 @@
      */
     public List<E> subList(int fromIndex, int toIndex) {
         synchronized (lock) {
-            Object[] elements = getArray();
-            int len = elements.length;
-            if (fromIndex < 0 || toIndex > len || fromIndex > toIndex)
+            Object[] es = getArray();
+            int len = es.length;
+            int size = toIndex - fromIndex;
+            if (fromIndex < 0 || toIndex > len || size < 0)
                 throw new IndexOutOfBoundsException();
-            return new COWSubList<E>(this, fromIndex, toIndex);
+            return new COWSubList(es, fromIndex, size);
         }
     }
 
     /**
      * Sublist for CopyOnWriteArrayList.
-     * This class extends AbstractList merely for convenience, to
-     * avoid having to define addAll, etc. This doesn't hurt, but
-     * is wasteful.  This class does not need or use modCount
-     * mechanics in AbstractList, but does need to check for
-     * concurrent modification using similar mechanics.  On each
-     * operation, the array that we expect the backing list to use
-     * is checked and updated.  Since we do this for all of the
-     * base operations invoked by those defined in AbstractList,
-     * all is well.  While inefficient, this is not worth
-     * improving.  The kinds of list operations inherited from
-     * AbstractList are already so slow on COW sublists that
-     * adding a bit more space/time doesn't seem even noticeable.
      */
-    private static class COWSubList<E>
-        extends AbstractList<E>
-        implements RandomAccess
-    {
-        private final CopyOnWriteArrayList<E> l;
+    private class COWSubList implements List<E>, RandomAccess {
         private final int offset;
         private int size;
         private Object[] expectedArray;
 
-        // only call this holding l's lock
-        COWSubList(CopyOnWriteArrayList<E> list,
-                   int fromIndex, int toIndex) {
-            // assert Thread.holdsLock(list.lock);
-            l = list;
-            expectedArray = l.getArray();
-            offset = fromIndex;
-            size = toIndex - fromIndex;
+        COWSubList(Object[] es, int offset, int size) {
+            // assert Thread.holdsLock(lock);
+            expectedArray = es;
+            this.offset = offset;
+            this.size = size;
         }
 
-        // only call this holding l's lock
         private void checkForComodification() {
-            // assert Thread.holdsLock(l.lock);
-            if (l.getArray() != expectedArray)
+            // assert Thread.holdsLock(lock);
+            if (getArray() != expectedArray)
                 throw new ConcurrentModificationException();
         }
 
-        // only call this holding l's lock
+        private Object[] getArrayChecked() {
+            // assert Thread.holdsLock(lock);
+            Object[] a = getArray();
+            if (a != expectedArray)
+                throw new ConcurrentModificationException();
+            return a;
+        }
+
         private void rangeCheck(int index) {
-            // assert Thread.holdsLock(l.lock);
+            // assert Thread.holdsLock(lock);
             if (index < 0 || index >= size)
                 throw new IndexOutOfBoundsException(outOfBounds(index, size));
         }
 
+        private void rangeCheckForAdd(int index) {
+            // assert Thread.holdsLock(lock);
+            if (index < 0 || index > size)
+                throw new IndexOutOfBoundsException(outOfBounds(index, size));
+        }
+
+        public Object[] toArray() {
+            final Object[] es;
+            final int offset;
+            final int size;
+            synchronized (lock) {
+                es = getArrayChecked();
+                offset = this.offset;
+                size = this.size;
+            }
+            return Arrays.copyOfRange(es, offset, offset + size);
+        }
+
+        @SuppressWarnings("unchecked")
+        public <T> T[] toArray(T[] a) {
+            final Object[] es;
+            final int offset;
+            final int size;
+            synchronized (lock) {
+                es = getArrayChecked();
+                offset = this.offset;
+                size = this.size;
+            }
+            if (a.length < size)
+                return (T[]) Arrays.copyOfRange(
+                        es, offset, offset + size, a.getClass());
+            else {
+                System.arraycopy(es, offset, a, 0, size);
+                if (a.length > size)
+                    a[size] = null;
+                return a;
+            }
+        }
+
+        public int indexOf(Object o) {
+            final Object[] es;
+            final int offset;
+            final int size;
+            synchronized (lock) {
+                es = getArrayChecked();
+                offset = this.offset;
+                size = this.size;
+            }
+            int i = indexOfRange(o, es, offset, offset + size);
+            return (i == -1) ? -1 : i - offset;
+        }
+
+        public int lastIndexOf(Object o) {
+            final Object[] es;
+            final int offset;
+            final int size;
+            synchronized (lock) {
+                es = getArrayChecked();
+                offset = this.offset;
+                size = this.size;
+            }
+            int i = lastIndexOfRange(o, es, offset, offset + size);
+            return (i == -1) ? -1 : i - offset;
+        }
+
+        public boolean contains(Object o) {
+            return indexOf(o) >= 0;
+        }
+
+        public boolean containsAll(Collection<?> c) {
+            final Object[] es;
+            final int offset;
+            final int size;
+            synchronized (lock) {
+                es = getArrayChecked();
+                offset = this.offset;
+                size = this.size;
+            }
+            for (Object o : c)
+                if (indexOfRange(o, es, offset, offset + size) < 0)
+                    return false;
+            return true;
+        }
+
+        public boolean isEmpty() {
+            return size() == 0;
+        }
+
+        public String toString() {
+            return Arrays.toString(toArray());
+        }
+
+        public int hashCode() {
+            final Object[] es;
+            final int offset;
+            final int size;
+            synchronized (lock) {
+                es = getArrayChecked();
+                offset = this.offset;
+                size = this.size;
+            }
+            return hashCodeOfRange(es, offset, offset + size);
+        }
+
+        public boolean equals(Object o) {
+            if (o == this)
+                return true;
+            if (!(o instanceof List))
+                return false;
+            Iterator<?> it = ((List<?>)o).iterator();
+
+            final Object[] es;
+            final int offset;
+            final int size;
+            synchronized (lock) {
+                es = getArrayChecked();
+                offset = this.offset;
+                size = this.size;
+            }
+
+            for (int i = offset, end = offset + size; i < end; i++)
+                if (!it.hasNext() || !Objects.equals(es[i], it.next()))
+                    return false;
+            return !it.hasNext();
+        }
+
         public E set(int index, E element) {
-            synchronized (l.lock) {
+            synchronized (lock) {
                 rangeCheck(index);
                 checkForComodification();
-                E x = l.set(index+offset, element);
-                expectedArray = l.getArray();
+                E x = CopyOnWriteArrayList.this.set(offset + index, element);
+                expectedArray = getArray();
                 return x;
             }
         }
 
         public E get(int index) {
-            synchronized (l.lock) {
+            synchronized (lock) {
                 rangeCheck(index);
                 checkForComodification();
-                return l.get(index+offset);
+                return CopyOnWriteArrayList.this.get(offset + index);
             }
         }
 
         public int size() {
-            synchronized (l.lock) {
+            synchronized (lock) {
                 checkForComodification();
                 return size;
             }
         }
 
-        public void add(int index, E element) {
-            synchronized (l.lock) {
+        public boolean add(E element) {
+            synchronized (lock) {
                 checkForComodification();
-                if (index < 0 || index > size)
-                    throw new IndexOutOfBoundsException
-                        (outOfBounds(index, size));
-                l.add(index+offset, element);
-                expectedArray = l.getArray();
+                CopyOnWriteArrayList.this.add(offset + size, element);
+                expectedArray = getArray();
+                size++;
+            }
+            return true;
+        }
+
+        public void add(int index, E element) {
+            synchronized (lock) {
+                checkForComodification();
+                rangeCheckForAdd(index);
+                CopyOnWriteArrayList.this.add(offset + index, element);
+                expectedArray = getArray();
                 size++;
             }
         }
 
+        public boolean addAll(Collection<? extends E> c) {
+            synchronized (lock) {
+                final Object[] oldArray = getArrayChecked();
+                boolean modified =
+                    CopyOnWriteArrayList.this.addAll(offset + size, c);
+                size += (expectedArray = getArray()).length - oldArray.length;
+                return modified;
+            }
+        }
+
+        public boolean addAll(int index, Collection<? extends E> c) {
+            synchronized (lock) {
+                rangeCheckForAdd(index);
+                final Object[] oldArray = getArrayChecked();
+                boolean modified =
+                    CopyOnWriteArrayList.this.addAll(offset + index, c);
+                size += (expectedArray = getArray()).length - oldArray.length;
+                return modified;
+            }
+        }
+
         public void clear() {
-            synchronized (l.lock) {
+            synchronized (lock) {
                 checkForComodification();
-                l.removeRange(offset, offset+size);
-                expectedArray = l.getArray();
+                removeRange(offset, offset + size);
+                expectedArray = getArray();
                 size = 0;
             }
         }
 
         public E remove(int index) {
-            synchronized (l.lock) {
+            synchronized (lock) {
                 rangeCheck(index);
                 checkForComodification();
-                E result = l.remove(index+offset);
-                expectedArray = l.getArray();
+                E result = CopyOnWriteArrayList.this.remove(offset + index);
+                expectedArray = getArray();
                 size--;
                 return result;
             }
         }
 
         public boolean remove(Object o) {
-            int index = indexOf(o);
-            if (index == -1)
-                return false;
-            remove(index);
-            return true;
-        }
-
-        public Iterator<E> iterator() {
-            synchronized (l.lock) {
+            synchronized (lock) {
                 checkForComodification();
-                return new COWSubListIterator<E>(l, 0, offset, size);
+                int index = indexOf(o);
+                if (index == -1)
+                    return false;
+                remove(index);
+                return true;
             }
         }
 
+        public Iterator<E> iterator() {
+            return listIterator(0);
+        }
+
+        public ListIterator<E> listIterator() {
+            return listIterator(0);
+        }
+
         public ListIterator<E> listIterator(int index) {
-            synchronized (l.lock) {
+            synchronized (lock) {
                 checkForComodification();
-                if (index < 0 || index > size)
-                    throw new IndexOutOfBoundsException
-                        (outOfBounds(index, size));
-                return new COWSubListIterator<E>(l, index, offset, size);
+                rangeCheckForAdd(index);
+                return new COWSubListIterator<E>(
+                    CopyOnWriteArrayList.this, index, offset, size);
             }
         }
 
         public List<E> subList(int fromIndex, int toIndex) {
-            synchronized (l.lock) {
+            synchronized (lock) {
                 checkForComodification();
                 if (fromIndex < 0 || toIndex > size || fromIndex > toIndex)
                     throw new IndexOutOfBoundsException();
-                return new COWSubList<E>(l, fromIndex + offset,
-                                         toIndex + offset);
+                return new COWSubList(expectedArray, fromIndex + offset, toIndex - fromIndex);
             }
         }
 
         public void forEach(Consumer<? super E> action) {
-            if (action == null) throw new NullPointerException();
-            int lo = offset;
-            int hi = offset + size;
-            Object[] a = expectedArray;
-            if (l.getArray() != a)
-                throw new ConcurrentModificationException();
-            if (lo < 0 || hi > a.length)
-                throw new IndexOutOfBoundsException();
-            for (int i = lo; i < hi; ++i) {
-                @SuppressWarnings("unchecked") E e = (E) a[i];
-                action.accept(e);
+            Objects.requireNonNull(action);
+            int i, end; final Object[] es;
+            synchronized (lock) {
+                es = getArrayChecked();
+                i = offset;
+                end = i + size;
             }
+            for (; i < end; i++)
+                action.accept(elementAt(es, i));
         }
 
         public void replaceAll(UnaryOperator<E> operator) {
-            if (operator == null) throw new NullPointerException();
-            synchronized (l.lock) {
-                int lo = offset;
-                int hi = offset + size;
-                Object[] elements = expectedArray;
-                if (l.getArray() != elements)
-                    throw new ConcurrentModificationException();
-                int len = elements.length;
-                if (lo < 0 || hi > len)
-                    throw new IndexOutOfBoundsException();
-                Object[] newElements = Arrays.copyOf(elements, len);
-                for (int i = lo; i < hi; ++i) {
-                    @SuppressWarnings("unchecked") E e = (E) elements[i];
-                    newElements[i] = operator.apply(e);
-                }
-                l.setArray(expectedArray = newElements);
+            synchronized (lock) {
+                checkForComodification();
+                replaceAllRange(operator, offset, offset + size);
+                expectedArray = getArray();
             }
         }
 
         public void sort(Comparator<? super E> c) {
-            synchronized (l.lock) {
-                int lo = offset;
-                int hi = offset + size;
-                Object[] elements = expectedArray;
-                if (l.getArray() != elements)
-                    throw new ConcurrentModificationException();
-                int len = elements.length;
-                if (lo < 0 || hi > len)
-                    throw new IndexOutOfBoundsException();
-                Object[] newElements = Arrays.copyOf(elements, len);
-                @SuppressWarnings("unchecked") E[] es = (E[])newElements;
-                Arrays.sort(es, lo, hi, c);
-                l.setArray(expectedArray = newElements);
+            synchronized (lock) {
+                checkForComodification();
+                sortRange(c, offset, offset + size);
+                expectedArray = getArray();
             }
         }
 
         public boolean removeAll(Collection<?> c) {
-            if (c == null) throw new NullPointerException();
-            boolean removed = false;
-            synchronized (l.lock) {
-                int n = size;
-                if (n > 0) {
-                    int lo = offset;
-                    int hi = offset + n;
-                    Object[] elements = expectedArray;
-                    if (l.getArray() != elements)
-                        throw new ConcurrentModificationException();
-                    int len = elements.length;
-                    if (lo < 0 || hi > len)
-                        throw new IndexOutOfBoundsException();
-                    int newSize = 0;
-                    Object[] temp = new Object[n];
-                    for (int i = lo; i < hi; ++i) {
-                        Object element = elements[i];
-                        if (!c.contains(element))
-                            temp[newSize++] = element;
-                    }
-                    if (newSize != n) {
-                        Object[] newElements = new Object[len - n + newSize];
-                        System.arraycopy(elements, 0, newElements, 0, lo);
-                        System.arraycopy(temp, 0, newElements, lo, newSize);
-                        System.arraycopy(elements, hi, newElements,
-                                         lo + newSize, len - hi);
-                        size = newSize;
-                        removed = true;
-                        l.setArray(expectedArray = newElements);
-                    }
-                }
-            }
-            return removed;
+            Objects.requireNonNull(c);
+            return bulkRemove(e -> c.contains(e));
         }
 
         public boolean retainAll(Collection<?> c) {
-            if (c == null) throw new NullPointerException();
-            boolean removed = false;
-            synchronized (l.lock) {
-                int n = size;
-                if (n > 0) {
-                    int lo = offset;
-                    int hi = offset + n;
-                    Object[] elements = expectedArray;
-                    if (l.getArray() != elements)
-                        throw new ConcurrentModificationException();
-                    int len = elements.length;
-                    if (lo < 0 || hi > len)
-                        throw new IndexOutOfBoundsException();
-                    int newSize = 0;
-                    Object[] temp = new Object[n];
-                    for (int i = lo; i < hi; ++i) {
-                        Object element = elements[i];
-                        if (c.contains(element))
-                            temp[newSize++] = element;
-                    }
-                    if (newSize != n) {
-                        Object[] newElements = new Object[len - n + newSize];
-                        System.arraycopy(elements, 0, newElements, 0, lo);
-                        System.arraycopy(temp, 0, newElements, lo, newSize);
-                        System.arraycopy(elements, hi, newElements,
-                                         lo + newSize, len - hi);
-                        size = newSize;
-                        removed = true;
-                        l.setArray(expectedArray = newElements);
-                    }
-                }
-            }
-            return removed;
+            Objects.requireNonNull(c);
+            return bulkRemove(e -> !c.contains(e));
         }
 
         public boolean removeIf(Predicate<? super E> filter) {
-            if (filter == null) throw new NullPointerException();
-            boolean removed = false;
-            synchronized (l.lock) {
-                int n = size;
-                if (n > 0) {
-                    int lo = offset;
-                    int hi = offset + n;
-                    Object[] elements = expectedArray;
-                    if (l.getArray() != elements)
-                        throw new ConcurrentModificationException();
-                    int len = elements.length;
-                    if (lo < 0 || hi > len)
-                        throw new IndexOutOfBoundsException();
-                    int newSize = 0;
-                    Object[] temp = new Object[n];
-                    for (int i = lo; i < hi; ++i) {
-                        @SuppressWarnings("unchecked") E e = (E) elements[i];
-                        if (!filter.test(e))
-                            temp[newSize++] = e;
-                    }
-                    if (newSize != n) {
-                        Object[] newElements = new Object[len - n + newSize];
-                        System.arraycopy(elements, 0, newElements, 0, lo);
-                        System.arraycopy(temp, 0, newElements, lo, newSize);
-                        System.arraycopy(elements, hi, newElements,
-                                         lo + newSize, len - hi);
-                        size = newSize;
-                        removed = true;
-                        l.setArray(expectedArray = newElements);
-                    }
-                }
+            Objects.requireNonNull(filter);
+            return bulkRemove(filter);
+        }
+
+        private boolean bulkRemove(Predicate<? super E> filter) {
+            synchronized (lock) {
+                final Object[] oldArray = getArrayChecked();
+                boolean modified = CopyOnWriteArrayList.this.bulkRemove(
+                    filter, offset, offset + size);
+                size += (expectedArray = getArray()).length - oldArray.length;
+                return modified;
             }
-            return removed;
         }
 
         public Spliterator<E> spliterator() {
-            int lo = offset;
-            int hi = offset + size;
-            Object[] a = expectedArray;
-            if (l.getArray() != a)
-                throw new ConcurrentModificationException();
-            if (lo < 0 || hi > a.length)
-                throw new IndexOutOfBoundsException();
-            return Spliterators.spliterator
-                (a, lo, hi, Spliterator.IMMUTABLE | Spliterator.ORDERED);
+            synchronized (lock) {
+                return Spliterators.spliterator(
+                        getArrayChecked(), offset, offset + size,
+                        Spliterator.IMMUTABLE | Spliterator.ORDERED);
+            }
         }
 
     }
@@ -1483,7 +1531,7 @@
         COWSubListIterator(List<E> l, int index, int offset, int size) {
             this.offset = offset;
             this.size = size;
-            it = l.listIterator(index+offset);
+            it = l.listIterator(index + offset);
         }
 
         public boolean hasNext() {
@@ -1532,23 +1580,27 @@
         @SuppressWarnings("unchecked")
         public void forEachRemaining(Consumer<? super E> action) {
             Objects.requireNonNull(action);
-            while (nextIndex() < size) {
+            while (hasNext()) {
                 action.accept(it.next());
             }
         }
     }
 
-    // Support for resetting lock while deserializing
+    /** Initializes the lock; for use when deserializing or cloning. */
     private void resetLock() {
-        U.putObjectVolatile(this, LOCK, new Object());
-    }
-    private static final sun.misc.Unsafe U = sun.misc.Unsafe.getUnsafe();
-    private static final long LOCK;
-    static {
+        Field lockField = java.security.AccessController.doPrivileged(
+            (java.security.PrivilegedAction<Field>) () -> {
+                try {
+                    Field f = CopyOnWriteArrayList.class
+                        .getDeclaredField("lock");
+                    f.setAccessible(true);
+                    return f;
+                } catch (ReflectiveOperationException e) {
+                    throw new Error(e);
+                }});
         try {
-            LOCK = U.objectFieldOffset
-                (CopyOnWriteArrayList.class.getDeclaredField("lock"));
-        } catch (ReflectiveOperationException e) {
+            lockField.set(this, new Object());
+        } catch (IllegalAccessException e) {
             throw new Error(e);
         }
     }
diff --git a/ojluni/src/main/java/java/util/concurrent/CopyOnWriteArraySet.java b/ojluni/src/main/java/java/util/concurrent/CopyOnWriteArraySet.java
index fb707dd..b14a3e8 100644
--- a/ojluni/src/main/java/java/util/concurrent/CopyOnWriteArraySet.java
+++ b/ojluni/src/main/java/java/util/concurrent/CopyOnWriteArraySet.java
@@ -45,13 +45,8 @@
 import java.util.function.Consumer;
 import java.util.function.Predicate;
 
-// BEGIN android-note
-// removed link to collections framework docs
-// fixed framework docs link to "Collection#optional"
-// END android-note
-
 /**
- * A {@link java.util.Set} that uses an internal {@link CopyOnWriteArrayList}
+ * A {@link Set} that uses an internal {@link CopyOnWriteArrayList}
  * for all of its operations.  Thus, it shares the same basic properties:
  * <ul>
  *  <li>It is best suited for applications in which set sizes generally
@@ -91,6 +86,10 @@
  *   }
  * }}</pre>
  *
+ * <p>This class is a member of the
+ * <a href="{@docRoot}/java.base/java/util/package-summary.html#CollectionsFramework">
+ * Java Collections Framework</a>.
+ *
  * @see CopyOnWriteArrayList
  * @since 1.5
  * @author Doug Lea
@@ -341,10 +340,10 @@
      * @return {@code true} if this set changed as a result of the call
      * @throws ClassCastException if the class of an element of this set
      *         is incompatible with the specified collection
-     * (<a href="../Collection.html#optional-restrictions">optional</a>)
+     * (<a href="{@docRoot}/java.base/java/util/Collection.html#optional-restrictions">optional</a>)
      * @throws NullPointerException if this set contains a null element and the
      *         specified collection does not permit null elements
-     * (<a href="../Collection.html#optional-restrictions">optional</a>),
+     * (<a href="{@docRoot}/java.base/java/util/Collection.html#optional-restrictions">optional</a>),
      *         or if the specified collection is null
      * @see #remove(Object)
      */
@@ -364,10 +363,10 @@
      * @return {@code true} if this set changed as a result of the call
      * @throws ClassCastException if the class of an element of this set
      *         is incompatible with the specified collection
-     * (<a href="../Collection.html#optional-restrictions">optional</a>)
+     * (<a href="{@docRoot}/java.base/java/util/Collection.html#optional-restrictions">optional</a>)
      * @throws NullPointerException if this set contains a null element and the
      *         specified collection does not permit null elements
-     * (<a href="../Collection.html#optional-restrictions">optional</a>),
+     * (<a href="{@docRoot}/java.base/java/util/Collection.html#optional-restrictions">optional</a>),
      *         or if the specified collection is null
      * @see #remove(Object)
      */
@@ -412,10 +411,16 @@
                 && compareSets(al.getArray(), (Set<?>) o) == 0);
     }
 
+    /**
+     * @throws NullPointerException {@inheritDoc}
+     */
     public boolean removeIf(Predicate<? super E> filter) {
         return al.removeIf(filter);
     }
 
+    /**
+     * @throws NullPointerException {@inheritDoc}
+     */
     public void forEach(Consumer<? super E> action) {
         al.forEach(action);
     }
diff --git a/ojluni/src/main/java/java/util/concurrent/CountedCompleter.java b/ojluni/src/main/java/java/util/concurrent/CountedCompleter.java
index a29208e..a91d12d 100644
--- a/ojluni/src/main/java/java/util/concurrent/CountedCompleter.java
+++ b/ojluni/src/main/java/java/util/concurrent/CountedCompleter.java
@@ -35,6 +35,9 @@
 
 package java.util.concurrent;
 
+import java.lang.invoke.MethodHandles;
+import java.lang.invoke.VarHandle;
+
 /**
  * A {@link ForkJoinTask} with a completion action performed when
  * triggered and there are no remaining pending actions.
@@ -54,8 +57,7 @@
  * decremented; otherwise, the completion action is performed, and if
  * this completer itself has a completer, the process is continued
  * with its completer.  As is the case with related synchronization
- * components such as {@link java.util.concurrent.Phaser Phaser} and
- * {@link java.util.concurrent.Semaphore Semaphore}, these methods
+ * components such as {@link Phaser} and {@link Semaphore}, these methods
  * affect only internal counts; they do not establish any further
  * internal bookkeeping. In particular, the identities of pending
  * tasks are not maintained. As illustrated below, you can create
@@ -117,102 +119,114 @@
  * to complete for some elements than others, either because of
  * intrinsic variation (for example I/O) or auxiliary effects such as
  * garbage collection.  Because CountedCompleters provide their own
- * continuations, other threads need not block waiting to perform
- * them.
+ * continuations, other tasks need not block waiting to perform them.
  *
- * <p>For example, here is an initial version of a class that uses
- * divide-by-two recursive decomposition to divide work into single
- * pieces (leaf tasks). Even when work is split into individual calls,
- * tree-based techniques are usually preferable to directly forking
- * leaf tasks, because they reduce inter-thread communication and
- * improve load balancing. In the recursive case, the second of each
- * pair of subtasks to finish triggers completion of its parent
+ * <p>For example, here is an initial version of a utility method that
+ * uses divide-by-two recursive decomposition to divide work into
+ * single pieces (leaf tasks). Even when work is split into individual
+ * calls, tree-based techniques are usually preferable to directly
+ * forking leaf tasks, because they reduce inter-thread communication
+ * and improve load balancing. In the recursive case, the second of
+ * each pair of subtasks to finish triggers completion of their parent
  * (because no result combination is performed, the default no-op
  * implementation of method {@code onCompletion} is not overridden).
- * A static utility method sets up the base task and invokes it
- * (here, implicitly using the {@link ForkJoinPool#commonPool()}).
+ * The utility method sets up the root task and invokes it (here,
+ * implicitly using the {@link ForkJoinPool#commonPool()}).  It is
+ * straightforward and reliable (but not optimal) to always set the
+ * pending count to the number of child tasks and call {@code
+ * tryComplete()} immediately before returning.
  *
  * <pre> {@code
- * class MyOperation<E> { void apply(E e) { ... }  }
- *
- * class ForEach<E> extends CountedCompleter<Void> {
- *
- *   public static <E> void forEach(E[] array, MyOperation<E> op) {
- *     new ForEach<E>(null, array, op, 0, array.length).invoke();
- *   }
- *
- *   final E[] array; final MyOperation<E> op; final int lo, hi;
- *   ForEach(CountedCompleter<?> p, E[] array, MyOperation<E> op, int lo, int hi) {
- *     super(p);
- *     this.array = array; this.op = op; this.lo = lo; this.hi = hi;
- *   }
- *
- *   public void compute() { // version 1
- *     if (hi - lo >= 2) {
- *       int mid = (lo + hi) >>> 1;
- *       setPendingCount(2); // must set pending count before fork
- *       new ForEach(this, array, op, mid, hi).fork(); // right child
- *       new ForEach(this, array, op, lo, mid).fork(); // left child
+ * public static <E> void forEach(E[] array, Consumer<E> action) {
+ *   class Task extends CountedCompleter<Void> {
+ *     final int lo, hi;
+ *     Task(Task parent, int lo, int hi) {
+ *       super(parent); this.lo = lo; this.hi = hi;
  *     }
- *     else if (hi > lo)
- *       op.apply(array[lo]);
- *     tryComplete();
+ *
+ *     public void compute() {
+ *       if (hi - lo >= 2) {
+ *         int mid = (lo + hi) >>> 1;
+ *         // must set pending count before fork
+ *         setPendingCount(2);
+ *         new Task(this, mid, hi).fork(); // right child
+ *         new Task(this, lo, mid).fork(); // left child
+ *       }
+ *       else if (hi > lo)
+ *         action.accept(array[lo]);
+ *       tryComplete();
+ *     }
  *   }
+ *   new Task(null, 0, array.length).invoke();
  * }}</pre>
  *
  * This design can be improved by noticing that in the recursive case,
  * the task has nothing to do after forking its right task, so can
  * directly invoke its left task before returning. (This is an analog
- * of tail recursion removal.)  Also, because the task returns upon
- * executing its left task (rather than falling through to invoke
- * {@code tryComplete}) the pending count is set to one:
+ * of tail recursion removal.)  Also, when the last action in a task
+ * is to fork or invoke a subtask (a "tail call"), the call to {@code
+ * tryComplete()} can be optimized away, at the cost of making the
+ * pending count look "off by one".
  *
  * <pre> {@code
- * class ForEach<E> ... {
- *   ...
- *   public void compute() { // version 2
- *     if (hi - lo >= 2) {
- *       int mid = (lo + hi) >>> 1;
- *       setPendingCount(1); // only one pending
- *       new ForEach(this, array, op, mid, hi).fork(); // right child
- *       new ForEach(this, array, op, lo, mid).compute(); // direct invoke
- *     }
- *     else {
- *       if (hi > lo)
- *         op.apply(array[lo]);
- *       tryComplete();
- *     }
- *   }
- * }}</pre>
+ *     public void compute() {
+ *       if (hi - lo >= 2) {
+ *         int mid = (lo + hi) >>> 1;
+ *         setPendingCount(1); // looks off by one, but correct!
+ *         new Task(this, mid, hi).fork(); // right child
+ *         new Task(this, lo, mid).compute(); // direct invoke
+ *       } else {
+ *         if (hi > lo)
+ *           action.accept(array[lo]);
+ *         tryComplete();
+ *       }
+ *     }}</pre>
  *
  * As a further optimization, notice that the left task need not even exist.
- * Instead of creating a new one, we can iterate using the original task,
+ * Instead of creating a new one, we can continue using the original task,
  * and add a pending count for each fork.  Additionally, because no task
  * in this tree implements an {@link #onCompletion(CountedCompleter)} method,
- * {@code tryComplete()} can be replaced with {@link #propagateCompletion}.
+ * {@code tryComplete} can be replaced with {@link #propagateCompletion}.
  *
  * <pre> {@code
- * class ForEach<E> ... {
- *   ...
- *   public void compute() { // version 3
- *     int l = lo, h = hi;
- *     while (h - l >= 2) {
- *       int mid = (l + h) >>> 1;
- *       addToPendingCount(1);
- *       new ForEach(this, array, op, mid, h).fork(); // right child
- *       h = mid;
+ *     public void compute() {
+ *       int n = hi - lo;
+ *       for (; n >= 2; n /= 2) {
+ *         addToPendingCount(1);
+ *         new Task(this, lo + n/2, lo + n).fork();
+ *       }
+ *       if (n > 0)
+ *         action.accept(array[lo]);
+ *       propagateCompletion();
+ *     }}</pre>
+ *
+ * When pending counts can be precomputed, they can be established in
+ * the constructor:
+ *
+ * <pre> {@code
+ * public static <E> void forEach(E[] array, Consumer<E> action) {
+ *   class Task extends CountedCompleter<Void> {
+ *     final int lo, hi;
+ *     Task(Task parent, int lo, int hi) {
+ *       super(parent, 31 - Integer.numberOfLeadingZeros(hi - lo));
+ *       this.lo = lo; this.hi = hi;
  *     }
- *     if (h > l)
- *       op.apply(array[l]);
- *     propagateCompletion();
+ *
+ *     public void compute() {
+ *       for (int n = hi - lo; n >= 2; n /= 2)
+ *         new Task(this, lo + n/2, lo + n).fork();
+ *       action.accept(array[lo]);
+ *       propagateCompletion();
+ *     }
  *   }
+ *   if (array.length > 0)
+ *     new Task(null, 0, array.length).invoke();
  * }}</pre>
  *
- * Additional optimizations of such classes might entail precomputing
- * pending counts so that they can be established in constructors,
- * specializing classes for leaf steps, subdividing by say, four,
- * instead of two per iteration, and using an adaptive threshold
- * instead of always subdividing down to single elements.
+ * Additional optimizations of such classes might entail specializing
+ * classes for leaf steps, subdividing by say, four, instead of two
+ * per iteration, and using an adaptive threshold instead of always
+ * subdividing down to single elements.
  *
  * <p><b>Searching.</b> A tree of CountedCompleters can search for a
  * value or property in different parts of a data structure, and
@@ -524,7 +538,7 @@
      * @param delta the value to add
      */
     public final void addToPendingCount(int delta) {
-        U.getAndAddInt(this, PENDING, delta);
+        PENDING.getAndAdd(this, delta);
     }
 
     /**
@@ -536,7 +550,7 @@
      * @return {@code true} if successful
      */
     public final boolean compareAndSetPendingCount(int expected, int count) {
-        return U.compareAndSwapInt(this, PENDING, expected, count);
+        return PENDING.compareAndSet(this, expected, count);
     }
 
     /**
@@ -548,7 +562,7 @@
     public final int decrementPendingCountUnlessZero() {
         int c;
         do {} while ((c = pending) != 0 &&
-                     !U.compareAndSwapInt(this, PENDING, c, c - 1));
+                     !PENDING.weakCompareAndSet(this, c, c - 1));
         return c;
     }
 
@@ -581,7 +595,7 @@
                     return;
                 }
             }
-            else if (U.compareAndSwapInt(a, PENDING, c, c - 1))
+            else if (PENDING.weakCompareAndSet(a, c, c - 1))
                 return;
         }
     }
@@ -596,7 +610,7 @@
      * not, be invoked for each completer in a computation.
      */
     public final void propagateCompletion() {
-        CountedCompleter<?> a = this, s = a;
+        CountedCompleter<?> a = this, s;
         for (int c;;) {
             if ((c = a.pending) == 0) {
                 if ((a = (s = a).completer) == null) {
@@ -604,7 +618,7 @@
                     return;
                 }
             }
-            else if (U.compareAndSwapInt(a, PENDING, c, c - 1))
+            else if (PENDING.weakCompareAndSet(a, c, c - 1))
                 return;
         }
     }
@@ -649,7 +663,7 @@
         for (int c;;) {
             if ((c = pending) == 0)
                 return this;
-            else if (U.compareAndSwapInt(this, PENDING, c, c - 1))
+            else if (PENDING.weakCompareAndSet(this, c, c - 1))
                 return null;
         }
     }
@@ -721,7 +735,7 @@
         CountedCompleter<?> a = this, s = a;
         while (a.onExceptionalCompletion(ex, s) &&
                (a = (s = a).completer) != null && a.status >= 0 &&
-               a.recordExceptionalCompletion(ex) == EXCEPTIONAL)
+               isExceptionalStatus(a.recordExceptionalCompletion(ex)))
             ;
     }
 
@@ -753,15 +767,15 @@
      */
     protected void setRawResult(T t) { }
 
-    // Unsafe mechanics
-    private static final sun.misc.Unsafe U = sun.misc.Unsafe.getUnsafe();
-    private static final long PENDING;
+    // VarHandle mechanics
+    private static final VarHandle PENDING;
     static {
         try {
-            PENDING = U.objectFieldOffset
-                (CountedCompleter.class.getDeclaredField("pending"));
+            MethodHandles.Lookup l = MethodHandles.lookup();
+            PENDING = l.findVarHandle(CountedCompleter.class, "pending", int.class);
+
         } catch (ReflectiveOperationException e) {
-            throw new Error(e);
+            throw new ExceptionInInitializerError(e);
         }
     }
 }
diff --git a/ojluni/src/main/java/java/util/concurrent/CyclicBarrier.java b/ojluni/src/main/java/java/util/concurrent/CyclicBarrier.java
index 5e018f1..269bec4 100644
--- a/ojluni/src/main/java/java/util/concurrent/CyclicBarrier.java
+++ b/ojluni/src/main/java/java/util/concurrent/CyclicBarrier.java
@@ -82,8 +82,7 @@
  *   public Solver(float[][] matrix) {
  *     data = matrix;
  *     N = matrix.length;
- *     Runnable barrierAction =
- *       new Runnable() { public void run() { mergeRows(...); }};
+ *     Runnable barrierAction = () -> mergeRows(...);
  *     barrier = new CyclicBarrier(N, barrierAction);
  *
  *     List<Thread> threads = new ArrayList<>(N);
@@ -132,10 +131,10 @@
  * <i>happen-before</i> actions following a successful return from the
  * corresponding {@code await()} in other threads.
  *
- * @since 1.5
  * @see CountDownLatch
  *
  * @author Doug Lea
+ * @since 1.5
  */
 public class CyclicBarrier {
     /**
@@ -150,7 +149,8 @@
      * but no subsequent reset.
      */
     private static class Generation {
-        boolean broken;         // initially false
+        Generation() {}                 // prevent access constructor creation
+        boolean broken;                 // initially false
     }
 
     /** The lock for guarding barrier entry */
diff --git a/ojluni/src/main/java/java/util/concurrent/DelayQueue.java b/ojluni/src/main/java/java/util/concurrent/DelayQueue.java
index 04a83d9..bf0858d 100644
--- a/ojluni/src/main/java/java/util/concurrent/DelayQueue.java
+++ b/ojluni/src/main/java/java/util/concurrent/DelayQueue.java
@@ -41,14 +41,11 @@
 import java.util.Collection;
 import java.util.Iterator;
 import java.util.NoSuchElementException;
+import java.util.Objects;
 import java.util.PriorityQueue;
 import java.util.concurrent.locks.Condition;
 import java.util.concurrent.locks.ReentrantLock;
 
-// BEGIN android-note
-// removed link to collections framework docs
-// END android-note
-
 /**
  * An unbounded {@linkplain BlockingQueue blocking queue} of
  * {@code Delayed} elements, in which an element can only be taken
@@ -63,11 +60,15 @@
  * returns the count of both expired and unexpired elements.
  * This queue does not permit null elements.
  *
- * <p>This class and its iterator implement all of the
- * <em>optional</em> methods of the {@link Collection} and {@link
- * Iterator} interfaces.  The Iterator provided in method {@link
- * #iterator()} is <em>not</em> guaranteed to traverse the elements of
- * the DelayQueue in any particular order.
+ * <p>This class and its iterator implement all of the <em>optional</em>
+ * methods of the {@link Collection} and {@link Iterator} interfaces.
+ * The Iterator provided in method {@link #iterator()} is <em>not</em>
+ * guaranteed to traverse the elements of the DelayQueue in any
+ * particular order.
+ *
+ * <p>This class is a member of the
+ * <a href="{@docRoot}/java.base/java/util/package-summary.html#CollectionsFramework">
+ * Java Collections Framework</a>.
  *
  * @since 1.5
  * @author Doug Lea
@@ -322,40 +323,13 @@
     }
 
     /**
-     * Returns first element only if it is expired.
-     * Used only by drainTo.  Call only when holding lock.
-     */
-    private E peekExpired() {
-        // assert lock.isHeldByCurrentThread();
-        E first = q.peek();
-        return (first == null || first.getDelay(NANOSECONDS) > 0) ?
-            null : first;
-    }
-
-    /**
      * @throws UnsupportedOperationException {@inheritDoc}
      * @throws ClassCastException            {@inheritDoc}
      * @throws NullPointerException          {@inheritDoc}
      * @throws IllegalArgumentException      {@inheritDoc}
      */
     public int drainTo(Collection<? super E> c) {
-        if (c == null)
-            throw new NullPointerException();
-        if (c == this)
-            throw new IllegalArgumentException();
-        final ReentrantLock lock = this.lock;
-        lock.lock();
-        try {
-            int n = 0;
-            for (E e; (e = peekExpired()) != null;) {
-                c.add(e);       // In this order, in case add() throws.
-                q.poll();
-                ++n;
-            }
-            return n;
-        } finally {
-            lock.unlock();
-        }
+        return drainTo(c, Integer.MAX_VALUE);
     }
 
     /**
@@ -365,8 +339,7 @@
      * @throws IllegalArgumentException      {@inheritDoc}
      */
     public int drainTo(Collection<? super E> c, int maxElements) {
-        if (c == null)
-            throw new NullPointerException();
+        Objects.requireNonNull(c);
         if (c == this)
             throw new IllegalArgumentException();
         if (maxElements <= 0)
@@ -375,8 +348,11 @@
         lock.lock();
         try {
             int n = 0;
-            for (E e; n < maxElements && (e = peekExpired()) != null;) {
-                c.add(e);       // In this order, in case add() throws.
+            for (E first;
+                 n < maxElements
+                     && (first = q.peek()) != null
+                     && first.getDelay(NANOSECONDS) <= 0;) {
+                c.add(first);   // In this order, in case add() throws.
                 q.poll();
                 ++n;
             }
@@ -547,8 +523,7 @@
         public E next() {
             if (cursor >= array.length)
                 throw new NoSuchElementException();
-            lastRet = cursor;
-            return (E)array[cursor++];
+            return (E)array[lastRet = cursor++];
         }
 
         public void remove() {
diff --git a/ojluni/src/main/java/java/util/concurrent/Exchanger.java b/ojluni/src/main/java/java/util/concurrent/Exchanger.java
index f01a705..38c43fe 100644
--- a/ojluni/src/main/java/java/util/concurrent/Exchanger.java
+++ b/ojluni/src/main/java/java/util/concurrent/Exchanger.java
@@ -36,6 +36,10 @@
 
 package java.util.concurrent;
 
+import java.lang.invoke.MethodHandles;
+import java.lang.invoke.VarHandle;
+import java.util.concurrent.locks.LockSupport;
+
 /**
  * A synchronization point at which threads can pair and swap elements
  * within pairs.  Each thread presents some object on entry to the
@@ -155,9 +159,7 @@
      * a value that is enough for common platforms.  Additionally,
      * extra care elsewhere is taken to avoid other false/unintended
      * sharing and to enhance locality, including adding padding (via
-     * @Contended) to Nodes, embedding "bound" as an Exchanger field,
-     * and reworking some park/unpark mechanics compared to
-     * LockSupport versions.
+     * @Contended) to Nodes, embedding "bound" as an Exchanger field.
      *
      * The arena starts out with only one used slot. We expand the
      * effective arena size by tracking collisions; i.e., failed CASes
@@ -233,29 +235,23 @@
      * As is too common in this sort of code, methods are monolithic
      * because most of the logic relies on reads of fields that are
      * maintained as local variables so can't be nicely factored --
-     * mainly, here, bulky spin->yield->block/cancel code), and
-     * heavily dependent on intrinsics (Unsafe) to use inlined
-     * embedded CAS and related memory access operations (that tend
-     * not to be as readily inlined by dynamic compilers when they are
-     * hidden behind other methods that would more nicely name and
-     * encapsulate the intended effects). This includes the use of
-     * putOrderedX to clear fields of the per-thread Nodes between
-     * uses. Note that field Node.item is not declared as volatile
-     * even though it is read by releasing threads, because they only
-     * do so after CAS operations that must precede access, and all
-     * uses by the owning thread are otherwise acceptably ordered by
-     * other operations. (Because the actual points of atomicity are
-     * slot CASes, it would also be legal for the write to Node.match
-     * in a release to be weaker than a full volatile write. However,
-     * this is not done because it could allow further postponement of
-     * the write, delaying progress.)
+     * mainly, here, bulky spin->yield->block/cancel code.  Note that
+     * field Node.item is not declared as volatile even though it is
+     * read by releasing threads, because they only do so after CAS
+     * operations that must precede access, and all uses by the owning
+     * thread are otherwise acceptably ordered by other operations.
+     * (Because the actual points of atomicity are slot CASes, it
+     * would also be legal for the write to Node.match in a release to
+     * be weaker than a full volatile write. However, this is not done
+     * because it could allow further postponement of the write,
+     * delaying progress.)
      */
 
     /**
-     * The byte distance (as a shift value) between any two used slots
-     * in the arena.  1 << ASHIFT should be at least cacheline size.
+     * The index distance (as a shift value) between any two used slots
+     * in the arena, spacing them out to avoid false sharing.
      */
-    private static final int ASHIFT = 7;
+    private static final int ASHIFT = 5;
 
     /**
      * The maximum supported arena index. The maximum allocatable
@@ -358,27 +354,31 @@
      */
     private final Object arenaExchange(Object item, boolean timed, long ns) {
         Node[] a = arena;
+        int alen = a.length;
         Node p = participant.get();
         for (int i = p.index;;) {                      // access slot at i
-            int b, m, c; long j;                       // j is raw array offset
-            Node q = (Node)U.getObjectVolatile(a, j = (i << ASHIFT) + ABASE);
-            if (q != null && U.compareAndSwapObject(a, j, q, null)) {
+            int b, m, c;
+            int j = (i << ASHIFT) + ((1 << ASHIFT) - 1);
+            if (j < 0 || j >= alen)
+                j = alen - 1;
+            Node q = (Node)AA.getAcquire(a, j);
+            if (q != null && AA.compareAndSet(a, j, q, null)) {
                 Object v = q.item;                     // release
                 q.match = item;
                 Thread w = q.parked;
                 if (w != null)
-                    U.unpark(w);
+                    LockSupport.unpark(w);
                 return v;
             }
             else if (i <= (m = (b = bound) & MMASK) && q == null) {
                 p.item = item;                         // offer
-                if (U.compareAndSwapObject(a, j, null, p)) {
+                if (AA.compareAndSet(a, j, null, p)) {
                     long end = (timed && m == 0) ? System.nanoTime() + ns : 0L;
                     Thread t = Thread.currentThread(); // wait
                     for (int h = p.hash, spins = SPINS;;) {
                         Object v = p.match;
                         if (v != null) {
-                            U.putOrderedObject(p, MATCH, null);
+                            MATCH.setRelease(p, null);
                             p.item = null;             // clear for next use
                             p.hash = h;
                             return v;
@@ -391,22 +391,24 @@
                                      (--spins & ((SPINS >>> 1) - 1)) == 0)
                                 Thread.yield();        // two yields per wait
                         }
-                        else if (U.getObjectVolatile(a, j) != p)
+                        else if (AA.getAcquire(a, j) != p)
                             spins = SPINS;       // releaser hasn't set match yet
                         else if (!t.isInterrupted() && m == 0 &&
                                  (!timed ||
                                   (ns = end - System.nanoTime()) > 0L)) {
-                            U.putObject(t, BLOCKER, this); // emulate LockSupport
                             p.parked = t;              // minimize window
-                            if (U.getObjectVolatile(a, j) == p)
-                                U.park(false, ns);
+                            if (AA.getAcquire(a, j) == p) {
+                                if (ns == 0L)
+                                    LockSupport.park(this);
+                                else
+                                    LockSupport.parkNanos(this, ns);
+                            }
                             p.parked = null;
-                            U.putObject(t, BLOCKER, null);
                         }
-                        else if (U.getObjectVolatile(a, j) == p &&
-                                 U.compareAndSwapObject(a, j, p, null)) {
+                        else if (AA.getAcquire(a, j) == p &&
+                                 AA.compareAndSet(a, j, p, null)) {
                             if (m != 0)                // try to shrink
-                                U.compareAndSwapInt(this, BOUND, b, b + SEQ - 1);
+                                BOUND.compareAndSet(this, b, b + SEQ - 1);
                             p.item = null;
                             p.hash = h;
                             i = p.index >>>= 1;        // descend
@@ -428,7 +430,7 @@
                     i = (i != m || m == 0) ? m : m - 1;
                 }
                 else if ((c = p.collides) < m || m == FULL ||
-                         !U.compareAndSwapInt(this, BOUND, b, b + SEQ + 1)) {
+                         !BOUND.compareAndSet(this, b, b + SEQ + 1)) {
                     p.collides = c + 1;
                     i = (i == 0) ? m : i - 1;          // cyclically traverse
                 }
@@ -457,24 +459,24 @@
 
         for (Node q;;) {
             if ((q = slot) != null) {
-                if (U.compareAndSwapObject(this, SLOT, q, null)) {
+                if (SLOT.compareAndSet(this, q, null)) {
                     Object v = q.item;
                     q.match = item;
                     Thread w = q.parked;
                     if (w != null)
-                        U.unpark(w);
+                        LockSupport.unpark(w);
                     return v;
                 }
                 // create arena on contention, but continue until slot null
                 if (NCPU > 1 && bound == 0 &&
-                    U.compareAndSwapInt(this, BOUND, 0, SEQ))
+                    BOUND.compareAndSet(this, 0, SEQ))
                     arena = new Node[(FULL + 2) << ASHIFT];
             }
             else if (arena != null)
                 return null; // caller must reroute to arenaExchange
             else {
                 p.item = item;
-                if (U.compareAndSwapObject(this, SLOT, null, p))
+                if (SLOT.compareAndSet(this, null, p))
                     break;
                 p.item = null;
             }
@@ -497,19 +499,21 @@
                 spins = SPINS;
             else if (!t.isInterrupted() && arena == null &&
                      (!timed || (ns = end - System.nanoTime()) > 0L)) {
-                U.putObject(t, BLOCKER, this);
                 p.parked = t;
-                if (slot == p)
-                    U.park(false, ns);
+                if (slot == p) {
+                    if (ns == 0L)
+                        LockSupport.park(this);
+                    else
+                        LockSupport.parkNanos(this, ns);
+                }
                 p.parked = null;
-                U.putObject(t, BLOCKER, null);
             }
-            else if (U.compareAndSwapObject(this, SLOT, p, null)) {
+            else if (SLOT.compareAndSet(this, p, null)) {
                 v = timed && ns <= 0L && !t.isInterrupted() ? TIMED_OUT : null;
                 break;
             }
         }
-        U.putOrderedObject(p, MATCH, null);
+        MATCH.setRelease(p, null);
         p.item = null;
         p.hash = h;
         return v;
@@ -558,8 +562,9 @@
     @SuppressWarnings("unchecked")
     public V exchange(V x) throws InterruptedException {
         Object v;
+        Node[] a;
         Object item = (x == null) ? NULL_ITEM : x; // translate null args
-        if ((arena != null ||
+        if (((a = arena) != null ||
              (v = slotExchange(item, false, 0L)) == null) &&
             ((Thread.interrupted() || // disambiguates null return
               (v = arenaExchange(item, false, 0L)) == null)))
@@ -625,33 +630,20 @@
         return (v == NULL_ITEM) ? null : (V)v;
     }
 
-    // Unsafe mechanics
-    private static final sun.misc.Unsafe U = sun.misc.Unsafe.getUnsafe();
-    private static final long BOUND;
-    private static final long SLOT;
-    private static final long MATCH;
-    private static final long BLOCKER;
-    private static final int ABASE;
+    // VarHandle mechanics
+    private static final VarHandle BOUND;
+    private static final VarHandle SLOT;
+    private static final VarHandle MATCH;
+    private static final VarHandle AA;
     static {
         try {
-            BOUND = U.objectFieldOffset
-                (Exchanger.class.getDeclaredField("bound"));
-            SLOT = U.objectFieldOffset
-                (Exchanger.class.getDeclaredField("slot"));
-
-            MATCH = U.objectFieldOffset
-                (Node.class.getDeclaredField("match"));
-
-            BLOCKER = U.objectFieldOffset
-                (Thread.class.getDeclaredField("parkBlocker"));
-
-            int scale = U.arrayIndexScale(Node[].class);
-            if ((scale & (scale - 1)) != 0 || scale > (1 << ASHIFT))
-                throw new Error("Unsupported array scale");
-            // ABASE absorbs padding in front of element 0
-            ABASE = U.arrayBaseOffset(Node[].class) + (1 << ASHIFT);
+            MethodHandles.Lookup l = MethodHandles.lookup();
+            BOUND = l.findVarHandle(Exchanger.class, "bound", int.class);
+            SLOT = l.findVarHandle(Exchanger.class, "slot", Node.class);
+            MATCH = l.findVarHandle(Node.class, "match", Object.class);
+            AA = MethodHandles.arrayElementVarHandle(Node[].class);
         } catch (ReflectiveOperationException e) {
-            throw new Error(e);
+            throw new ExceptionInInitializerError(e);
         }
     }
 
diff --git a/ojluni/src/main/java/java/util/concurrent/Executor.java b/ojluni/src/main/java/java/util/concurrent/Executor.java
index a615705..378cacd 100644
--- a/ojluni/src/main/java/java/util/concurrent/Executor.java
+++ b/ojluni/src/main/java/java/util/concurrent/Executor.java
@@ -87,14 +87,12 @@
  *     this.executor = executor;
  *   }
  *
- *   public synchronized void execute(final Runnable r) {
- *     tasks.add(new Runnable() {
- *       public void run() {
- *         try {
- *           r.run();
- *         } finally {
- *           scheduleNext();
- *         }
+ *   public synchronized void execute(Runnable r) {
+ *     tasks.add(() -> {
+ *       try {
+ *         r.run();
+ *       } finally {
+ *         scheduleNext();
  *       }
  *     });
  *     if (active == null) {
diff --git a/ojluni/src/main/java/java/util/concurrent/ExecutorCompletionService.java b/ojluni/src/main/java/java/util/concurrent/ExecutorCompletionService.java
index a093844..d60d3dd 100644
--- a/ojluni/src/main/java/java/util/concurrent/ExecutorCompletionService.java
+++ b/ojluni/src/main/java/java/util/concurrent/ExecutorCompletionService.java
@@ -56,13 +56,11 @@
  * void solve(Executor e,
  *            Collection<Callable<Result>> solvers)
  *     throws InterruptedException, ExecutionException {
- *   CompletionService<Result> ecs
- *       = new ExecutorCompletionService<Result>(e);
- *   for (Callable<Result> s : solvers)
- *     ecs.submit(s);
- *   int n = solvers.size();
- *   for (int i = 0; i < n; ++i) {
- *     Result r = ecs.take().get();
+ *   CompletionService<Result> cs
+ *       = new ExecutorCompletionService<>(e);
+ *   solvers.forEach(cs::submit);
+ *   for (int i = solvers.size(); i > 0; i--) {
+ *     Result r = cs.take().get();
  *     if (r != null)
  *       use(r);
  *   }
@@ -76,32 +74,31 @@
  * void solve(Executor e,
  *            Collection<Callable<Result>> solvers)
  *     throws InterruptedException {
- *   CompletionService<Result> ecs
- *       = new ExecutorCompletionService<Result>(e);
+ *   CompletionService<Result> cs
+ *       = new ExecutorCompletionService<>(e);
  *   int n = solvers.size();
  *   List<Future<Result>> futures = new ArrayList<>(n);
  *   Result result = null;
  *   try {
- *     for (Callable<Result> s : solvers)
- *       futures.add(ecs.submit(s));
- *     for (int i = 0; i < n; ++i) {
+ *     solvers.forEach(solver -> futures.add(cs.submit(solver)));
+ *     for (int i = n; i > 0; i--) {
  *       try {
- *         Result r = ecs.take().get();
+ *         Result r = cs.take().get();
  *         if (r != null) {
  *           result = r;
  *           break;
  *         }
  *       } catch (ExecutionException ignore) {}
  *     }
- *   }
- *   finally {
- *     for (Future<Result> f : futures)
- *       f.cancel(true);
+ *   } finally {
+ *     futures.forEach(future -> future.cancel(true));
  *   }
  *
  *   if (result != null)
  *     use(result);
  * }}</pre>
+ *
+ * @since 1.5
  */
 public class ExecutorCompletionService<V> implements CompletionService<V> {
     private final Executor executor;
@@ -177,6 +174,10 @@
         this.completionQueue = completionQueue;
     }
 
+    /**
+     * @throws RejectedExecutionException {@inheritDoc}
+     * @throws NullPointerException       {@inheritDoc}
+     */
     public Future<V> submit(Callable<V> task) {
         if (task == null) throw new NullPointerException();
         RunnableFuture<V> f = newTaskFor(task);
@@ -184,6 +185,10 @@
         return f;
     }
 
+    /**
+     * @throws RejectedExecutionException {@inheritDoc}
+     * @throws NullPointerException       {@inheritDoc}
+     */
     public Future<V> submit(Runnable task, V result) {
         if (task == null) throw new NullPointerException();
         RunnableFuture<V> f = newTaskFor(task, result);
diff --git a/ojluni/src/main/java/java/util/concurrent/Executors.java b/ojluni/src/main/java/java/util/concurrent/Executors.java
index 565fdeb..e8cc0c1 100644
--- a/ojluni/src/main/java/java/util/concurrent/Executors.java
+++ b/ojluni/src/main/java/java/util/concurrent/Executors.java
@@ -35,6 +35,7 @@
 
 package java.util.concurrent;
 
+import static java.lang.ref.Reference.reachabilityFence;
 import dalvik.annotation.optimization.ReachabilitySensitive;
 import java.security.AccessControlContext;
 import java.security.AccessControlException;
@@ -190,9 +191,7 @@
      * returned executor is guaranteed not to be reconfigurable to use
      * additional threads.
      *
-     * @param threadFactory the factory to use when creating new
-     * threads
-     *
+     * @param threadFactory the factory to use when creating new threads
      * @return the newly created single-threaded Executor
      * @throws NullPointerException if threadFactory is null
      */
@@ -231,6 +230,7 @@
      * will reuse previously constructed threads when they are
      * available, and uses the provided
      * ThreadFactory to create new threads when needed.
+     *
      * @param threadFactory the factory to use when creating new threads
      * @return the newly created thread pool
      * @throws NullPointerException if threadFactory is null
@@ -253,6 +253,7 @@
      * given time. Unlike the otherwise equivalent
      * {@code newScheduledThreadPool(1)} the returned executor is
      * guaranteed not to be reconfigurable to use additional threads.
+     *
      * @return the newly created scheduled executor
      */
     public static ScheduledExecutorService newSingleThreadScheduledExecutor() {
@@ -271,9 +272,9 @@
      * equivalent {@code newScheduledThreadPool(1, threadFactory)}
      * the returned executor is guaranteed not to be reconfigurable to
      * use additional threads.
-     * @param threadFactory the factory to use when creating new
-     * threads
-     * @return a newly created scheduled executor
+     *
+     * @param threadFactory the factory to use when creating new threads
+     * @return the newly created scheduled executor
      * @throws NullPointerException if threadFactory is null
      */
     public static ScheduledExecutorService newSingleThreadScheduledExecutor(ThreadFactory threadFactory) {
@@ -286,7 +287,7 @@
      * given delay, or to execute periodically.
      * @param corePoolSize the number of threads to keep in the pool,
      * even if they are idle
-     * @return a newly created scheduled thread pool
+     * @return the newly created scheduled thread pool
      * @throws IllegalArgumentException if {@code corePoolSize < 0}
      */
     public static ScheduledExecutorService newScheduledThreadPool(int corePoolSize) {
@@ -300,7 +301,7 @@
      * even if they are idle
      * @param threadFactory the factory to use when the executor
      * creates a new thread
-     * @return a newly created scheduled thread pool
+     * @return the newly created scheduled thread pool
      * @throws IllegalArgumentException if {@code corePoolSize < 0}
      * @throws NullPointerException if threadFactory is null
      */
@@ -462,6 +463,9 @@
             task.run();
             return result;
         }
+        public String toString() {
+            return super.toString() + "[Wrapped task = " + task + "]";
+        }
     }
 
     /**
@@ -488,6 +492,10 @@
                 throw e.getException();
             }
         }
+
+        public String toString() {
+            return super.toString() + "[Wrapped task = " + task + "]";
+        }
     }
 
     /**
@@ -543,6 +551,10 @@
                 throw e.getException();
             }
         }
+
+        public String toString() {
+            return super.toString() + "[Wrapped task = " + task + "]";
+        }
     }
 
     /**
@@ -604,7 +616,7 @@
         public Thread newThread(final Runnable r) {
             return super.newThread(new Runnable() {
                 public void run() {
-                    AccessController.doPrivileged(new PrivilegedAction<Void>() {
+                    AccessController.doPrivileged(new PrivilegedAction<>() {
                         public Void run() {
                             Thread.currentThread().setContextClassLoader(ccl);
                             r.run();
@@ -621,47 +633,79 @@
      * of an ExecutorService implementation.
      */
     private static class DelegatedExecutorService
-            extends AbstractExecutorService {
+            implements ExecutorService {
         // Android-added: @ReachabilitySensitive
         // Needed for FinalizableDelegatedExecutorService below.
         @ReachabilitySensitive
         private final ExecutorService e;
         DelegatedExecutorService(ExecutorService executor) { e = executor; }
-        public void execute(Runnable command) { e.execute(command); }
+        public void execute(Runnable command) {
+            try {
+                e.execute(command);
+            } finally { reachabilityFence(this); }
+        }
         public void shutdown() { e.shutdown(); }
-        public List<Runnable> shutdownNow() { return e.shutdownNow(); }
-        public boolean isShutdown() { return e.isShutdown(); }
-        public boolean isTerminated() { return e.isTerminated(); }
+        public List<Runnable> shutdownNow() {
+            try {
+                return e.shutdownNow();
+            } finally { reachabilityFence(this); }
+        }
+        public boolean isShutdown() {
+            try {
+                return e.isShutdown();
+            } finally { reachabilityFence(this); }
+        }
+        public boolean isTerminated() {
+            try {
+                return e.isTerminated();
+            } finally { reachabilityFence(this); }
+        }
         public boolean awaitTermination(long timeout, TimeUnit unit)
             throws InterruptedException {
-            return e.awaitTermination(timeout, unit);
+            try {
+                return e.awaitTermination(timeout, unit);
+            } finally { reachabilityFence(this); }
         }
         public Future<?> submit(Runnable task) {
-            return e.submit(task);
+            try {
+                return e.submit(task);
+            } finally { reachabilityFence(this); }
         }
         public <T> Future<T> submit(Callable<T> task) {
-            return e.submit(task);
+            try {
+                return e.submit(task);
+            } finally { reachabilityFence(this); }
         }
         public <T> Future<T> submit(Runnable task, T result) {
-            return e.submit(task, result);
+            try {
+                return e.submit(task, result);
+            } finally { reachabilityFence(this); }
         }
         public <T> List<Future<T>> invokeAll(Collection<? extends Callable<T>> tasks)
             throws InterruptedException {
-            return e.invokeAll(tasks);
+            try {
+                return e.invokeAll(tasks);
+            } finally { reachabilityFence(this); }
         }
         public <T> List<Future<T>> invokeAll(Collection<? extends Callable<T>> tasks,
                                              long timeout, TimeUnit unit)
             throws InterruptedException {
-            return e.invokeAll(tasks, timeout, unit);
+            try {
+                return e.invokeAll(tasks, timeout, unit);
+            } finally { reachabilityFence(this); }
         }
         public <T> T invokeAny(Collection<? extends Callable<T>> tasks)
             throws InterruptedException, ExecutionException {
-            return e.invokeAny(tasks);
+            try {
+                return e.invokeAny(tasks);
+            } finally { reachabilityFence(this); }
         }
         public <T> T invokeAny(Collection<? extends Callable<T>> tasks,
                                long timeout, TimeUnit unit)
             throws InterruptedException, ExecutionException, TimeoutException {
-            return e.invokeAny(tasks, timeout, unit);
+            try {
+                return e.invokeAny(tasks, timeout, unit);
+            } finally { reachabilityFence(this); }
         }
     }
 
@@ -670,6 +714,7 @@
         FinalizableDelegatedExecutorService(ExecutorService executor) {
             super(executor);
         }
+        @SuppressWarnings("deprecation")
         protected void finalize() {
             super.shutdown();
         }
diff --git a/ojluni/src/main/java/java/util/concurrent/Flow.java b/ojluni/src/main/java/java/util/concurrent/Flow.java
index 0231790..727a507 100644
--- a/ojluni/src/main/java/java/util/concurrent/Flow.java
+++ b/ojluni/src/main/java/java/util/concurrent/Flow.java
@@ -85,9 +85,9 @@
  *       this.executor = executor;
  *     }
  *     public synchronized void request(long n) {
- *       if (n != 0 && !completed) {
+ *       if (!completed) {
  *         completed = true;
- *         if (n < 0) {
+ *         if (n <= 0) {
  *           IllegalArgumentException ex = new IllegalArgumentException();
  *           executor.execute(() -> subscriber.onError(ex));
  *         } else {
diff --git a/ojluni/src/main/java/java/util/concurrent/ForkJoinPool.java b/ojluni/src/main/java/java/util/concurrent/ForkJoinPool.java
index 04ad7d7..04d155e 100644
--- a/ojluni/src/main/java/java/util/concurrent/ForkJoinPool.java
+++ b/ojluni/src/main/java/java/util/concurrent/ForkJoinPool.java
@@ -36,15 +36,19 @@
 package java.util.concurrent;
 
 import java.lang.Thread.UncaughtExceptionHandler;
+import java.lang.invoke.MethodHandles;
+import java.lang.invoke.VarHandle;
+import java.security.AccessController;
 import java.security.AccessControlContext;
+import java.security.Permission;
 import java.security.Permissions;
+import java.security.PrivilegedAction;
 import java.security.ProtectionDomain;
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.List;
-import java.util.concurrent.locks.ReentrantLock;
+import java.util.function.Predicate;
 import java.util.concurrent.locks.LockSupport;
 
 /**
@@ -63,7 +67,8 @@
  * tasks are submitted to the pool from external clients.  Especially
  * when setting <em>asyncMode</em> to true in constructors, {@code
  * ForkJoinPool}s may also be appropriate for use with event-style
- * tasks that are never joined.
+ * tasks that are never joined. All worker threads are initialized
+ * with {@link Thread#isDaemon} set {@code true}.
  *
  * <p>A static {@link #commonPool()} is available and appropriate for
  * most applications. The common pool is used by any ForkJoinTask that
@@ -81,7 +86,9 @@
  * However, no such adjustments are guaranteed in the face of blocked
  * I/O or other unmanaged synchronization. The nested {@link
  * ManagedBlocker} interface enables extension of the kinds of
- * synchronization accommodated.
+ * synchronization accommodated. The default policies may be
+ * overridden using a constructor with parameters corresponding to
+ * those documented in class {@link ThreadPoolExecutor}.
  *
  * <p>In addition to execution and lifecycle control methods, this
  * class provides status check methods (for example
@@ -102,48 +109,54 @@
  * async event-style tasks that are not usually joined, in which case
  * there is little difference among choice of methods.
  *
- * <table BORDER CELLPADDING=3 CELLSPACING=1>
+ * <table class="plain">
  * <caption>Summary of task execution methods</caption>
  *  <tr>
  *    <td></td>
- *    <td ALIGN=CENTER> <b>Call from non-fork/join clients</b></td>
- *    <td ALIGN=CENTER> <b>Call from within fork/join computations</b></td>
+ *    <th scope="col"> Call from non-fork/join clients</th>
+ *    <th scope="col"> Call from within fork/join computations</th>
  *  </tr>
  *  <tr>
- *    <td> <b>Arrange async execution</b></td>
+ *    <th scope="row" style="text-align:left"> Arrange async execution</th>
  *    <td> {@link #execute(ForkJoinTask)}</td>
  *    <td> {@link ForkJoinTask#fork}</td>
  *  </tr>
  *  <tr>
- *    <td> <b>Await and obtain result</b></td>
+ *    <th scope="row" style="text-align:left"> Await and obtain result</th>
  *    <td> {@link #invoke(ForkJoinTask)}</td>
  *    <td> {@link ForkJoinTask#invoke}</td>
  *  </tr>
  *  <tr>
- *    <td> <b>Arrange exec and obtain Future</b></td>
+ *    <th scope="row" style="text-align:left"> Arrange exec and obtain Future</th>
  *    <td> {@link #submit(ForkJoinTask)}</td>
  *    <td> {@link ForkJoinTask#fork} (ForkJoinTasks <em>are</em> Futures)</td>
  *  </tr>
  * </table>
  *
- * <p>The common pool is by default constructed with default
- * parameters, but these may be controlled by setting three
- * {@linkplain System#getProperty system properties}:
+ * <p>The parameters used to construct the common pool may be controlled by
+ * setting the following {@linkplain System#getProperty system properties}:
  * <ul>
  * <li>{@code java.util.concurrent.ForkJoinPool.common.parallelism}
  * - the parallelism level, a non-negative integer
  * <li>{@code java.util.concurrent.ForkJoinPool.common.threadFactory}
- * - the class name of a {@link ForkJoinWorkerThreadFactory}
+ * - the class name of a {@link ForkJoinWorkerThreadFactory}.
+ * The {@linkplain ClassLoader#getSystemClassLoader() system class loader}
+ * is used to load this class.
  * <li>{@code java.util.concurrent.ForkJoinPool.common.exceptionHandler}
- * - the class name of a {@link UncaughtExceptionHandler}
+ * - the class name of a {@link UncaughtExceptionHandler}.
+ * The {@linkplain ClassLoader#getSystemClassLoader() system class loader}
+ * is used to load this class.
  * <li>{@code java.util.concurrent.ForkJoinPool.common.maximumSpares}
  * - the maximum number of allowed extra threads to maintain target
  * parallelism (default 256).
  * </ul>
- * If a {@link SecurityManager} is present and no factory is
- * specified, then the default pool uses a factory supplying
- * threads that have no {@link Permissions} enabled.
- * The system class loader is used to load these classes.
+ * If no thread factory is supplied via a system property, then the
+ * common pool uses a factory that uses the system class loader as the
+ * {@linkplain Thread#getContextClassLoader() thread context class loader}.
+ * In addition, if a {@link SecurityManager} is present, then
+ * the common pool uses a factory supplying threads that have no
+ * {@link Permissions} enabled.
+ *
  * Upon any error in establishing these settings, default parameters
  * are used. It is possible to disable or limit the use of threads in
  * the common pool by setting the parallelism property to zero, and/or
@@ -173,17 +186,22 @@
      * functionality and control for a set of worker threads:
      * Submissions from non-FJ threads enter into submission queues.
      * Workers take these tasks and typically split them into subtasks
-     * that may be stolen by other workers.  Preference rules give
-     * first priority to processing tasks from their own queues (LIFO
-     * or FIFO, depending on mode), then to randomized FIFO steals of
-     * tasks in other queues.  This framework began as vehicle for
-     * supporting tree-structured parallelism using work-stealing.
-     * Over time, its scalability advantages led to extensions and
-     * changes to better support more diverse usage contexts.  Because
-     * most internal methods and nested classes are interrelated,
-     * their main rationale and descriptions are presented here;
-     * individual methods and nested classes contain only brief
-     * comments about details.
+     * that may be stolen by other workers. Work-stealing based on
+     * randomized scans generally leads to better throughput than
+     * "work dealing" in which producers assign tasks to idle threads,
+     * in part because threads that have finished other tasks before
+     * the signalled thread wakes up (which can be a long time) can
+     * take the task instead.  Preference rules give first priority to
+     * processing tasks from their own queues (LIFO or FIFO, depending
+     * on mode), then to randomized FIFO steals of tasks in other
+     * queues.  This framework began as vehicle for supporting
+     * tree-structured parallelism using work-stealing.  Over time,
+     * its scalability advantages led to extensions and changes to
+     * better support more diverse usage contexts.  Because most
+     * internal methods and nested classes are interrelated, their
+     * main rationale and descriptions are presented here; individual
+     * methods and nested classes contain only brief comments about
+     * details.
      *
      * WorkQueues
      * ==========
@@ -216,9 +234,10 @@
      *
      * (The actual code needs to null-check and size-check the array,
      * uses masking, not mod, for indexing a power-of-two-sized array,
-     * properly fences accesses, and possibly signals waiting workers
-     * to start scanning -- see below.)  Both a successful pop and
-     * poll mainly entail a CAS of a slot from non-null to null.
+     * adds a release fence for publication, and possibly signals
+     * waiting workers to start scanning -- see below.)  Both a
+     * successful pop and poll mainly entail a CAS of a slot from
+     * non-null to null.
      *
      * The pop operation (always performed by owner) is:
      *   if ((the task at top slot is not null) and
@@ -230,10 +249,14 @@
      *        (CAS slot to null))
      *           increment base and return task;
      *
-     * There are several variants of each of these; for example most
-     * versions of poll pre-screen the CAS by rechecking that the base
-     * has not changed since reading the slot, and most methods only
-     * attempt the CAS if base appears not to be equal to top.
+     * There are several variants of each of these. Most uses occur
+     * within operations that also interleave contention or emptiness
+     * tracking or inspection of elements before extracting them, so
+     * must interleave these with the above code. When performed by
+     * owner, getAndSet is used instead of CAS (see for example method
+     * nextLocalTask) which is usually more efficient, and possible
+     * because the top index cannot independently change during the
+     * operation.
      *
      * Memory ordering.  See "Correct and Efficient Work-Stealing for
      * Weak Memory Models" by Le, Pop, Cohen, and Nardelli, PPoPP 2013
@@ -242,33 +265,37 @@
      * algorithms similar to (but different than) the one used here.
      * Extracting tasks in array slots via (fully fenced) CAS provides
      * primary synchronization. The base and top indices imprecisely
-     * guide where to extract from. We do not always require strict
-     * orderings of array and index updates, so sometimes let them be
-     * subject to compiler and processor reorderings. However, the
-     * volatile "base" index also serves as a basis for memory
-     * ordering: Slot accesses are preceded by a read of base,
-     * ensuring happens-before ordering with respect to stealers (so
-     * the slots themselves can be read via plain array reads.)  The
-     * only other memory orderings relied on are maintained in the
-     * course of signalling and activation (see below).  A check that
-     * base == top indicates (momentary) emptiness, but otherwise may
-     * err on the side of possibly making the queue appear nonempty
-     * when a push, pop, or poll have not fully committed, or making
-     * it appear empty when an update of top has not yet been visibly
-     * written.  (Method isEmpty() checks the case of a partially
-     * completed removal of the last element.)  Because of this, the
-     * poll operation, considered individually, is not wait-free. One
-     * thief cannot successfully continue until another in-progress
-     * one (or, if previously empty, a push) visibly completes.
-     * However, in the aggregate, we ensure at least probabilistic
+     * guide where to extract from. We do not usually require strict
+     * orderings of array and index updates. Many index accesses use
+     * plain mode, with ordering constrained by surrounding context
+     * (usually with respect to element CASes or the two WorkQueue
+     * volatile fields source and phase). When not otherwise already
+     * constrained, reads of "base" by queue owners use acquire-mode,
+     * and some externally callable methods preface accesses with
+     * acquire fences.  Additionally, to ensure that index update
+     * writes are not coalesced or postponed in loops etc, "opaque"
+     * mode is used in a few cases where timely writes are not
+     * otherwise ensured. The "locked" versions of push- and pop-
+     * based methods for shared queues differ from owned versions
+     * because locking already forces some of the ordering.
+     *
+     * Because indices and slot contents cannot always be consistent,
+     * a check that base == top indicates (momentary) emptiness, but
+     * otherwise may err on the side of possibly making the queue
+     * appear nonempty when a push, pop, or poll have not fully
+     * committed, or making it appear empty when an update of top has
+     * not yet been visibly written.  (Method isEmpty() checks the
+     * case of a partially completed removal of the last element.)
+     * Because of this, the poll operation, considered individually,
+     * is not wait-free. One thief cannot successfully continue until
+     * another in-progress one (or, if previously empty, a push)
+     * visibly completes.  This can stall threads when required to
+     * consume from a given queue (see method poll()).  However, in
+     * the aggregate, we ensure at least probabilistic
      * non-blockingness.  If an attempted steal fails, a scanning
      * thief chooses a different random victim target to try next. So,
      * in order for one thief to progress, it suffices for any
-     * in-progress poll or new push on any empty queue to
-     * complete. (This is why we normally use method pollAt and its
-     * variants that try once at the apparent base index, else
-     * consider alternative actions, rather than method poll, which
-     * retries.)
+     * in-progress poll or new push on any empty queue to complete.
      *
      * This approach also enables support of a user mode in which
      * local task processing is in FIFO, not LIFO order, simply by
@@ -283,16 +310,13 @@
      * choosing existing queues, and may be randomly repositioned upon
      * contention with other submitters.  In essence, submitters act
      * like workers except that they are restricted to executing local
-     * tasks that they submitted (or in the case of CountedCompleters,
-     * others with the same root task).  Insertion of tasks in shared
-     * mode requires a lock but we use only a simple spinlock (using
-     * field qlock), because submitters encountering a busy queue move
-     * on to try or create other queues -- they block only when
-     * creating and registering new queues. Because it is used only as
-     * a spinlock, unlocking requires only a "releasing" store (using
-     * putOrderedInt).  The qlock is also used during termination
-     * detection, in which case it is forced to a negative
-     * non-lockable value.
+     * tasks that they submitted.  Insertion of tasks in shared mode
+     * requires a lock but we use only a simple spinlock (using field
+     * phase), because submitters encountering a busy queue move to a
+     * different position to use or create other queues -- they block
+     * only when creating and registering new queues. Because it is
+     * used only as a spinlock, unlocking requires only a "releasing"
+     * store (using setRelease) unless otherwise signalling.
      *
      * Management
      * ==========
@@ -306,43 +330,36 @@
      * There are only a few properties that we can globally track or
      * maintain, so we pack them into a small number of variables,
      * often maintaining atomicity without blocking or locking.
-     * Nearly all essentially atomic control state is held in two
+     * Nearly all essentially atomic control state is held in a few
      * volatile variables that are by far most often read (not
-     * written) as status and consistency checks. (Also, field
-     * "config" holds unchanging configuration state.)
+     * written) as status and consistency checks. We pack as much
+     * information into them as we can.
      *
      * Field "ctl" contains 64 bits holding information needed to
-     * atomically decide to add, inactivate, enqueue (on an event
-     * queue), dequeue, and/or re-activate workers.  To enable this
-     * packing, we restrict maximum parallelism to (1<<15)-1 (which is
-     * far in excess of normal operating range) to allow ids, counts,
-     * and their negations (used for thresholding) to fit into 16bit
+     * atomically decide to add, enqueue (on an event queue), and
+     * dequeue and release workers.  To enable this packing, we
+     * restrict maximum parallelism to (1<<15)-1 (which is far in
+     * excess of normal operating range) to allow ids, counts, and
+     * their negations (used for thresholding) to fit into 16bit
      * subfields.
      *
-     * Field "runState" holds lifetime status, atomically and
-     * monotonically setting STARTED, SHUTDOWN, STOP, and finally
-     * TERMINATED bits.
-     *
-     * Field "auxState" is a ReentrantLock subclass that also
-     * opportunistically holds some other bookkeeping fields accessed
-     * only when locked.  It is mainly used to lock (infrequent)
-     * updates to workQueues.  The auxState instance is itself lazily
-     * constructed (see tryInitialize), requiring a double-check-style
-     * bootstrapping use of field runState, and locking a private
-     * static.
+     * Field "mode" holds configuration parameters as well as lifetime
+     * status, atomically and monotonically setting SHUTDOWN, STOP,
+     * and finally TERMINATED bits.
      *
      * Field "workQueues" holds references to WorkQueues.  It is
-     * updated (only during worker creation and termination) under the
-     * lock, but is otherwise concurrently readable, and accessed
-     * directly. We also ensure that reads of the array reference
-     * itself never become too stale (for example, re-reading before
-     * each scan). To simplify index-based operations, the array size
-     * is always a power of two, and all readers must tolerate null
-     * slots. Worker queues are at odd indices. Shared (submission)
-     * queues are at even indices, up to a maximum of 64 slots, to
-     * limit growth even if array needs to expand to add more
-     * workers. Grouping them together in this way simplifies and
-     * speeds up task scanning.
+     * updated (only during worker creation and termination) under
+     * lock (using field workerNamePrefix as lock), but is otherwise
+     * concurrently readable, and accessed directly. We also ensure
+     * that uses of the array reference itself never become too stale
+     * in case of resizing, by arranging that (re-)reads are separated
+     * by at least one acquiring read access.  To simplify index-based
+     * operations, the array size is always a power of two, and all
+     * readers must tolerate null slots. Worker queues are at odd
+     * indices. Shared (submission) queues are at even indices, up to
+     * a maximum of 64 slots, to limit growth even if the array needs
+     * to expand to add more workers. Grouping them together in this
+     * way simplifies and speeds up task scanning.
      *
      * All worker thread creation is on-demand, triggered by task
      * submissions, replacement of terminated workers, and/or
@@ -361,30 +378,37 @@
      * workers unless there appear to be tasks available.  On the
      * other hand, we must quickly prod them into action when new
      * tasks are submitted or generated. In many usages, ramp-up time
-     * to activate workers is the main limiting factor in overall
-     * performance, which is compounded at program start-up by JIT
-     * compilation and allocation. So we streamline this as much as
-     * possible.
+     * is the main limiting factor in overall performance, which is
+     * compounded at program start-up by JIT compilation and
+     * allocation. So we streamline this as much as possible.
      *
-     * The "ctl" field atomically maintains active and total worker
-     * counts as well as a queue to place waiting threads so they can
-     * be located for signalling. Active counts also play the role of
-     * quiescence indicators, so are decremented when workers believe
-     * that there are no more tasks to execute. The "queue" is
-     * actually a form of Treiber stack.  A stack is ideal for
-     * activating threads in most-recently used order. This improves
+     * The "ctl" field atomically maintains total worker and
+     * "released" worker counts, plus the head of the available worker
+     * queue (actually stack, represented by the lower 32bit subfield
+     * of ctl).  Released workers are those known to be scanning for
+     * and/or running tasks. Unreleased ("available") workers are
+     * recorded in the ctl stack. These workers are made available for
+     * signalling by enqueuing in ctl (see method runWorker).  The
+     * "queue" is a form of Treiber stack. This is ideal for
+     * activating threads in most-recently used order, and improves
      * performance and locality, outweighing the disadvantages of
      * being prone to contention and inability to release a worker
-     * unless it is topmost on stack.  We block/unblock workers after
-     * pushing on the idle worker stack (represented by the lower
-     * 32bit subfield of ctl) when they cannot find work.  The top
-     * stack state holds the value of the "scanState" field of the
-     * worker: its index and status, plus a version counter that, in
-     * addition to the count subfields (also serving as version
-     * stamps) provide protection against Treiber stack ABA effects.
+     * unless it is topmost on stack.  To avoid missed signal problems
+     * inherent in any wait/signal design, available workers rescan
+     * for (and if found run) tasks after enqueuing.  Normally their
+     * release status will be updated while doing so, but the released
+     * worker ctl count may underestimate the number of active
+     * threads. (However, it is still possible to determine quiescence
+     * via a validation traversal -- see isQuiescent).  After an
+     * unsuccessful rescan, available workers are blocked until
+     * signalled (see signalWork).  The top stack state holds the
+     * value of the "phase" field of the worker: its index and status,
+     * plus a version counter that, in addition to the count subfields
+     * (also serving as version stamps) provide protection against
+     * Treiber stack ABA effects.
      *
-     * Creating workers. To create a worker, we pre-increment total
-     * count (serving as a reservation), and attempt to construct a
+     * Creating workers. To create a worker, we pre-increment counts
+     * (serving as a reservation), and attempt to construct a
      * ForkJoinWorkerThread via its factory. Upon construction, the
      * new thread invokes registerWorker, where it constructs a
      * WorkQueue and is assigned an index in the workQueues array
@@ -406,15 +430,14 @@
      * submission queues for existing external threads (see
      * externalPush).
      *
-     * WorkQueue field scanState is used by both workers and the pool
-     * to manage and track whether a worker is UNSIGNALLED (possibly
-     * blocked waiting for a signal).  When a worker is inactivated,
-     * its scanState field is set, and is prevented from executing
-     * tasks, even though it must scan once for them to avoid queuing
-     * races. Note that scanState updates lag queue CAS releases so
-     * usage requires care. When queued, the lower 16 bits of
-     * scanState must hold its pool index. So we place the index there
-     * upon initialization (see registerWorker) and otherwise keep it
+     * WorkQueue field "phase" is used by both workers and the pool to
+     * manage and track whether a worker is UNSIGNALLED (possibly
+     * blocked waiting for a signal).  When a worker is enqueued its
+     * phase field is set. Note that phase field updates lag queue CAS
+     * releases so usage requires care -- seeing a negative phase does
+     * not guarantee that the worker is available. When queued, the
+     * lower 16 bits of scanState must hold its pool index. So we
+     * place the index there upon initialization and otherwise keep it
      * there or restore it when necessary.
      *
      * The ctl field also serves as the basis for memory
@@ -423,85 +446,68 @@
      * consumers sync with each other by both writing/CASing ctl (even
      * if to its current value).  This would be extremely costly. So
      * we relax it in several ways: (1) Producers only signal when
-     * their queue is empty. Other workers propagate this signal (in
-     * method scan) when they find tasks. (2) Workers only enqueue
-     * after scanning (see below) and not finding any tasks.  (3)
-     * Rather than CASing ctl to its current value in the common case
-     * where no action is required, we reduce write contention by
-     * equivalently prefacing signalWork when called by an external
-     * task producer using a memory access with full-volatile
-     * semantics or a "fullFence". (4) For internal task producers we
-     * rely on the fact that even if no other workers awaken, the
-     * producer itself will eventually see the task and execute it.
+     * their queue is possibly empty at some point during a push
+     * operation (which requires conservatively checking size zero or
+     * one to cover races). (2) Other workers propagate this signal
+     * when they find tasks in a queue with size greater than one. (3)
+     * Workers only enqueue after scanning (see below) and not finding
+     * any tasks.  (4) Rather than CASing ctl to its current value in
+     * the common case where no action is required, we reduce write
+     * contention by equivalently prefacing signalWork when called by
+     * an external task producer using a memory access with
+     * full-volatile semantics or a "fullFence".
      *
-     * Almost always, too many signals are issued. A task producer
-     * cannot in general tell if some existing worker is in the midst
-     * of finishing one task (or already scanning) and ready to take
-     * another without being signalled. So the producer might instead
-     * activate a different worker that does not find any work, and
-     * then inactivates. This scarcely matters in steady-state
-     * computations involving all workers, but can create contention
-     * and bookkeeping bottlenecks during ramp-up, ramp-down, and small
-     * computations involving only a few workers.
+     * Almost always, too many signals are issued, in part because a
+     * task producer cannot tell if some existing worker is in the
+     * midst of finishing one task (or already scanning) and ready to
+     * take another without being signalled. So the producer might
+     * instead activate a different worker that does not find any
+     * work, and then inactivates. This scarcely matters in
+     * steady-state computations involving all workers, but can create
+     * contention and bookkeeping bottlenecks during ramp-up,
+     * ramp-down, and small computations involving only a few workers.
      *
-     * Scanning. Method scan() performs top-level scanning for tasks.
-     * Each scan traverses (and tries to poll from) each queue in
-     * pseudorandom permutation order by randomly selecting an origin
-     * index and a step value.  (The pseudorandom generator need not
-     * have high-quality statistical properties in the long term, but
-     * just within computations; We use 64bit and 32bit Marsaglia
-     * XorShifts, which are cheap and suffice here.)  Scanning also
-     * employs contention reduction: When scanning workers fail a CAS
-     * polling for work, they soon restart with a different
-     * pseudorandom scan order (thus likely retrying at different
-     * intervals). This improves throughput when many threads are
-     * trying to take tasks from few queues.  Scans do not otherwise
-     * explicitly take into account core affinities, loads, cache
-     * localities, etc, However, they do exploit temporal locality
-     * (which usually approximates these) by preferring to re-poll (up
-     * to POLL_LIMIT times) from the same queue after a successful
-     * poll before trying others.  Restricted forms of scanning occur
-     * in methods helpComplete and findNonEmptyStealQueue, and take
-     * similar but simpler forms.
-     *
-     * Deactivation and waiting. Queuing encounters several intrinsic
-     * races; most notably that an inactivating scanning worker can
-     * miss seeing a task produced during a scan.  So when a worker
-     * cannot find a task to steal, it inactivates and enqueues, and
-     * then rescans to ensure that it didn't miss one, reactivating
-     * upon seeing one with probability approximately proportional to
-     * probability of a miss.  (In most cases, the worker will be
-     * signalled before self-signalling, avoiding cascades of multiple
-     * signals for the same task).
-     *
-     * Workers block (in method awaitWork) using park/unpark;
-     * advertising the need for signallers to unpark by setting their
-     * "parker" fields.
+     * Scanning. Method scan (from runWorker) performs top-level
+     * scanning for tasks. (Similar scans appear in helpQuiesce and
+     * pollScan.)  Each scan traverses and tries to poll from each
+     * queue starting at a random index. Scans are not performed in
+     * ideal random permutation order, to reduce cacheline
+     * contention. The pseudorandom generator need not have
+     * high-quality statistical properties in the long term, but just
+     * within computations; We use Marsaglia XorShifts (often via
+     * ThreadLocalRandom.nextSecondarySeed), which are cheap and
+     * suffice. Scanning also includes contention reduction: When
+     * scanning workers fail to extract an apparently existing task,
+     * they soon restart at a different pseudorandom index.  This form
+     * of backoff improves throughput when many threads are trying to
+     * take tasks from few queues, which can be common in some usages.
+     * Scans do not otherwise explicitly take into account core
+     * affinities, loads, cache localities, etc, However, they do
+     * exploit temporal locality (which usually approximates these) by
+     * preferring to re-poll from the same queue after a successful
+     * poll before trying others (see method topLevelExec). However
+     * this preference is bounded (see TOP_BOUND_SHIFT) as a safeguard
+     * against infinitely unfair looping under unbounded user task
+     * recursion, and also to reduce long-term contention when many
+     * threads poll few queues holding many small tasks. The bound is
+     * high enough to avoid much impact on locality and scheduling
+     * overhead.
      *
      * Trimming workers. To release resources after periods of lack of
      * use, a worker starting to wait when the pool is quiescent will
-     * time out and terminate (see awaitWork) if the pool has remained
-     * quiescent for period given by IDLE_TIMEOUT_MS, increasing the
-     * period as the number of threads decreases, eventually removing
-     * all workers.
+     * time out and terminate (see method runWorker) if the pool has
+     * remained quiescent for period given by field keepAlive.
      *
      * Shutdown and Termination. A call to shutdownNow invokes
      * tryTerminate to atomically set a runState bit. The calling
      * thread, as well as every other worker thereafter terminating,
-     * helps terminate others by setting their (qlock) status,
-     * cancelling their unprocessed tasks, and waking them up, doing
-     * so repeatedly until stable. Calls to non-abrupt shutdown()
-     * preface this by checking whether termination should commence.
-     * This relies primarily on the active count bits of "ctl"
-     * maintaining consensus -- tryTerminate is called from awaitWork
-     * whenever quiescent. However, external submitters do not take
-     * part in this consensus.  So, tryTerminate sweeps through queues
-     * (until stable) to ensure lack of in-flight submissions and
-     * workers about to process them before triggering the "STOP"
-     * phase of termination. (Note: there is an intrinsic conflict if
-     * helpQuiescePool is called when shutdown is enabled. Both wait
-     * for quiescence, but tryTerminate is biased to not trigger until
-     * helpQuiescePool completes.)
+     * helps terminate others by cancelling their unprocessed tasks,
+     * and waking them up, doing so repeatedly until stable. Calls to
+     * non-abrupt shutdown() preface this by checking whether
+     * termination should commence by sweeping through queues (until
+     * stable) to ensure lack of in-flight submissions and workers
+     * about to process them before triggering the "STOP" phase of
+     * termination.
      *
      * Joining Tasks
      * =============
@@ -509,12 +515,12 @@
      * Any of several actions may be taken when one worker is waiting
      * to join a task stolen (or always held) by another.  Because we
      * are multiplexing many tasks on to a pool of workers, we can't
-     * just let them block (as in Thread.join).  We also cannot just
-     * reassign the joiner's run-time stack with another and replace
-     * it later, which would be a form of "continuation", that even if
-     * possible is not necessarily a good idea since we may need both
-     * an unblocked task and its continuation to progress.  Instead we
-     * combine two tactics:
+     * always just let them block (as in Thread.join).  We also cannot
+     * just reassign the joiner's run-time stack with another and
+     * replace it later, which would be a form of "continuation", that
+     * even if possible is not necessarily a good idea since we may
+     * need both an unblocked task and its continuation to progress.
+     * Instead we combine two tactics:
      *
      *   Helping: Arranging for the joiner to execute some task that it
      *      would be running if the steal had not occurred.
@@ -527,79 +533,44 @@
      * helping a hypothetical compensator: If we can readily tell that
      * a possible action of a compensator is to steal and execute the
      * task being joined, the joining thread can do so directly,
-     * without the need for a compensation thread (although at the
-     * expense of larger run-time stacks, but the tradeoff is
-     * typically worthwhile).
+     * without the need for a compensation thread.
      *
      * The ManagedBlocker extension API can't use helping so relies
      * only on compensation in method awaitBlocker.
      *
-     * The algorithm in helpStealer entails a form of "linear
-     * helping".  Each worker records (in field currentSteal) the most
-     * recent task it stole from some other worker (or a submission).
-     * It also records (in field currentJoin) the task it is currently
-     * actively joining. Method helpStealer uses these markers to try
-     * to find a worker to help (i.e., steal back a task from and
-     * execute it) that could hasten completion of the actively joined
-     * task.  Thus, the joiner executes a task that would be on its
-     * own local deque had the to-be-joined task not been stolen. This
-     * is a conservative variant of the approach described in Wagner &
-     * Calder "Leapfrogging: a portable technique for implementing
-     * efficient futures" SIGPLAN Notices, 1993
-     * (http://portal.acm.org/citation.cfm?id=155354). It differs in
-     * that: (1) We only maintain dependency links across workers upon
-     * steals, rather than use per-task bookkeeping.  This sometimes
-     * requires a linear scan of workQueues array to locate stealers,
-     * but often doesn't because stealers leave hints (that may become
-     * stale/wrong) of where to locate them.  It is only a hint
-     * because a worker might have had multiple steals and the hint
-     * records only one of them (usually the most current).  Hinting
-     * isolates cost to when it is needed, rather than adding to
-     * per-task overhead.  (2) It is "shallow", ignoring nesting and
-     * potentially cyclic mutual steals.  (3) It is intentionally
-     * racy: field currentJoin is updated only while actively joining,
-     * which means that we miss links in the chain during long-lived
-     * tasks, GC stalls etc (which is OK since blocking in such cases
-     * is usually a good idea).  (4) We bound the number of attempts
-     * to find work using checksums and fall back to suspending the
-     * worker and if necessary replacing it with another.
+     * The algorithm in awaitJoin entails a form of "linear helping".
+     * Each worker records (in field source) the id of the queue from
+     * which it last stole a task.  The scan in method awaitJoin uses
+     * these markers to try to find a worker to help (i.e., steal back
+     * a task from and execute it) that could hasten completion of the
+     * actively joined task.  Thus, the joiner executes a task that
+     * would be on its own local deque if the to-be-joined task had
+     * not been stolen. This is a conservative variant of the approach
+     * described in Wagner & Calder "Leapfrogging: a portable
+     * technique for implementing efficient futures" SIGPLAN Notices,
+     * 1993 (http://portal.acm.org/citation.cfm?id=155354). It differs
+     * mainly in that we only record queue ids, not full dependency
+     * links.  This requires a linear scan of the workQueues array to
+     * locate stealers, but isolates cost to when it is needed, rather
+     * than adding to per-task overhead. Searches can fail to locate
+     * stealers GC stalls and the like delay recording sources.
+     * Further, even when accurately identified, stealers might not
+     * ever produce a task that the joiner can in turn help with. So,
+     * compensation is tried upon failure to find tasks to run.
      *
-     * Helping actions for CountedCompleters do not require tracking
-     * currentJoins: Method helpComplete takes and executes any task
-     * with the same root as the task being waited on (preferring
-     * local pops to non-local polls). However, this still entails
-     * some traversal of completer chains, so is less efficient than
-     * using CountedCompleters without explicit joins.
-     *
-     * Compensation does not aim to keep exactly the target
+     * Compensation does not by default aim to keep exactly the target
      * parallelism number of unblocked threads running at any given
      * time. Some previous versions of this class employed immediate
      * compensations for any blocked join. However, in practice, the
      * vast majority of blockages are transient byproducts of GC and
-     * other JVM or OS activities that are made worse by replacement.
-     * Currently, compensation is attempted only after validating that
-     * all purportedly active threads are processing tasks by checking
-     * field WorkQueue.scanState, which eliminates most false
-     * positives.  Also, compensation is bypassed (tolerating fewer
-     * threads) in the most common case in which it is rarely
-     * beneficial: when a worker with an empty queue (thus no
-     * continuation tasks) blocks on a join and there still remain
-     * enough threads to ensure liveness.
-     *
-     * Spare threads are removed as soon as they notice that the
-     * target parallelism level has been exceeded, in method
-     * tryDropSpare. (Method scan arranges returns for rechecks upon
-     * each probe via the "bound" parameter.)
-     *
-     * The compensation mechanism may be bounded.  Bounds for the
+     * other JVM or OS activities that are made worse by replacement
+     * when they cause longer-term oversubscription.  Rather than
+     * impose arbitrary policies, we allow users to override the
+     * default of only adding threads upon apparent starvation.  The
+     * compensation mechanism may also be bounded.  Bounds for the
      * commonPool (see COMMON_MAX_SPARES) better enable JVMs to cope
      * with programming errors and abuse before running out of
-     * resources to do so. In other cases, users may supply factories
-     * that limit thread construction. The effects of bounding in this
-     * pool (like all others) is imprecise.  Total worker counts are
-     * decremented when threads deregister, not when they exit and
-     * resources are reclaimed by the JVM and OS. So the number of
-     * simultaneously live threads may transiently exceed bounds.
+     * resources to do so.
      *
      * Common Pool
      * ===========
@@ -607,9 +578,7 @@
      * The static common pool always exists after static
      * initialization.  Since it (or any other created pool) need
      * never be used, we minimize initial construction overhead and
-     * footprint to the setup of about a dozen fields, with no nested
-     * allocation. Most bootstrapping occurs within method
-     * externalSubmit during the first submission to the pool.
+     * footprint to the setup of about a dozen fields.
      *
      * When external threads submit to the common pool, they can
      * perform subtask processing (see externalHelpComplete and
@@ -629,31 +598,39 @@
      * InnocuousForkJoinWorkerThread when there is a SecurityManager
      * present. These workers have no permissions set, do not belong
      * to any user-defined ThreadGroup, and erase all ThreadLocals
-     * after executing any top-level task (see WorkQueue.runTask).
-     * The associated mechanics (mainly in ForkJoinWorkerThread) may
-     * be JVM-dependent and must access particular Thread class fields
-     * to achieve this effect.
+     * after executing any top-level task (see
+     * WorkQueue.afterTopLevelExec).  The associated mechanics (mainly
+     * in ForkJoinWorkerThread) may be JVM-dependent and must access
+     * particular Thread class fields to achieve this effect.
+     *
+     * Memory placement
+     * ================
+     *
+     * Performance can be very sensitive to placement of instances of
+     * ForkJoinPool and WorkQueues and their queue arrays. To reduce
+     * false-sharing impact, the @Contended annotation isolates
+     * adjacent WorkQueue instances, as well as the ForkJoinPool.ctl
+     * field. WorkQueue arrays are allocated (by their threads) with
+     * larger initial sizes than most ever need, mostly to reduce
+     * false sharing with current garbage collectors that use cardmark
+     * tables.
      *
      * Style notes
      * ===========
      *
-     * Memory ordering relies mainly on Unsafe intrinsics that carry
-     * the further responsibility of explicitly performing null- and
-     * bounds- checks otherwise carried out implicitly by JVMs.  This
-     * can be awkward and ugly, but also reflects the need to control
+     * Memory ordering relies mainly on VarHandles.  This can be
+     * awkward and ugly, but also reflects the need to control
      * outcomes across the unusual cases that arise in very racy code
-     * with very few invariants. So these explicit checks would exist
-     * in some form anyway.  All fields are read into locals before
-     * use, and null-checked if they are references.  This is usually
-     * done in a "C"-like style of listing declarations at the heads
-     * of methods or blocks, and using inline assignments on first
-     * encounter.  Array bounds-checks are usually performed by
-     * masking with array.length-1, which relies on the invariant that
-     * these arrays are created with positive lengths, which is itself
-     * paranoically checked. Nearly all explicit checks lead to
-     * bypass/return, not exception throws, because they may
-     * legitimately arise due to cancellation/revocation during
-     * shutdown.
+     * with very few invariants. All fields are read into locals
+     * before use, and null-checked if they are references.  Array
+     * accesses using masked indices include checks (that are always
+     * true) that the array length is non-zero to avoid compilers
+     * inserting more expensive traps.  This is usually done in a
+     * "C"-like style of listing declarations at the heads of methods
+     * or blocks, and using inline assignments on first encounter.
+     * Nearly all explicit checks lead to bypass/return, not exception
+     * throws, because they may legitimately arise due to
+     * cancellation/revocation during shutdown.
      *
      * There is a lot of representation-level coupling among classes
      * ForkJoinPool, ForkJoinWorkerThread, and ForkJoinTask.  The
@@ -663,10 +640,11 @@
      * representations will need to be accompanied by algorithmic
      * changes anyway. Several methods intrinsically sprawl because
      * they must accumulate sets of consistent reads of fields held in
-     * local variables.  There are also other coding oddities
-     * (including several unnecessary-looking hoisted null checks)
-     * that help some methods perform reasonably even when interpreted
-     * (not compiled).
+     * local variables. Some others are artificially broken up to
+     * reduce producer/consumer imbalances due to dynamic compilation.
+     * There are also other coding oddities (including several
+     * unnecessary-looking hoisted null checks) that help some methods
+     * perform reasonably even when interpreted (not compiled).
      *
      * The order of declarations in this file is (with a few exceptions):
      * (1) Static utility functions
@@ -702,6 +680,13 @@
     public static interface ForkJoinWorkerThreadFactory {
         /**
          * Returns a new worker thread operating in the given pool.
+         * Returning null or throwing an exception may result in tasks
+         * never being executed.  If this method throws an exception,
+         * it is relayed to the caller of the method (for example
+         * {@code execute}) causing attempted thread creation. If this
+         * method returns null or throws an exception, it is not
+         * retried until the next attempted creation (for example
+         * another call to {@code execute}).
          *
          * @param pool the pool this thread works in
          * @return the new worker thread, or {@code null} if the request
@@ -711,120 +696,98 @@
         public ForkJoinWorkerThread newThread(ForkJoinPool pool);
     }
 
+    static AccessControlContext contextWithPermissions(Permission ... perms) {
+        Permissions permissions = new Permissions();
+        for (Permission perm : perms)
+            permissions.add(perm);
+        return new AccessControlContext(
+            new ProtectionDomain[] { new ProtectionDomain(null, permissions) });
+    }
+
     /**
      * Default ForkJoinWorkerThreadFactory implementation; creates a
-     * new ForkJoinWorkerThread.
+     * new ForkJoinWorkerThread using the system class loader as the
+     * thread context class loader.
      */
     private static final class DefaultForkJoinWorkerThreadFactory
         implements ForkJoinWorkerThreadFactory {
+        private static final AccessControlContext ACC = contextWithPermissions(
+            new RuntimePermission("getClassLoader"),
+            new RuntimePermission("setContextClassLoader"));
+
         public final ForkJoinWorkerThread newThread(ForkJoinPool pool) {
-            return new ForkJoinWorkerThread(pool);
+            return AccessController.doPrivileged(
+                new PrivilegedAction<>() {
+                    public ForkJoinWorkerThread run() {
+                        return new ForkJoinWorkerThread(
+                            pool, ClassLoader.getSystemClassLoader()); }},
+                ACC);
         }
     }
 
-    /**
-     * Class for artificial tasks that are used to replace the target
-     * of local joins if they are removed from an interior queue slot
-     * in WorkQueue.tryRemoveAndExec. We don't need the proxy to
-     * actually do anything beyond having a unique identity.
-     */
-    private static final class EmptyTask extends ForkJoinTask<Void> {
-        private static final long serialVersionUID = -7721805057305804111L;
-        EmptyTask() { status = ForkJoinTask.NORMAL; } // force done
-        public final Void getRawResult() { return null; }
-        public final void setRawResult(Void x) {}
-        public final boolean exec() { return true; }
-    }
-
-    /**
-     * Additional fields and lock created upon initialization.
-     */
-    private static final class AuxState extends ReentrantLock {
-        private static final long serialVersionUID = -6001602636862214147L;
-        volatile long stealCount;     // cumulative steal count
-        long indexSeed;               // index bits for registerWorker
-        AuxState() {}
-    }
-
     // Constants shared across ForkJoinPool and WorkQueue
 
     // Bounds
+    static final int SWIDTH       = 16;            // width of short
     static final int SMASK        = 0xffff;        // short bits == max index
     static final int MAX_CAP      = 0x7fff;        // max #workers - 1
-    static final int EVENMASK     = 0xfffe;        // even short bits
     static final int SQMASK       = 0x007e;        // max 64 (even) slots
 
-    // Masks and units for WorkQueue.scanState and ctl sp subfield
+    // Masks and units for WorkQueue.phase and ctl sp subfield
     static final int UNSIGNALLED  = 1 << 31;       // must be negative
     static final int SS_SEQ       = 1 << 16;       // version count
+    static final int QLOCK        = 1;             // must be 1
 
-    // Mode bits for ForkJoinPool.config and WorkQueue.config
-    static final int MODE_MASK    = 0xffff << 16;  // top half of int
-    static final int SPARE_WORKER = 1 << 17;       // set if tc > 0 on creation
-    static final int UNREGISTERED = 1 << 18;       // to skip some of deregister
-    static final int FIFO_QUEUE   = 1 << 31;       // must be negative
-    static final int LIFO_QUEUE   = 0;             // for clarity
-    static final int IS_OWNED     = 1;             // low bit 0 if shared
+    // Mode bits and sentinels, some also used in WorkQueue id and.source fields
+    static final int OWNED        = 1;             // queue has owner thread
+    static final int FIFO         = 1 << 16;       // fifo queue or access mode
+    static final int SHUTDOWN     = 1 << 18;
+    static final int TERMINATED   = 1 << 19;
+    static final int STOP         = 1 << 31;       // must be negative
+    static final int QUIET        = 1 << 30;       // not scanning or working
+    static final int DORMANT      = QUIET | UNSIGNALLED;
 
     /**
-     * The maximum number of task executions from the same queue
-     * before checking other queues, bounding unfairness and impact of
-     * infinite user task recursion.  Must be a power of two minus 1.
+     * Initial capacity of work-stealing queue array.
+     * Must be a power of two, at least 2.
      */
-    static final int POLL_LIMIT = (1 << 10) - 1;
+    static final int INITIAL_QUEUE_CAPACITY = 1 << 13;
+
+    /**
+     * Maximum capacity for queue arrays. Must be a power of two less
+     * than or equal to 1 << (31 - width of array entry) to ensure
+     * lack of wraparound of index calculations, but defined to a
+     * value a bit less than this to help users trap runaway programs
+     * before saturating systems.
+     */
+    static final int MAXIMUM_QUEUE_CAPACITY = 1 << 26; // 64M
+
+    /**
+     * The maximum number of top-level polls per worker before
+     * checking other queues, expressed as a bit shift to, in effect,
+     * multiply by pool size, and then use as random value mask, so
+     * average bound is about poolSize*(1<<TOP_BOUND_SHIFT).  See
+     * above for rationale.
+     */
+    static final int TOP_BOUND_SHIFT = 10;
 
     /**
      * Queues supporting work-stealing as well as external task
      * submission. See above for descriptions and algorithms.
-     * Performance on most platforms is very sensitive to placement of
-     * instances of both WorkQueues and their arrays -- we absolutely
-     * do not want multiple WorkQueue instances or multiple queue
-     * arrays sharing cache lines. The @Contended annotation alerts
-     * JVMs to try to keep instances apart.
      */
     // Android-removed: @Contended, this hint is not used by the Android runtime.
     //@jdk.internal.vm.annotation.Contended
     static final class WorkQueue {
-
-        /**
-         * Capacity of work-stealing queue array upon initialization.
-         * Must be a power of two; at least 4, but should be larger to
-         * reduce or eliminate cacheline sharing among queues.
-         * Currently, it is much larger, as a partial workaround for
-         * the fact that JVMs often place arrays in locations that
-         * share GC bookkeeping (especially cardmarks) such that
-         * per-write accesses encounter serious memory contention.
-         */
-        static final int INITIAL_QUEUE_CAPACITY = 1 << 13;
-
-        /**
-         * Maximum size for queue arrays. Must be a power of two less
-         * than or equal to 1 << (31 - width of array entry) to ensure
-         * lack of wraparound of index calculations, but defined to a
-         * value a bit less than this to help users trap runaway
-         * programs before saturating systems.
-         */
-        static final int MAXIMUM_QUEUE_CAPACITY = 1 << 26; // 64M
-
-        // Instance fields
-
-        volatile int scanState;    // versioned, negative if inactive
-        int stackPred;             // pool stack (ctl) predecessor
-        int nsteals;               // number of steals
-        int hint;                  // randomization and stealer index hint
-        int config;                // pool index and mode
-        volatile int qlock;        // 1: locked, < 0: terminate; else 0
-        volatile int base;         // index of next slot for poll
+        volatile int source;       // source queue id, or sentinel
+        int id;                    // pool index, mode, tag
+        int base;                  // index of next slot for poll
         int top;                   // index of next slot for push
-        ForkJoinTask<?>[] array;   // the elements (initially unallocated)
+        volatile int phase;        // versioned, negative: queued, 1: locked
+        int stackPred;             // pool stack (ctl) predecessor link
+        int nsteals;               // number of steals
+        ForkJoinTask<?>[] array;   // the queued tasks; power of 2 size
         final ForkJoinPool pool;   // the containing pool (may be null)
         final ForkJoinWorkerThread owner; // owning thread or null if shared
-        volatile Thread parker;    // == owner during call to park; else null
-        volatile ForkJoinTask<?> currentJoin; // task being joined in awaitJoin
-
-      // Android-removed: @Contended, this hint is not used by the Android runtime.
-      // @jdk.internal.vm.annotation.Contended("group2") // segregate
-        volatile ForkJoinTask<?> currentSteal; // nonnull when running some task
 
         WorkQueue(ForkJoinPool pool, ForkJoinWorkerThread owner) {
             this.pool = pool;
@@ -834,17 +797,28 @@
         }
 
         /**
+         * Tries to lock shared queue by CASing phase field.
+         */
+        final boolean tryLockPhase() {
+            return PHASE.compareAndSet(this, 0, 1);
+        }
+
+        final void releasePhaseLock() {
+            PHASE.setRelease(this, 0);
+        }
+
+        /**
          * Returns an exportable index (used by ForkJoinWorkerThread).
          */
         final int getPoolIndex() {
-            return (config & 0xffff) >>> 1; // ignore odd/even tag bit
+            return (id & 0xffff) >>> 1; // ignore odd/even tag bit
         }
 
         /**
          * Returns the approximate number of tasks in the queue.
          */
         final int queueSize() {
-            int n = base - top;       // read base first
+            int n = (int)BASE.getAcquire(this) - top;
             return (n >= 0) ? 0 : -n; // ignore transient negative
         }
 
@@ -854,11 +828,12 @@
          * near-empty queue has at least one unclaimed task.
          */
         final boolean isEmpty() {
-            ForkJoinTask<?>[] a; int n, al, s;
-            return ((n = base - (s = top)) >= 0 || // possibly one task
+            ForkJoinTask<?>[] a; int n, cap, b;
+            VarHandle.acquireFence(); // needed by external callers
+            return ((n = (b = base) - top) >= 0 || // possibly one task
                     (n == -1 && ((a = array) == null ||
-                                 (al = a.length) == 0 ||
-                                 a[(al - 1) & (s - 1)] == null)));
+                                 (cap = a.length) == 0 ||
+                                 a[(cap - 1) & b] == null)));
         }
 
         /**
@@ -868,116 +843,99 @@
          * @throws RejectedExecutionException if array cannot be resized
          */
         final void push(ForkJoinTask<?> task) {
-            U.storeFence();              // ensure safe publication
-            int s = top, al, d; ForkJoinTask<?>[] a;
-            if ((a = array) != null && (al = a.length) > 0) {
-                a[(al - 1) & s] = task;  // relaxed writes OK
+            ForkJoinTask<?>[] a;
+            int s = top, d, cap, m;
+            ForkJoinPool p = pool;
+            if ((a = array) != null && (cap = a.length) > 0) {
+                QA.setRelease(a, (m = cap - 1) & s, task);
                 top = s + 1;
-                ForkJoinPool p = pool;
-                if ((d = base - s) == 0 && p != null) {
-                    U.fullFence();
+                if (((d = s - (int)BASE.getAcquire(this)) & ~1) == 0 &&
+                    p != null) {                 // size 0 or 1
+                    VarHandle.fullFence();
                     p.signalWork();
                 }
-                else if (al + d == 1)
-                    growArray();
+                else if (d == m)
+                    growArray(false);
             }
         }
 
         /**
-         * Initializes or doubles the capacity of array. Call either
-         * by owner or with lock held -- it is OK for base, but not
-         * top, to move while resizings are in progress.
+         * Version of push for shared queues. Call only with phase lock held.
+         * @return true if should signal work
          */
-        final ForkJoinTask<?>[] growArray() {
-            ForkJoinTask<?>[] oldA = array;
-            int size = oldA != null ? oldA.length << 1 : INITIAL_QUEUE_CAPACITY;
-            if (size < INITIAL_QUEUE_CAPACITY || size > MAXIMUM_QUEUE_CAPACITY)
+        final boolean lockedPush(ForkJoinTask<?> task) {
+            ForkJoinTask<?>[] a;
+            boolean signal = false;
+            int s = top, b = base, cap, d;
+            if ((a = array) != null && (cap = a.length) > 0) {
+                a[(cap - 1) & s] = task;
+                top = s + 1;
+                if (b - s + cap - 1 == 0)
+                    growArray(true);
+                else {
+                    phase = 0; // full volatile unlock
+                    if (((s - base) & ~1) == 0) // size 0 or 1
+                        signal = true;
+                }
+            }
+            return signal;
+        }
+
+        /**
+         * Doubles the capacity of array. Call either by owner or with
+         * lock held -- it is OK for base, but not top, to move while
+         * resizings are in progress.
+         */
+        final void growArray(boolean locked) {
+            ForkJoinTask<?>[] newA = null;
+            try {
+                ForkJoinTask<?>[] oldA; int oldSize, newSize;
+                if ((oldA = array) != null && (oldSize = oldA.length) > 0 &&
+                    (newSize = oldSize << 1) <= MAXIMUM_QUEUE_CAPACITY &&
+                    newSize > 0) {
+                    try {
+                        newA = new ForkJoinTask<?>[newSize];
+                    } catch (OutOfMemoryError ex) {
+                    }
+                    if (newA != null) { // poll from old array, push to new
+                        int oldMask = oldSize - 1, newMask = newSize - 1;
+                        for (int s = top - 1, k = oldMask; k >= 0; --k) {
+                            ForkJoinTask<?> x = (ForkJoinTask<?>)
+                                QA.getAndSet(oldA, s & oldMask, null);
+                            if (x != null)
+                                newA[s-- & newMask] = x;
+                            else
+                                break;
+                        }
+                        array = newA;
+                        VarHandle.releaseFence();
+                    }
+                }
+            } finally {
+                if (locked)
+                    phase = 0;
+            }
+            if (newA == null)
                 throw new RejectedExecutionException("Queue capacity exceeded");
-            int oldMask, t, b;
-            ForkJoinTask<?>[] a = array = new ForkJoinTask<?>[size];
-            if (oldA != null && (oldMask = oldA.length - 1) > 0 &&
-                (t = top) - (b = base) > 0) {
-                int mask = size - 1;
-                do { // emulate poll from old array, push to new array
-                    int index = b & oldMask;
-                    long offset = ((long)index << ASHIFT) + ABASE;
-                    ForkJoinTask<?> x = (ForkJoinTask<?>)
-                        U.getObjectVolatile(oldA, offset);
-                    if (x != null &&
-                        U.compareAndSwapObject(oldA, offset, x, null))
-                        a[b & mask] = x;
-                } while (++b != t);
-                U.storeFence();
-            }
-            return a;
-        }
-
-        /**
-         * Takes next task, if one exists, in LIFO order.  Call only
-         * by owner in unshared queues.
-         */
-        final ForkJoinTask<?> pop() {
-            int b = base, s = top, al, i; ForkJoinTask<?>[] a;
-            if ((a = array) != null && b != s && (al = a.length) > 0) {
-                int index = (al - 1) & --s;
-                long offset = ((long)index << ASHIFT) + ABASE;
-                ForkJoinTask<?> t = (ForkJoinTask<?>)
-                    U.getObject(a, offset);
-                if (t != null &&
-                    U.compareAndSwapObject(a, offset, t, null)) {
-                    top = s;
-                    return t;
-                }
-            }
-            return null;
-        }
-
-        /**
-         * Takes a task in FIFO order if b is base of queue and a task
-         * can be claimed without contention. Specialized versions
-         * appear in ForkJoinPool methods scan and helpStealer.
-         */
-        final ForkJoinTask<?> pollAt(int b) {
-            ForkJoinTask<?>[] a; int al;
-            if ((a = array) != null && (al = a.length) > 0) {
-                int index = (al - 1) & b;
-                long offset = ((long)index << ASHIFT) + ABASE;
-                ForkJoinTask<?> t = (ForkJoinTask<?>)
-                    U.getObjectVolatile(a, offset);
-                if (t != null && b++ == base &&
-                    U.compareAndSwapObject(a, offset, t, null)) {
-                    base = b;
-                    return t;
-                }
-            }
-            return null;
         }
 
         /**
          * Takes next task, if one exists, in FIFO order.
          */
         final ForkJoinTask<?> poll() {
-            for (;;) {
-                int b = base, s = top, d, al; ForkJoinTask<?>[] a;
-                if ((a = array) != null && (d = b - s) < 0 &&
-                    (al = a.length) > 0) {
-                    int index = (al - 1) & b;
-                    long offset = ((long)index << ASHIFT) + ABASE;
-                    ForkJoinTask<?> t = (ForkJoinTask<?>)
-                        U.getObjectVolatile(a, offset);
-                    if (b++ == base) {
-                        if (t != null) {
-                            if (U.compareAndSwapObject(a, offset, t, null)) {
-                                base = b;
-                                return t;
-                            }
-                        }
-                        else if (d == -1)
-                            break; // now empty
+            int b, k, cap; ForkJoinTask<?>[] a;
+            while ((a = array) != null && (cap = a.length) > 0 &&
+                   top - (b = base) > 0) {
+                ForkJoinTask<?> t = (ForkJoinTask<?>)
+                    QA.getAcquire(a, k = (cap - 1) & b);
+                if (base == b++) {
+                    if (t == null)
+                        Thread.yield(); // await index advance
+                    else if (QA.compareAndSet(a, k, t, null)) {
+                        BASE.setOpaque(this, b);
+                        return t;
                     }
                 }
-                else
-                    break;
             }
             return null;
         }
@@ -986,96 +944,59 @@
          * Takes next task, if one exists, in order specified by mode.
          */
         final ForkJoinTask<?> nextLocalTask() {
-            return (config < 0) ? poll() : pop();
+            ForkJoinTask<?> t = null;
+            int md = id, b, s, d, cap; ForkJoinTask<?>[] a;
+            if ((a = array) != null && (cap = a.length) > 0 &&
+                (d = (s = top) - (b = base)) > 0) {
+                if ((md & FIFO) == 0 || d == 1) {
+                    if ((t = (ForkJoinTask<?>)
+                         QA.getAndSet(a, (cap - 1) & --s, null)) != null)
+                        TOP.setOpaque(this, s);
+                }
+                else if ((t = (ForkJoinTask<?>)
+                          QA.getAndSet(a, (cap - 1) & b++, null)) != null) {
+                    BASE.setOpaque(this, b);
+                }
+                else // on contention in FIFO mode, use regular poll
+                    t = poll();
+            }
+            return t;
         }
 
         /**
          * Returns next task, if one exists, in order specified by mode.
          */
         final ForkJoinTask<?> peek() {
-            int al; ForkJoinTask<?>[] a;
-            return ((a = array) != null && (al = a.length) > 0) ?
-                a[(al - 1) & (config < 0 ? base : top - 1)] : null;
+            int cap; ForkJoinTask<?>[] a;
+            return ((a = array) != null && (cap = a.length) > 0) ?
+                a[(cap - 1) & ((id & FIFO) != 0 ? base : top - 1)] : null;
         }
 
         /**
          * Pops the given task only if it is at the current top.
          */
         final boolean tryUnpush(ForkJoinTask<?> task) {
-            int b = base, s = top, al; ForkJoinTask<?>[] a;
-            if ((a = array) != null && b != s && (al = a.length) > 0) {
-                int index = (al - 1) & --s;
-                long offset = ((long)index << ASHIFT) + ABASE;
-                if (U.compareAndSwapObject(a, offset, task, null)) {
-                    top = s;
-                    return true;
-                }
-            }
-            return false;
-        }
-
-        /**
-         * Shared version of push. Fails if already locked.
-         *
-         * @return status: > 0 locked, 0 possibly was empty, < 0 was nonempty
-         */
-        final int sharedPush(ForkJoinTask<?> task) {
-            int stat;
-            if (U.compareAndSwapInt(this, QLOCK, 0, 1)) {
-                int b = base, s = top, al, d; ForkJoinTask<?>[] a;
-                if ((a = array) != null && (al = a.length) > 0 &&
-                    al - 1 + (d = b - s) > 0) {
-                    a[(al - 1) & s] = task;
-                    top = s + 1;                 // relaxed writes OK here
-                    qlock = 0;
-                    stat = (d < 0 && b == base) ? d : 0;
-                }
-                else {
-                    growAndSharedPush(task);
-                    stat = 0;
-                }
-            }
-            else
-                stat = 1;
-            return stat;
-        }
-
-        /**
-         * Helper for sharedPush; called only when locked and resize
-         * needed.
-         */
-        private void growAndSharedPush(ForkJoinTask<?> task) {
-            try {
-                growArray();
-                int s = top, al; ForkJoinTask<?>[] a;
-                if ((a = array) != null && (al = a.length) > 0) {
-                    a[(al - 1) & s] = task;
-                    top = s + 1;
-                }
-            } finally {
-                qlock = 0;
-            }
+            boolean popped = false;
+            int s, cap; ForkJoinTask<?>[] a;
+            if ((a = array) != null && (cap = a.length) > 0 &&
+                (s = top) != base &&
+                (popped = QA.compareAndSet(a, (cap - 1) & --s, task, null)))
+                TOP.setOpaque(this, s);
+            return popped;
         }
 
         /**
          * Shared version of tryUnpush.
          */
-        final boolean trySharedUnpush(ForkJoinTask<?> task) {
+        final boolean tryLockedUnpush(ForkJoinTask<?> task) {
             boolean popped = false;
-            int s = top - 1, al; ForkJoinTask<?>[] a;
-            if ((a = array) != null && (al = a.length) > 0) {
-                int index = (al - 1) & s;
-                long offset = ((long)index << ASHIFT) + ABASE;
-                ForkJoinTask<?> t = (ForkJoinTask<?>) U.getObject(a, offset);
-                if (t == task &&
-                    U.compareAndSwapInt(this, QLOCK, 0, 1)) {
-                    if (top == s + 1 && array == a &&
-                        U.compareAndSwapObject(a, offset, task, null)) {
-                        popped = true;
-                        top = s;
-                    }
-                    U.putOrderedInt(this, QLOCK, 0);
-                }
+            int s = top - 1, k, cap; ForkJoinTask<?>[] a;
+            if ((a = array) != null && (cap = a.length) > 0 &&
+                a[k = (cap - 1) & s] == task && tryLockPhase()) {
+                if (top == s + 1 && array == a &&
+                    (popped = QA.compareAndSet(a, k, task, null)))
+                    top = s;
+                releasePhaseLock();
             }
             return popped;
         }
@@ -1084,252 +1005,150 @@
          * Removes and cancels all known tasks, ignoring any exceptions.
          */
         final void cancelAll() {
-            ForkJoinTask<?> t;
-            if ((t = currentJoin) != null) {
-                currentJoin = null;
-                ForkJoinTask.cancelIgnoringExceptions(t);
-            }
-            if ((t = currentSteal) != null) {
-                currentSteal = null;
-                ForkJoinTask.cancelIgnoringExceptions(t);
-            }
-            while ((t = poll()) != null)
+            for (ForkJoinTask<?> t; (t = poll()) != null; )
                 ForkJoinTask.cancelIgnoringExceptions(t);
         }
 
         // Specialized execution methods
 
         /**
-         * Pops and executes up to POLL_LIMIT tasks or until empty.
+         * Runs the given (stolen) task if nonnull, as well as
+         * remaining local tasks and others available from the given
+         * queue, up to bound n (to avoid infinite unfairness).
          */
-        final void localPopAndExec() {
-            for (int nexec = 0;;) {
-                int b = base, s = top, al; ForkJoinTask<?>[] a;
-                if ((a = array) != null && b != s && (al = a.length) > 0) {
-                    int index = (al - 1) & --s;
-                    long offset = ((long)index << ASHIFT) + ABASE;
-                    ForkJoinTask<?> t = (ForkJoinTask<?>)
-                        U.getAndSetObject(a, offset, null);
-                    if (t != null) {
-                        top = s;
-                        (currentSteal = t).doExec();
-                        if (++nexec > POLL_LIMIT)
-                            break;
-                    }
-                    else
+        final void topLevelExec(ForkJoinTask<?> t, WorkQueue q, int n) {
+            if (t != null && q != null) { // hoist checks
+                int nstolen = 1;
+                for (;;) {
+                    t.doExec();
+                    if (n-- < 0)
                         break;
-                }
-                else
-                    break;
-            }
-        }
-
-        /**
-         * Polls and executes up to POLL_LIMIT tasks or until empty.
-         */
-        final void localPollAndExec() {
-            for (int nexec = 0;;) {
-                int b = base, s = top, al; ForkJoinTask<?>[] a;
-                if ((a = array) != null && b != s && (al = a.length) > 0) {
-                    int index = (al - 1) & b++;
-                    long offset = ((long)index << ASHIFT) + ABASE;
-                    ForkJoinTask<?> t = (ForkJoinTask<?>)
-                        U.getAndSetObject(a, offset, null);
-                    if (t != null) {
-                        base = b;
-                        t.doExec();
-                        if (++nexec > POLL_LIMIT)
+                    else if ((t = nextLocalTask()) == null) {
+                        if ((t = q.poll()) == null)
                             break;
+                        else
+                            ++nstolen;
                     }
                 }
-                else
-                    break;
-            }
-        }
-
-        /**
-         * Executes the given task and (some) remaining local tasks.
-         */
-        final void runTask(ForkJoinTask<?> task) {
-            if (task != null) {
-                task.doExec();
-                if (config < 0)
-                    localPollAndExec();
-                else
-                    localPopAndExec();
-                int ns = ++nsteals;
                 ForkJoinWorkerThread thread = owner;
-                currentSteal = null;
-                if (ns < 0)           // collect on overflow
-                    transferStealCount(pool);
+                nsteals += nstolen;
+                source = 0;
                 if (thread != null)
                     thread.afterTopLevelExec();
             }
         }
 
         /**
-         * Adds steal count to pool steal count if it exists, and resets.
+         * If present, removes task from queue and executes it.
          */
-        final void transferStealCount(ForkJoinPool p) {
-            AuxState aux;
-            if (p != null && (aux = p.auxState) != null) {
-                long s = nsteals;
-                nsteals = 0;            // if negative, correct for overflow
-                if (s < 0) s = Integer.MAX_VALUE;
-                aux.lock();
-                try {
-                    aux.stealCount += s;
-                } finally {
-                    aux.unlock();
-                }
-            }
-        }
-
-        /**
-         * If present, removes from queue and executes the given task,
-         * or any other cancelled task. Used only by awaitJoin.
-         *
-         * @return true if queue empty and task not known to be done
-         */
-        final boolean tryRemoveAndExec(ForkJoinTask<?> task) {
-            if (task != null && task.status >= 0) {
-                int b, s, d, al; ForkJoinTask<?>[] a;
-                while ((d = (b = base) - (s = top)) < 0 &&
-                       (a = array) != null && (al = a.length) > 0) {
-                    for (;;) {      // traverse from s to b
-                        int index = --s & (al - 1);
-                        long offset = (index << ASHIFT) + ABASE;
-                        ForkJoinTask<?> t = (ForkJoinTask<?>)
-                            U.getObjectVolatile(a, offset);
-                        if (t == null)
-                            break;                   // restart
-                        else if (t == task) {
-                            boolean removed = false;
-                            if (s + 1 == top) {      // pop
-                                if (U.compareAndSwapObject(a, offset, t, null)) {
-                                    top = s;
-                                    removed = true;
-                                }
+        final void tryRemoveAndExec(ForkJoinTask<?> task) {
+            ForkJoinTask<?>[] a; int s, cap;
+            if ((a = array) != null && (cap = a.length) > 0 &&
+                (s = top) - base > 0) { // traverse from top
+                for (int m = cap - 1, ns = s - 1, i = ns; ; --i) {
+                    int index = i & m;
+                    ForkJoinTask<?> t = (ForkJoinTask<?>)QA.get(a, index);
+                    if (t == null)
+                        break;
+                    else if (t == task) {
+                        if (QA.compareAndSet(a, index, t, null)) {
+                            top = ns;   // safely shift down
+                            for (int j = i; j != ns; ++j) {
+                                ForkJoinTask<?> f;
+                                int pindex = (j + 1) & m;
+                                f = (ForkJoinTask<?>)QA.get(a, pindex);
+                                QA.setVolatile(a, pindex, null);
+                                int jindex = j & m;
+                                QA.setRelease(a, jindex, f);
                             }
-                            else if (base == b)      // replace with proxy
-                                removed = U.compareAndSwapObject(a, offset, t,
-                                                                 new EmptyTask());
-                            if (removed) {
-                                ForkJoinTask<?> ps = currentSteal;
-                                (currentSteal = task).doExec();
-                                currentSteal = ps;
-                            }
-                            break;
+                            VarHandle.releaseFence();
+                            t.doExec();
                         }
-                        else if (t.status < 0 && s + 1 == top) {
-                            if (U.compareAndSwapObject(a, offset, t, null)) {
-                                top = s;
-                            }
-                            break;                  // was cancelled
-                        }
-                        else if (++d == 0) {
-                            if (base != b)          // rescan
-                                break;
-                            return false;
-                        }
+                        break;
                     }
-                    if (task.status < 0)
-                        return false;
                 }
             }
-            return true;
         }
 
         /**
-         * Pops task if in the same CC computation as the given task,
-         * in either shared or owned mode. Used only by helpComplete.
+         * Tries to pop and run tasks within the target's computation
+         * until done, not found, or limit exceeded.
+         *
+         * @param task root of CountedCompleter computation
+         * @param limit max runs, or zero for no limit
+         * @param shared true if must lock to extract task
+         * @return task status on exit
          */
-        final CountedCompleter<?> popCC(CountedCompleter<?> task, int mode) {
-            int b = base, s = top, al; ForkJoinTask<?>[] a;
-            if ((a = array) != null && b != s && (al = a.length) > 0) {
-                int index = (al - 1) & (s - 1);
-                long offset = ((long)index << ASHIFT) + ABASE;
-                ForkJoinTask<?> o = (ForkJoinTask<?>)
-                    U.getObjectVolatile(a, offset);
-                if (o instanceof CountedCompleter) {
-                    CountedCompleter<?> t = (CountedCompleter<?>)o;
-                    for (CountedCompleter<?> r = t;;) {
-                        if (r == task) {
-                            if ((mode & IS_OWNED) == 0) {
-                                boolean popped = false;
-                                if (U.compareAndSwapInt(this, QLOCK, 0, 1)) {
+        final int helpCC(CountedCompleter<?> task, int limit, boolean shared) {
+            int status = 0;
+            if (task != null && (status = task.status) >= 0) {
+                int s, k, cap; ForkJoinTask<?>[] a;
+                while ((a = array) != null && (cap = a.length) > 0 &&
+                       (s = top) - base > 0) {
+                    CountedCompleter<?> v = null;
+                    ForkJoinTask<?> o = a[k = (cap - 1) & (s - 1)];
+                    if (o instanceof CountedCompleter) {
+                        CountedCompleter<?> t = (CountedCompleter<?>)o;
+                        for (CountedCompleter<?> f = t;;) {
+                            if (f != task) {
+                                if ((f = f.completer) == null)
+                                    break;
+                            }
+                            else if (shared) {
+                                if (tryLockPhase()) {
                                     if (top == s && array == a &&
-                                        U.compareAndSwapObject(a, offset,
-                                                               t, null)) {
-                                        popped = true;
+                                        QA.compareAndSet(a, k, t, null)) {
                                         top = s - 1;
+                                        v = t;
                                     }
-                                    U.putOrderedInt(this, QLOCK, 0);
-                                    if (popped)
-                                        return t;
+                                    releasePhaseLock();
                                 }
+                                break;
                             }
-                            else if (U.compareAndSwapObject(a, offset,
-                                                            t, null)) {
-                                top = s - 1;
-                                return t;
+                            else {
+                                if (QA.compareAndSet(a, k, t, null)) {
+                                    top = s - 1;
+                                    v = t;
+                                }
+                                break;
                             }
-                            break;
                         }
-                        else if ((r = r.completer) == null) // try parent
-                            break;
                     }
+                    if (v != null)
+                        v.doExec();
+                    if ((status = task.status) < 0 || v == null ||
+                        (limit != 0 && --limit == 0))
+                        break;
                 }
             }
-            return null;
+            return status;
         }
 
         /**
-         * Steals and runs a task in the same CC computation as the
-         * given task if one exists and can be taken without
-         * contention. Otherwise returns a checksum/control value for
-         * use by method helpComplete.
+         * Tries to poll and run AsynchronousCompletionTasks until
+         * none found or blocker is released
          *
-         * @return 1 if successful, 2 if retryable (lost to another
-         * stealer), -1 if non-empty but no matching task found, else
-         * the base index, forced negative.
+         * @param blocker the blocker
          */
-        final int pollAndExecCC(CountedCompleter<?> task) {
-            ForkJoinTask<?>[] a;
-            int b = base, s = top, al, h;
-            if ((a = array) != null && b != s && (al = a.length) > 0) {
-                int index = (al - 1) & b;
-                long offset = ((long)index << ASHIFT) + ABASE;
-                ForkJoinTask<?> o = (ForkJoinTask<?>)
-                    U.getObjectVolatile(a, offset);
-                if (o == null)
-                    h = 2;                      // retryable
-                else if (!(o instanceof CountedCompleter))
-                    h = -1;                     // unmatchable
-                else {
-                    CountedCompleter<?> t = (CountedCompleter<?>)o;
-                    for (CountedCompleter<?> r = t;;) {
-                        if (r == task) {
-                            if (b++ == base &&
-                                U.compareAndSwapObject(a, offset, t, null)) {
-                                base = b;
-                                t.doExec();
-                                h = 1;          // success
-                            }
-                            else
-                                h = 2;          // lost CAS
+        final void helpAsyncBlocker(ManagedBlocker blocker) {
+            if (blocker != null) {
+                int b, k, cap; ForkJoinTask<?>[] a; ForkJoinTask<?> t;
+                while ((a = array) != null && (cap = a.length) > 0 &&
+                       top - (b = base) > 0) {
+                    t = (ForkJoinTask<?>)QA.getAcquire(a, k = (cap - 1) & b);
+                    if (blocker.isReleasable())
+                        break;
+                    else if (base == b++ && t != null) {
+                        if (!(t instanceof CompletableFuture.
+                              AsynchronousCompletionTask))
                             break;
-                        }
-                        else if ((r = r.completer) == null) {
-                            h = -1;             // unmatched
-                            break;
+                        else if (QA.compareAndSet(a, k, t, null)) {
+                            BASE.setOpaque(this, b);
+                            t.doExec();
                         }
                     }
                 }
             }
-            else
-                h = b | Integer.MIN_VALUE;      // to sense movement on re-poll
-            return h;
         }
 
         /**
@@ -1337,29 +1156,24 @@
          */
         final boolean isApparentlyUnblocked() {
             Thread wt; Thread.State s;
-            return (scanState >= 0 &&
-                    (wt = owner) != null &&
+            return ((wt = owner) != null &&
                     (s = wt.getState()) != Thread.State.BLOCKED &&
                     s != Thread.State.WAITING &&
                     s != Thread.State.TIMED_WAITING);
         }
 
-        // Unsafe mechanics. Note that some are (and must be) the same as in FJP
-        private static final sun.misc.Unsafe U = sun.misc.Unsafe.getUnsafe();
-        private static final long QLOCK;
-        private static final int ABASE;
-        private static final int ASHIFT;
+        // VarHandle mechanics.
+        static final VarHandle PHASE;
+        static final VarHandle BASE;
+        static final VarHandle TOP;
         static {
             try {
-                QLOCK = U.objectFieldOffset
-                    (WorkQueue.class.getDeclaredField("qlock"));
-                ABASE = U.arrayBaseOffset(ForkJoinTask[].class);
-                int scale = U.arrayIndexScale(ForkJoinTask[].class);
-                if ((scale & (scale - 1)) != 0)
-                    throw new Error("array index scale not a power of two");
-                ASHIFT = 31 - Integer.numberOfLeadingZeros(scale);
+                MethodHandles.Lookup l = MethodHandles.lookup();
+                PHASE = l.findVarHandle(WorkQueue.class, "phase", int.class);
+                BASE = l.findVarHandle(WorkQueue.class, "base", int.class);
+                TOP = l.findVarHandle(WorkQueue.class, "top", int.class);
             } catch (ReflectiveOperationException e) {
-                throw new Error(e);
+                throw new ExceptionInInitializerError(e);
             }
         }
     }
@@ -1375,7 +1189,7 @@
 
     /**
      * Permission required for callers of methods that may start or
-     * kill threads.  Also used as a static lock in tryInitialize.
+     * kill threads.
      */
     static final RuntimePermission modifyThreadPermission;
 
@@ -1416,18 +1230,15 @@
     // static configuration constants
 
     /**
-     * Initial timeout value (in milliseconds) for the thread
-     * triggering quiescence to park waiting for new work. On timeout,
-     * the thread will instead try to shrink the number of workers.
-     * The value should be large enough to avoid overly aggressive
-     * shrinkage during most transient stalls (long GCs etc).
+     * Default idle timeout value (in milliseconds) for the thread
+     * triggering quiescence to park waiting for new work
      */
-    private static final long IDLE_TIMEOUT_MS = 2000L; // 2sec
+    private static final long DEFAULT_KEEPALIVE = 60_000L;
 
     /**
-     * Tolerance for idle timeouts, to cope with timer undershoots.
+     * Undershoot tolerance for idle timeouts
      */
-    private static final long TIMEOUT_SLOP_MS =   20L; // 20ms
+    private static final long TIMEOUT_SLOP = 20L;
 
     /**
      * The default value for COMMON_MAX_SPARES.  Overridable using the
@@ -1447,7 +1258,7 @@
 
     /*
      * Bits and masks for field ctl, packed with 4 16 bit subfields:
-     * AC: Number of active running workers minus target parallelism
+     * RC: Number of released (unqueued) workers minus target parallelism
      * TC: Number of total workers minus target parallelism
      * SS: version count and status of top waiting thread
      * ID: poolIndex of top of Treiber stack of waiters
@@ -1456,26 +1267,30 @@
      * (including version bits) as sp=(int)ctl.  The offsets of counts
      * by the target parallelism and the positionings of fields makes
      * it possible to perform the most common checks via sign tests of
-     * fields: When ac is negative, there are not enough active
+     * fields: When ac is negative, there are not enough unqueued
      * workers, when tc is negative, there are not enough total
      * workers.  When sp is non-zero, there are waiting workers.  To
      * deal with possibly negative fields, we use casts in and out of
      * "short" and/or signed shifts to maintain signedness.
      *
-     * Because it occupies uppermost bits, we can add one active count
-     * using getAndAddLong of AC_UNIT, rather than CAS, when returning
+     * Because it occupies uppermost bits, we can add one release count
+     * using getAndAddLong of RC_UNIT, rather than CAS, when returning
      * from a blocked join.  Other updates entail multiple subfields
      * and masking, requiring CAS.
+     *
+     * The limits packed in field "bounds" are also offset by the
+     * parallelism level to make them comparable to the ctl rc and tc
+     * fields.
      */
 
     // Lower and upper word masks
     private static final long SP_MASK    = 0xffffffffL;
     private static final long UC_MASK    = ~SP_MASK;
 
-    // Active counts
-    private static final int  AC_SHIFT   = 48;
-    private static final long AC_UNIT    = 0x0001L << AC_SHIFT;
-    private static final long AC_MASK    = 0xffffL << AC_SHIFT;
+    // Release counts
+    private static final int  RC_SHIFT   = 48;
+    private static final long RC_UNIT    = 0x0001L << RC_SHIFT;
+    private static final long RC_MASK    = 0xffffL << RC_SHIFT;
 
     // Total counts
     private static final int  TC_SHIFT   = 32;
@@ -1483,52 +1298,21 @@
     private static final long TC_MASK    = 0xffffL << TC_SHIFT;
     private static final long ADD_WORKER = 0x0001L << (TC_SHIFT + 15); // sign
 
-    // runState bits: SHUTDOWN must be negative, others arbitrary powers of two
-    private static final int  STARTED    = 1;
-    private static final int  STOP       = 1 << 1;
-    private static final int  TERMINATED = 1 << 2;
-    private static final int  SHUTDOWN   = 1 << 31;
-
     // Instance fields
-    volatile long ctl;                   // main pool control
-    volatile int runState;
-    final int config;                    // parallelism, mode
-    AuxState auxState;                   // lock, steal counts
-    volatile WorkQueue[] workQueues;     // main registry
-    final String workerNamePrefix;       // to create worker name string
+
+    volatile long stealCount;            // collects worker nsteals
+    final long keepAlive;                // milliseconds before dropping if idle
+    int indexSeed;                       // next worker index
+    final int bounds;                    // min, max threads packed as shorts
+    volatile int mode;                   // parallelism, runstate, queue mode
+    WorkQueue[] workQueues;              // main registry
+    final String workerNamePrefix;       // for worker thread string; sync lock
     final ForkJoinWorkerThreadFactory factory;
     final UncaughtExceptionHandler ueh;  // per-worker UEH
+    final Predicate<? super ForkJoinPool> saturate;
 
-    /**
-     * Instantiates fields upon first submission, or upon shutdown if
-     * no submissions. If checkTermination true, also responds to
-     * termination by external calls submitting tasks.
-     */
-    private void tryInitialize(boolean checkTermination) {
-        if (runState == 0) { // bootstrap by locking static field
-            int p = config & SMASK;
-            int n = (p > 1) ? p - 1 : 1; // ensure at least 2 slots
-            n |= n >>> 1;    // create workQueues array with size a power of two
-            n |= n >>> 2;
-            n |= n >>> 4;
-            n |= n >>> 8;
-            n |= n >>> 16;
-            n = ((n + 1) << 1) & SMASK;
-            AuxState aux = new AuxState();
-            WorkQueue[] ws = new WorkQueue[n];
-            synchronized (modifyThreadPermission) { // double-check
-                if (runState == 0) {
-                    workQueues = ws;
-                    auxState = aux;
-                    runState = STARTED;
-                }
-            }
-        }
-        if (checkTermination && runState < 0) {
-            tryTerminate(false, false); // help terminate
-            throw new RejectedExecutionException();
-        }
-    }
+    @jdk.internal.vm.annotation.Contended("fjpctl") // segregate
+    volatile long ctl;                   // main pool control
 
     // Creating, registering and deregistering workers
 
@@ -1537,18 +1321,14 @@
      * count has already been incremented as a reservation.  Invokes
      * deregisterWorker on any failure.
      *
-     * @param isSpare true if this is a spare thread
      * @return true if successful
      */
-    private boolean createWorker(boolean isSpare) {
+    private boolean createWorker() {
         ForkJoinWorkerThreadFactory fac = factory;
         Throwable ex = null;
         ForkJoinWorkerThread wt = null;
-        WorkQueue q;
         try {
             if (fac != null && (wt = fac.newThread(this)) != null) {
-                if (isSpare && (q = wt.workQueue) != null)
-                    q.config |= SPARE_WORKER;
                 wt.start();
                 return true;
             }
@@ -1569,10 +1349,10 @@
      */
     private void tryAddWorker(long c) {
         do {
-            long nc = ((AC_MASK & (c + AC_UNIT)) |
+            long nc = ((RC_MASK & (c + RC_UNIT)) |
                        (TC_MASK & (c + TC_UNIT)));
-            if (ctl == c && U.compareAndSwapLong(this, CTL, c, nc)) {
-                createWorker(false);
+            if (ctl == c && CTL.compareAndSet(this, c, nc)) {
+                createWorker();
                 break;
             }
         } while (((c = ctl) & ADD_WORKER) != 0L && (int)c == 0);
@@ -1587,41 +1367,55 @@
      */
     final WorkQueue registerWorker(ForkJoinWorkerThread wt) {
         UncaughtExceptionHandler handler;
-        AuxState aux;
-        wt.setDaemon(true);                           // configure thread
+        wt.setDaemon(true);                             // configure thread
         if ((handler = ueh) != null)
             wt.setUncaughtExceptionHandler(handler);
+        int tid = 0;                                    // for thread name
+        int idbits = mode & FIFO;
+        String prefix = workerNamePrefix;
         WorkQueue w = new WorkQueue(this, wt);
-        int i = 0;                                    // assign a pool index
-        int mode = config & MODE_MASK;
-        if ((aux = auxState) != null) {
-            aux.lock();
-            try {
-                int s = (int)(aux.indexSeed += SEED_INCREMENT), n, m;
-                WorkQueue[] ws = workQueues;
-                if (ws != null && (n = ws.length) > 0) {
-                    i = (m = n - 1) & ((s << 1) | 1); // odd-numbered indices
-                    if (ws[i] != null) {              // collision
-                        int probes = 0;               // step by approx half n
-                        int step = (n <= 4) ? 2 : ((n >>> 1) & EVENMASK) + 2;
-                        while (ws[i = (i + step) & m] != null) {
-                            if (++probes >= n) {
-                                workQueues = ws = Arrays.copyOf(ws, n <<= 1);
-                                m = n - 1;
-                                probes = 0;
-                            }
+        if (prefix != null) {
+            synchronized (prefix) {
+                WorkQueue[] ws = workQueues; int n;
+                int s = indexSeed += SEED_INCREMENT;
+                idbits |= (s & ~(SMASK | FIFO | DORMANT));
+                if (ws != null && (n = ws.length) > 1) {
+                    int m = n - 1;
+                    tid = m & ((s << 1) | 1);           // odd-numbered indices
+                    for (int probes = n >>> 1;;) {      // find empty slot
+                        WorkQueue q;
+                        if ((q = ws[tid]) == null || q.phase == QUIET)
+                            break;
+                        else if (--probes == 0) {
+                            tid = n | 1;                // resize below
+                            break;
                         }
+                        else
+                            tid = (tid + 2) & m;
                     }
-                    w.hint = s;                       // use as random seed
-                    w.config = i | mode;
-                    w.scanState = i | (s & 0x7fff0000); // random seq bits
-                    ws[i] = w;
+                    w.phase = w.id = tid | idbits;      // now publishable
+
+                    if (tid < n)
+                        ws[tid] = w;
+                    else {                              // expand array
+                        int an = n << 1;
+                        WorkQueue[] as = new WorkQueue[an];
+                        as[tid] = w;
+                        int am = an - 1;
+                        for (int j = 0; j < n; ++j) {
+                            WorkQueue v;                // copy external queue
+                            if ((v = ws[j]) != null)    // position may change
+                                as[v.id & am & SQMASK] = v;
+                            if (++j >= n)
+                                break;
+                            as[j] = ws[j];              // copy worker
+                        }
+                        workQueues = as;
+                    }
                 }
-            } finally {
-                aux.unlock();
             }
+            wt.setName(prefix.concat(Integer.toString(tid)));
         }
-        wt.setName(workerNamePrefix.concat(Integer.toString(i >>> 1)));
         return w;
     }
 
@@ -1636,64 +1430,48 @@
      */
     final void deregisterWorker(ForkJoinWorkerThread wt, Throwable ex) {
         WorkQueue w = null;
+        int phase = 0;
         if (wt != null && (w = wt.workQueue) != null) {
-            AuxState aux; WorkQueue[] ws;          // remove index from array
-            int idx = w.config & SMASK;
-            int ns = w.nsteals;
-            if ((aux = auxState) != null) {
-                aux.lock();
-                try {
-                    if ((ws = workQueues) != null && ws.length > idx &&
-                        ws[idx] == w)
-                        ws[idx] = null;
-                    aux.stealCount += ns;
-                } finally {
-                    aux.unlock();
+            Object lock = workerNamePrefix;
+            int wid = w.id;
+            long ns = (long)w.nsteals & 0xffffffffL;
+            if (lock != null) {
+                synchronized (lock) {
+                    WorkQueue[] ws; int n, i;         // remove index from array
+                    if ((ws = workQueues) != null && (n = ws.length) > 0 &&
+                        ws[i = wid & (n - 1)] == w)
+                        ws[i] = null;
+                    stealCount += ns;
                 }
             }
+            phase = w.phase;
         }
-        if (w == null || (w.config & UNREGISTERED) == 0) { // else pre-adjusted
+        if (phase != QUIET) {                         // else pre-adjusted
             long c;                                   // decrement counts
-            do {} while (!U.compareAndSwapLong
-                         (this, CTL, c = ctl, ((AC_MASK & (c - AC_UNIT)) |
-                                               (TC_MASK & (c - TC_UNIT)) |
-                                               (SP_MASK & c))));
+            do {} while (!CTL.weakCompareAndSet
+                         (this, c = ctl, ((RC_MASK & (c - RC_UNIT)) |
+                                          (TC_MASK & (c - TC_UNIT)) |
+                                          (SP_MASK & c))));
         }
-        if (w != null) {
-            w.currentSteal = null;
-            w.qlock = -1;                             // ensure set
+        if (w != null)
             w.cancelAll();                            // cancel remaining tasks
-        }
-        while (tryTerminate(false, false) >= 0) {     // possibly replace
-            WorkQueue[] ws; int wl, sp; long c;
-            if (w == null || w.array == null ||
-                (ws = workQueues) == null || (wl = ws.length) <= 0)
-                break;
-            else if ((sp = (int)(c = ctl)) != 0) {    // wake up replacement
-                if (tryRelease(c, ws[(wl - 1) & sp], AC_UNIT))
-                    break;
-            }
-            else if (ex != null && (c & ADD_WORKER) != 0L) {
-                tryAddWorker(c);                      // create replacement
-                break;
-            }
-            else                                      // don't need replacement
-                break;
-        }
+
+        if (!tryTerminate(false, false) &&            // possibly replace worker
+            w != null && w.array != null)             // avoid repeated failures
+            signalWork();
+
         if (ex == null)                               // help clean on way out
             ForkJoinTask.helpExpungeStaleExceptions();
         else                                          // rethrow
             ForkJoinTask.rethrow(ex);
     }
 
-    // Signalling
-
     /**
-     * Tries to create or activate a worker if too few are active.
+     * Tries to create or release a worker if too few are running.
      */
     final void signalWork() {
         for (;;) {
-            long c; int sp, i; WorkQueue v; WorkQueue[] ws;
+            long c; int sp; WorkQueue[] ws; int i; WorkQueue v;
             if ((c = ctl) >= 0L)                      // enough workers
                 break;
             else if ((sp = (int)c) == 0) {            // no idle workers
@@ -1708,12 +1486,14 @@
             else if ((v = ws[i]) == null)
                 break;                                // terminating
             else {
-                int ns = sp & ~UNSIGNALLED;
-                int vs = v.scanState;
-                long nc = (v.stackPred & SP_MASK) | (UC_MASK & (c + AC_UNIT));
-                if (sp == vs && U.compareAndSwapLong(this, CTL, c, nc)) {
-                    v.scanState = ns;
-                    LockSupport.unpark(v.parker);
+                int np = sp & ~UNSIGNALLED;
+                int vp = v.phase;
+                long nc = (v.stackPred & SP_MASK) | (UC_MASK & (c + RC_UNIT));
+                Thread vt = v.owner;
+                if (sp == vp && CTL.compareAndSet(this, c, nc)) {
+                    v.phase = np;
+                    if (vt != null && v.source < 0)
+                        LockSupport.unpark(vt);
                     break;
                 }
             }
@@ -1721,502 +1501,181 @@
     }
 
     /**
-     * Signals and releases worker v if it is top of idle worker
-     * stack.  This performs a one-shot version of signalWork only if
-     * there is (apparently) at least one idle worker.
+     * Tries to decrement counts (sometimes implicitly) and possibly
+     * arrange for a compensating worker in preparation for blocking:
+     * If not all core workers yet exist, creates one, else if any are
+     * unreleased (possibly including caller) releases one, else if
+     * fewer than the minimum allowed number of workers running,
+     * checks to see that they are all active, and if so creates an
+     * extra worker unless over maximum limit and policy is to
+     * saturate.  Most of these steps can fail due to interference, in
+     * which case 0 is returned so caller will retry. A negative
+     * return value indicates that the caller doesn't need to
+     * re-adjust counts when later unblocked.
      *
-     * @param c incoming ctl value
-     * @param v if non-null, a worker
-     * @param inc the increment to active count (zero when compensating)
-     * @return true if successful
+     * @return 1: block then adjust, -1: block without adjust, 0 : retry
      */
-    private boolean tryRelease(long c, WorkQueue v, long inc) {
-        int sp = (int)c, ns = sp & ~UNSIGNALLED;
-        if (v != null) {
-            int vs = v.scanState;
-            long nc = (v.stackPred & SP_MASK) | (UC_MASK & (c + inc));
-            if (sp == vs && U.compareAndSwapLong(this, CTL, c, nc)) {
-                v.scanState = ns;
-                LockSupport.unpark(v.parker);
-                return true;
-            }
-        }
-        return false;
-    }
-
-    /**
-     * With approx probability of a missed signal, tries (once) to
-     * reactivate worker w (or some other worker), failing if stale or
-     * known to be already active.
-     *
-     * @param w the worker
-     * @param ws the workQueue array to use
-     * @param r random seed
-     */
-    private void tryReactivate(WorkQueue w, WorkQueue[] ws, int r) {
-        long c; int sp, wl; WorkQueue v;
-        if ((sp = (int)(c = ctl)) != 0 && w != null &&
-            ws != null && (wl = ws.length) > 0 &&
-            ((sp ^ r) & SS_SEQ) == 0 &&
-            (v = ws[(wl - 1) & sp]) != null) {
-            long nc = (v.stackPred & SP_MASK) | (UC_MASK & (c + AC_UNIT));
-            int ns = sp & ~UNSIGNALLED;
-            if (w.scanState < 0 &&
-                v.scanState == sp &&
-                U.compareAndSwapLong(this, CTL, c, nc)) {
-                v.scanState = ns;
-                LockSupport.unpark(v.parker);
-            }
-        }
-    }
-
-    /**
-     * If worker w exists and is active, enqueues and sets status to inactive.
-     *
-     * @param w the worker
-     * @param ss current (non-negative) scanState
-     */
-    private void inactivate(WorkQueue w, int ss) {
-        int ns = (ss + SS_SEQ) | UNSIGNALLED;
-        long lc = ns & SP_MASK, nc, c;
-        if (w != null) {
-            w.scanState = ns;
-            do {
-                nc = lc | (UC_MASK & ((c = ctl) - AC_UNIT));
-                w.stackPred = (int)c;
-            } while (!U.compareAndSwapLong(this, CTL, c, nc));
-        }
-    }
-
-    /**
-     * Possibly blocks worker w waiting for signal, or returns
-     * negative status if the worker should terminate. May return
-     * without status change if multiple stale unparks and/or
-     * interrupts occur.
-     *
-     * @param w the calling worker
-     * @return negative if w should terminate
-     */
-    private int awaitWork(WorkQueue w) {
-        int stat = 0;
-        if (w != null && w.scanState < 0) {
-            long c = ctl;
-            if ((int)(c >> AC_SHIFT) + (config & SMASK) <= 0)
-                stat = timedAwaitWork(w, c);     // possibly quiescent
-            else if ((runState & STOP) != 0)
-                stat = w.qlock = -1;             // pool terminating
-            else if (w.scanState < 0) {
-                w.parker = Thread.currentThread();
-                if (w.scanState < 0)             // recheck after write
-                    LockSupport.park(this);
-                w.parker = null;
-                if ((runState & STOP) != 0)
-                    stat = w.qlock = -1;         // recheck
-                else if (w.scanState < 0)
-                    Thread.interrupted();        // clear status
-            }
-        }
-        return stat;
-    }
-
-    /**
-     * Possibly triggers shutdown and tries (once) to block worker
-     * when pool is (or may be) quiescent. Waits up to a duration
-     * determined by number of workers.  On timeout, if ctl has not
-     * changed, terminates the worker, which will in turn wake up
-     * another worker to possibly repeat this process.
-     *
-     * @param w the calling worker
-     * @return negative if w should terminate
-     */
-    private int timedAwaitWork(WorkQueue w, long c) {
-        int stat = 0;
-        int scale = 1 - (short)(c >>> TC_SHIFT);
-        long deadline = (((scale <= 0) ? 1 : scale) * IDLE_TIMEOUT_MS +
-                         System.currentTimeMillis());
-        if ((runState >= 0 || (stat = tryTerminate(false, false)) > 0) &&
-            w != null && w.scanState < 0) {
-            int ss; AuxState aux;
-            w.parker = Thread.currentThread();
-            if (w.scanState < 0)
-                LockSupport.parkUntil(this, deadline);
-            w.parker = null;
-            if ((runState & STOP) != 0)
-                stat = w.qlock = -1;         // pool terminating
-            else if ((ss = w.scanState) < 0 && !Thread.interrupted() &&
-                     (int)c == ss && (aux = auxState) != null && ctl == c &&
-                     deadline - System.currentTimeMillis() <= TIMEOUT_SLOP_MS) {
-                aux.lock();
-                try {                        // pre-deregister
-                    WorkQueue[] ws;
-                    int cfg = w.config, idx = cfg & SMASK;
-                    long nc = ((UC_MASK & (c - TC_UNIT)) |
-                               (SP_MASK & w.stackPred));
-                    if ((runState & STOP) == 0 &&
-                        (ws = workQueues) != null &&
-                        idx < ws.length && idx >= 0 && ws[idx] == w &&
-                        U.compareAndSwapLong(this, CTL, c, nc)) {
-                        ws[idx] = null;
-                        w.config = cfg | UNREGISTERED;
-                        stat = w.qlock = -1;
+    private int tryCompensate(WorkQueue w) {
+        int t, n, sp;
+        long c = ctl;
+        WorkQueue[] ws = workQueues;
+        if ((t = (short)(c >>> TC_SHIFT)) >= 0) {
+            if (ws == null || (n = ws.length) <= 0 || w == null)
+                return 0;                        // disabled
+            else if ((sp = (int)c) != 0) {       // replace or release
+                WorkQueue v = ws[sp & (n - 1)];
+                int wp = w.phase;
+                long uc = UC_MASK & ((wp < 0) ? c + RC_UNIT : c);
+                int np = sp & ~UNSIGNALLED;
+                if (v != null) {
+                    int vp = v.phase;
+                    Thread vt = v.owner;
+                    long nc = ((long)v.stackPred & SP_MASK) | uc;
+                    if (vp == sp && CTL.compareAndSet(this, c, nc)) {
+                        v.phase = np;
+                        if (vt != null && v.source < 0)
+                            LockSupport.unpark(vt);
+                        return (wp < 0) ? -1 : 1;
                     }
-                } finally {
-                    aux.unlock();
                 }
+                return 0;
             }
-        }
-        return stat;
-    }
-
-    /**
-     * If the given worker is a spare with no queued tasks, and there
-     * are enough existing workers, drops it from ctl counts and sets
-     * its state to terminated.
-     *
-     * @param w the calling worker -- must be a spare
-     * @return true if dropped (in which case it must not process more tasks)
-     */
-    private boolean tryDropSpare(WorkQueue w) {
-        if (w != null && w.isEmpty()) {           // no local tasks
-            long c; int sp, wl; WorkQueue[] ws; WorkQueue v;
-            while ((short)((c = ctl) >> TC_SHIFT) > 0 &&
-                   ((sp = (int)c) != 0 || (int)(c >> AC_SHIFT) > 0) &&
-                   (ws = workQueues) != null && (wl = ws.length) > 0) {
-                boolean dropped, canDrop;
-                if (sp == 0) {                    // no queued workers
-                    long nc = ((AC_MASK & (c - AC_UNIT)) |
-                               (TC_MASK & (c - TC_UNIT)) | (SP_MASK & c));
-                    dropped = U.compareAndSwapLong(this, CTL, c, nc);
+            else if ((int)(c >> RC_SHIFT) -      // reduce parallelism
+                     (short)(bounds & SMASK) > 0) {
+                long nc = ((RC_MASK & (c - RC_UNIT)) | (~RC_MASK & c));
+                return CTL.compareAndSet(this, c, nc) ? 1 : 0;
+            }
+            else {                               // validate
+                int md = mode, pc = md & SMASK, tc = pc + t, bc = 0;
+                boolean unstable = false;
+                for (int i = 1; i < n; i += 2) {
+                    WorkQueue q; Thread wt; Thread.State ts;
+                    if ((q = ws[i]) != null) {
+                        if (q.source == 0) {
+                            unstable = true;
+                            break;
+                        }
+                        else {
+                            --tc;
+                            if ((wt = q.owner) != null &&
+                                ((ts = wt.getState()) == Thread.State.BLOCKED ||
+                                 ts == Thread.State.WAITING))
+                                ++bc;            // worker is blocking
+                        }
+                    }
                 }
-                else if (
-                    (v = ws[(wl - 1) & sp]) == null || v.scanState != sp)
-                    dropped = false;              // stale; retry
-                else {
-                    long nc = v.stackPred & SP_MASK;
-                    if (w == v || w.scanState >= 0) {
-                        canDrop = true;           // w unqueued or topmost
-                        nc |= ((AC_MASK & c) |    // ensure replacement
-                               (TC_MASK & (c - TC_UNIT)));
-                    }
-                    else {                        // w may be queued
-                        canDrop = false;          // help uncover
-                        nc |= ((AC_MASK & (c + AC_UNIT)) |
-                               (TC_MASK & c));
-                    }
-                    if (U.compareAndSwapLong(this, CTL, c, nc)) {
-                        v.scanState = sp & ~UNSIGNALLED;
-                        LockSupport.unpark(v.parker);
-                        dropped = canDrop;
+                if (unstable || tc != 0 || ctl != c)
+                    return 0;                    // inconsistent
+                else if (t + pc >= MAX_CAP || t >= (bounds >>> SWIDTH)) {
+                    Predicate<? super ForkJoinPool> sat;
+                    if ((sat = saturate) != null && sat.test(this))
+                        return -1;
+                    else if (bc < pc) {          // lagging
+                        Thread.yield();          // for retry spins
+                        return 0;
                     }
                     else
-                        dropped = false;
-                }
-                if (dropped) {                    // pre-deregister
-                    int cfg = w.config, idx = cfg & SMASK;
-                    if (idx >= 0 && idx < ws.length && ws[idx] == w)
-                        ws[idx] = null;
-                    w.config = cfg | UNREGISTERED;
-                    w.qlock = -1;
-                    return true;
+                        throw new RejectedExecutionException(
+                            "Thread limit exceeded replacing blocked worker");
                 }
             }
         }
-        return false;
+
+        long nc = ((c + TC_UNIT) & TC_MASK) | (c & ~TC_MASK); // expand pool
+        return CTL.compareAndSet(this, c, nc) && createWorker() ? 1 : 0;
     }
 
     /**
      * Top-level runloop for workers, called by ForkJoinWorkerThread.run.
+     * See above for explanation.
      */
     final void runWorker(WorkQueue w) {
-        w.growArray();                                  // allocate queue
-        int bound = (w.config & SPARE_WORKER) != 0 ? 0 : POLL_LIMIT;
-        long seed = w.hint * 0xdaba0b6eb09322e3L;       // initial random seed
-        if ((runState & STOP) == 0) {
-            for (long r = (seed == 0L) ? 1L : seed;;) { // ensure nonzero
-                if (bound == 0 && tryDropSpare(w))
-                    break;
-                // high bits of prev seed for step; current low bits for idx
-                int step = (int)(r >>> 48) | 1;
-                r ^= r >>> 12; r ^= r << 25; r ^= r >>> 27; // xorshift
-                if (scan(w, bound, step, (int)r) < 0 && awaitWork(w) < 0)
-                    break;
+        int r = (w.id ^ ThreadLocalRandom.nextSecondarySeed()) | FIFO; // rng
+        w.array = new ForkJoinTask<?>[INITIAL_QUEUE_CAPACITY]; // initialize
+        for (;;) {
+            int phase;
+            if (scan(w, r)) {                     // scan until apparently empty
+                r ^= r << 13; r ^= r >>> 17; r ^= r << 5; // move (xorshift)
             }
-        }
-    }
-
-    // Scanning for tasks
-
-    /**
-     * Repeatedly scans for and tries to steal and execute (via
-     * workQueue.runTask) a queued task. Each scan traverses queues in
-     * pseudorandom permutation. Upon finding a non-empty queue, makes
-     * at most the given bound attempts to re-poll (fewer if
-     * contended) on the same queue before returning (impossible
-     * scanState value) 0 to restart scan. Else returns after at least
-     * 1 and at most 32 full scans.
-     *
-     * @param w the worker (via its WorkQueue)
-     * @param bound repoll bound as bitmask (0 if spare)
-     * @param step (circular) index increment per iteration (must be odd)
-     * @param r a random seed for origin index
-     * @return negative if should await signal
-     */
-    private int scan(WorkQueue w, int bound, int step, int r) {
-        int stat = 0, wl; WorkQueue[] ws;
-        if ((ws = workQueues) != null && w != null && (wl = ws.length) > 0) {
-            for (int m = wl - 1,
-                     origin = m & r, idx = origin,
-                     npolls = 0,
-                     ss = w.scanState;;) {         // negative if inactive
-                WorkQueue q; ForkJoinTask<?>[] a; int b, al;
-                if ((q = ws[idx]) != null && (b = q.base) - q.top < 0 &&
-                    (a = q.array) != null && (al = a.length) > 0) {
-                    int index = (al - 1) & b;
-                    long offset = ((long)index << ASHIFT) + ABASE;
-                    ForkJoinTask<?> t = (ForkJoinTask<?>)
-                        U.getObjectVolatile(a, offset);
-                    if (t == null)
-                        break;                     // empty or busy
-                    else if (b++ != q.base)
-                        break;                     // busy
-                    else if (ss < 0) {
-                        tryReactivate(w, ws, r);
-                        break;                     // retry upon rescan
-                    }
-                    else if (!U.compareAndSwapObject(a, offset, t, null))
-                        break;                     // contended
-                    else {
-                        q.base = b;
-                        w.currentSteal = t;
-                        if (b != q.top)            // propagate signal
-                            signalWork();
-                        w.runTask(t);
-                        if (++npolls > bound)
-                            break;
-                    }
-                }
-                else if (npolls != 0)              // rescan
+            else if ((phase = w.phase) >= 0) {    // enqueue, then rescan
+                long np = (w.phase = (phase + SS_SEQ) | UNSIGNALLED) & SP_MASK;
+                long c, nc;
+                do {
+                    w.stackPred = (int)(c = ctl);
+                    nc = ((c - RC_UNIT) & UC_MASK) | np;
+                } while (!CTL.weakCompareAndSet(this, c, nc));
+            }
+            else {                                // already queued
+                int pred = w.stackPred;
+                Thread.interrupted();             // clear before park
+                w.source = DORMANT;               // enable signal
+                long c = ctl;
+                int md = mode, rc = (md & SMASK) + (int)(c >> RC_SHIFT);
+                if (md < 0)                       // terminating
                     break;
-                else if ((idx = (idx + step) & m) == origin) {
-                    if (ss < 0) {                  // await signal
-                        stat = ss;
+                else if (rc <= 0 && (md & SHUTDOWN) != 0 &&
+                         tryTerminate(false, false))
+                    break;                        // quiescent shutdown
+                else if (rc <= 0 && pred != 0 && phase == (int)c) {
+                    long nc = (UC_MASK & (c - TC_UNIT)) | (SP_MASK & pred);
+                    long d = keepAlive + System.currentTimeMillis();
+                    LockSupport.parkUntil(this, d);
+                    if (ctl == c &&               // drop on timeout if all idle
+                        d - System.currentTimeMillis() <= TIMEOUT_SLOP &&
+                        CTL.compareAndSet(this, c, nc)) {
+                        w.phase = QUIET;
                         break;
                     }
-                    else if (r >= 0) {
-                        inactivate(w, ss);
-                        break;
-                    }
-                    else
-                        r <<= 1;                   // at most 31 rescans
                 }
+                else if (w.phase < 0)
+                    LockSupport.park(this);       // OK if spuriously woken
+                w.source = 0;                     // disable signal
             }
         }
-        return stat;
     }
 
-    // Joining tasks
-
     /**
-     * Tries to steal and run tasks within the target's computation.
-     * Uses a variant of the top-level algorithm, restricted to tasks
-     * with the given task as ancestor: It prefers taking and running
-     * eligible tasks popped from the worker's own queue (via
-     * popCC). Otherwise it scans others, randomly moving on
-     * contention or execution, deciding to give up based on a
-     * checksum (via return codes from pollAndExecCC). The maxTasks
-     * argument supports external usages; internal calls use zero,
-     * allowing unbounded steps (external calls trap non-positive
-     * values).
+     * Scans for and if found executes one or more top-level tasks from a queue.
      *
-     * @param w caller
-     * @param maxTasks if non-zero, the maximum number of other tasks to run
-     * @return task status on exit
+     * @return true if found an apparently non-empty queue, and
+     * possibly ran task(s).
      */
-    final int helpComplete(WorkQueue w, CountedCompleter<?> task,
-                           int maxTasks) {
-        WorkQueue[] ws; int s = 0, wl;
-        if ((ws = workQueues) != null && (wl = ws.length) > 1 &&
-            task != null && w != null) {
-            for (int m = wl - 1,
-                     mode = w.config,
-                     r = ~mode,                  // scanning seed
-                     origin = r & m, k = origin, // first queue to scan
-                     step = 3,                   // first scan step
-                     h = 1,                      // 1:ran, >1:contended, <0:hash
-                     oldSum = 0, checkSum = 0;;) {
-                CountedCompleter<?> p; WorkQueue q; int i;
-                if ((s = task.status) < 0)
+    private boolean scan(WorkQueue w, int r) {
+        WorkQueue[] ws; int n;
+        if ((ws = workQueues) != null && (n = ws.length) > 0 && w != null) {
+            for (int m = n - 1, j = r & m;;) {
+                WorkQueue q; int b;
+                if ((q = ws[j]) != null && q.top != (b = q.base)) {
+                    int qid = q.id;
+                    ForkJoinTask<?>[] a; int cap, k; ForkJoinTask<?> t;
+                    if ((a = q.array) != null && (cap = a.length) > 0) {
+                        t = (ForkJoinTask<?>)QA.getAcquire(a, k = (cap - 1) & b);
+                        if (q.base == b++ && t != null &&
+                            QA.compareAndSet(a, k, t, null)) {
+                            q.base = b;
+                            w.source = qid;
+                            if (q.top - b > 0)
+                                signalWork();
+                            w.topLevelExec(t, q,  // random fairness bound
+                                           r & ((n << TOP_BOUND_SHIFT) - 1));
+                        }
+                    }
+                    return true;
+                }
+                else if (--n > 0)
+                    j = (j + 1) & m;
+                else
                     break;
-                if (h == 1 && (p = w.popCC(task, mode)) != null) {
-                    p.doExec();                  // run local task
-                    if (maxTasks != 0 && --maxTasks == 0)
-                        break;
-                    origin = k;                  // reset
-                    oldSum = checkSum = 0;
-                }
-                else {                           // poll other worker queues
-                    if ((i = k | 1) < 0 || i > m || (q = ws[i]) == null)
-                        h = 0;
-                    else if ((h = q.pollAndExecCC(task)) < 0)
-                        checkSum += h;
-                    if (h > 0) {
-                        if (h == 1 && maxTasks != 0 && --maxTasks == 0)
-                            break;
-                        step = (r >>> 16) | 3;
-                        r ^= r << 13; r ^= r >>> 17; r ^= r << 5; // xorshift
-                        k = origin = r & m;      // move and restart
-                        oldSum = checkSum = 0;
-                    }
-                    else if ((k = (k + step) & m) == origin) {
-                        if (oldSum == (oldSum = checkSum))
-                            break;
-                        checkSum = 0;
-                    }
-                }
             }
         }
-        return s;
-    }
-
-    /**
-     * Tries to locate and execute tasks for a stealer of the given
-     * task, or in turn one of its stealers. Traces currentSteal ->
-     * currentJoin links looking for a thread working on a descendant
-     * of the given task and with a non-empty queue to steal back and
-     * execute tasks from. The first call to this method upon a
-     * waiting join will often entail scanning/search, (which is OK
-     * because the joiner has nothing better to do), but this method
-     * leaves hints in workers to speed up subsequent calls.
-     *
-     * @param w caller
-     * @param task the task to join
-     */
-    private void helpStealer(WorkQueue w, ForkJoinTask<?> task) {
-        if (task != null && w != null) {
-            ForkJoinTask<?> ps = w.currentSteal;
-            WorkQueue[] ws; int wl, oldSum = 0;
-            outer: while (w.tryRemoveAndExec(task) && task.status >= 0 &&
-                          (ws = workQueues) != null && (wl = ws.length) > 0) {
-                ForkJoinTask<?> subtask;
-                int m = wl - 1, checkSum = 0;          // for stability check
-                WorkQueue j = w, v;                    // v is subtask stealer
-                descent: for (subtask = task; subtask.status >= 0; ) {
-                    for (int h = j.hint | 1, k = 0, i;;) {
-                        if ((v = ws[i = (h + (k << 1)) & m]) != null) {
-                            if (v.currentSteal == subtask) {
-                                j.hint = i;
-                                break;
-                            }
-                            checkSum += v.base;
-                        }
-                        if (++k > m)                   // can't find stealer
-                            break outer;
-                    }
-
-                    for (;;) {                         // help v or descend
-                        ForkJoinTask<?>[] a; int b, al;
-                        if (subtask.status < 0)        // too late to help
-                            break descent;
-                        checkSum += (b = v.base);
-                        ForkJoinTask<?> next = v.currentJoin;
-                        ForkJoinTask<?> t = null;
-                        if ((a = v.array) != null && (al = a.length) > 0) {
-                            int index = (al - 1) & b;
-                            long offset = ((long)index << ASHIFT) + ABASE;
-                            t = (ForkJoinTask<?>)
-                                U.getObjectVolatile(a, offset);
-                            if (t != null && b++ == v.base) {
-                                if (j.currentJoin != subtask ||
-                                    v.currentSteal != subtask ||
-                                    subtask.status < 0)
-                                    break descent;     // stale
-                                if (U.compareAndSwapObject(a, offset, t, null)) {
-                                    v.base = b;
-                                    w.currentSteal = t;
-                                    for (int top = w.top;;) {
-                                        t.doExec();    // help
-                                        w.currentSteal = ps;
-                                        if (task.status < 0)
-                                            break outer;
-                                        if (w.top == top)
-                                            break;     // run local tasks
-                                        if ((t = w.pop()) == null)
-                                            break descent;
-                                        w.currentSteal = t;
-                                    }
-                                }
-                            }
-                        }
-                        if (t == null && b == v.base && b - v.top >= 0) {
-                            if ((subtask = next) == null) {  // try to descend
-                                if (next == v.currentJoin &&
-                                    oldSum == (oldSum = checkSum))
-                                    break outer;
-                                break descent;
-                            }
-                            j = v;
-                            break;
-                        }
-                    }
-                }
-            }
-        }
-    }
-
-    /**
-     * Tries to decrement active count (sometimes implicitly) and
-     * possibly release or create a compensating worker in preparation
-     * for blocking. Returns false (retryable by caller), on
-     * contention, detected staleness, instability, or termination.
-     *
-     * @param w caller
-     */
-    private boolean tryCompensate(WorkQueue w) {
-        boolean canBlock; int wl;
-        long c = ctl;
-        WorkQueue[] ws = workQueues;
-        int pc = config & SMASK;
-        int ac = pc + (int)(c >> AC_SHIFT);
-        int tc = pc + (short)(c >> TC_SHIFT);
-        if (w == null || w.qlock < 0 || pc == 0 ||  // terminating or disabled
-            ws == null || (wl = ws.length) <= 0)
-            canBlock = false;
-        else {
-            int m = wl - 1, sp;
-            boolean busy = true;                    // validate ac
-            for (int i = 0; i <= m; ++i) {
-                int k; WorkQueue v;
-                if ((k = (i << 1) | 1) <= m && k >= 0 && (v = ws[k]) != null &&
-                    v.scanState >= 0 && v.currentSteal == null) {
-                    busy = false;
-                    break;
-                }
-            }
-            if (!busy || ctl != c)
-                canBlock = false;                   // unstable or stale
-            else if ((sp = (int)c) != 0)            // release idle worker
-                canBlock = tryRelease(c, ws[m & sp], 0L);
-            else if (tc >= pc && ac > 1 && w.isEmpty()) {
-                long nc = ((AC_MASK & (c - AC_UNIT)) |
-                           (~AC_MASK & c));         // uncompensated
-                canBlock = U.compareAndSwapLong(this, CTL, c, nc);
-            }
-            else if (tc >= MAX_CAP ||
-                     (this == common && tc >= pc + COMMON_MAX_SPARES))
-                throw new RejectedExecutionException(
-                    "Thread limit exceeded replacing blocked worker");
-            else {                                  // similar to tryAddWorker
-                boolean isSpare = (tc >= pc);
-                long nc = (AC_MASK & c) | (TC_MASK & (c + TC_UNIT));
-                canBlock = (U.compareAndSwapLong(this, CTL, c, nc) &&
-                            createWorker(isSpare)); // throws on exception
-            }
-        }
-        return canBlock;
+        return false;
     }
 
     /**
      * Helps and/or blocks until the given task is done or timeout.
+     * First tries locally helping, then scans other queues for a task
+     * produced by one of w's stealers; compensating and blocking if
+     * none are found (rescanning if tryCompensate fails).
      *
      * @param w caller
      * @param task the task
@@ -2225,62 +1684,165 @@
      */
     final int awaitJoin(WorkQueue w, ForkJoinTask<?> task, long deadline) {
         int s = 0;
-        if (w != null) {
-            ForkJoinTask<?> prevJoin = w.currentJoin;
-            if (task != null && (s = task.status) >= 0) {
-                w.currentJoin = task;
-                CountedCompleter<?> cc = (task instanceof CountedCompleter) ?
-                    (CountedCompleter<?>)task : null;
-                for (;;) {
-                    if (cc != null)
-                        helpComplete(w, cc, 0);
-                    else
-                        helpStealer(w, task);
-                    if ((s = task.status) < 0)
+        int seed = ThreadLocalRandom.nextSecondarySeed();
+        if (w != null && task != null &&
+            (!(task instanceof CountedCompleter) ||
+             (s = w.helpCC((CountedCompleter<?>)task, 0, false)) >= 0)) {
+            w.tryRemoveAndExec(task);
+            int src = w.source, id = w.id;
+            int r = (seed >>> 16) | 1, step = (seed & ~1) | 2;
+            s = task.status;
+            while (s >= 0) {
+                WorkQueue[] ws;
+                int n = (ws = workQueues) == null ? 0 : ws.length, m = n - 1;
+                while (n > 0) {
+                    WorkQueue q; int b;
+                    if ((q = ws[r & m]) != null && q.source == id &&
+                        q.top != (b = q.base)) {
+                        ForkJoinTask<?>[] a; int cap, k;
+                        int qid = q.id;
+                        if ((a = q.array) != null && (cap = a.length) > 0) {
+                            ForkJoinTask<?> t = (ForkJoinTask<?>)
+                                QA.getAcquire(a, k = (cap - 1) & b);
+                            if (q.source == id && q.base == b++ &&
+                                t != null && QA.compareAndSet(a, k, t, null)) {
+                                q.base = b;
+                                w.source = qid;
+                                t.doExec();
+                                w.source = src;
+                            }
+                        }
                         break;
-                    long ms, ns;
-                    if (deadline == 0L)
-                        ms = 0L;
-                    else if ((ns = deadline - System.nanoTime()) <= 0L)
-                        break;
-                    else if ((ms = TimeUnit.NANOSECONDS.toMillis(ns)) <= 0L)
-                        ms = 1L;
-                    if (tryCompensate(w)) {
-                        task.internalWait(ms);
-                        U.getAndAddLong(this, CTL, AC_UNIT);
                     }
-                    if ((s = task.status) < 0)
-                        break;
+                    else {
+                        r += step;
+                        --n;
+                    }
                 }
-                w.currentJoin = prevJoin;
+                if ((s = task.status) < 0)
+                    break;
+                else if (n == 0) { // empty scan
+                    long ms, ns; int block;
+                    if (deadline == 0L)
+                        ms = 0L;                       // untimed
+                    else if ((ns = deadline - System.nanoTime()) <= 0L)
+                        break;                         // timeout
+                    else if ((ms = TimeUnit.NANOSECONDS.toMillis(ns)) <= 0L)
+                        ms = 1L;                       // avoid 0 for timed wait
+                    if ((block = tryCompensate(w)) != 0) {
+                        task.internalWait(ms);
+                        CTL.getAndAdd(this, (block > 0) ? RC_UNIT : 0L);
+                    }
+                    s = task.status;
+                }
             }
         }
         return s;
     }
 
-    // Specialized scanning
+    /**
+     * Runs tasks until {@code isQuiescent()}. Rather than blocking
+     * when tasks cannot be found, rescans until all others cannot
+     * find tasks either.
+     */
+    final void helpQuiescePool(WorkQueue w) {
+        int prevSrc = w.source;
+        int seed = ThreadLocalRandom.nextSecondarySeed();
+        int r = seed >>> 16, step = r | 1;
+        for (int source = prevSrc, released = -1;;) { // -1 until known
+            ForkJoinTask<?> localTask; WorkQueue[] ws;
+            while ((localTask = w.nextLocalTask()) != null)
+                localTask.doExec();
+            if (w.phase >= 0 && released == -1)
+                released = 1;
+            boolean quiet = true, empty = true;
+            int n = (ws = workQueues) == null ? 0 : ws.length;
+            for (int m = n - 1; n > 0; r += step, --n) {
+                WorkQueue q; int b;
+                if ((q = ws[r & m]) != null) {
+                    int qs = q.source;
+                    if (q.top != (b = q.base)) {
+                        quiet = empty = false;
+                        ForkJoinTask<?>[] a; int cap, k;
+                        int qid = q.id;
+                        if ((a = q.array) != null && (cap = a.length) > 0) {
+                            if (released == 0) {    // increment
+                                released = 1;
+                                CTL.getAndAdd(this, RC_UNIT);
+                            }
+                            ForkJoinTask<?> t = (ForkJoinTask<?>)
+                                QA.getAcquire(a, k = (cap - 1) & b);
+                            if (q.base == b++ && t != null &&
+                                QA.compareAndSet(a, k, t, null)) {
+                                q.base = b;
+                                w.source = qid;
+                                t.doExec();
+                                w.source = source = prevSrc;
+                            }
+                        }
+                        break;
+                    }
+                    else if ((qs & QUIET) == 0)
+                        quiet = false;
+                }
+            }
+            if (quiet) {
+                if (released == 0)
+                    CTL.getAndAdd(this, RC_UNIT);
+                w.source = prevSrc;
+                break;
+            }
+            else if (empty) {
+                if (source != QUIET)
+                    w.source = source = QUIET;
+                if (released == 1) {                 // decrement
+                    released = 0;
+                    CTL.getAndAdd(this, RC_MASK & -RC_UNIT);
+                }
+            }
+        }
+    }
 
     /**
-     * Returns a (probably) non-empty steal queue, if one is found
-     * during a scan, else null.  This method must be retried by
-     * caller if, by the time it tries to use the queue, it is empty.
+     * Scans for and returns a polled task, if available.
+     * Used only for untracked polls.
+     *
+     * @param submissionsOnly if true, only scan submission queues
      */
-    private WorkQueue findNonEmptyStealQueue() {
-        WorkQueue[] ws; int wl;  // one-shot version of scan loop
-        int r = ThreadLocalRandom.nextSecondarySeed();
-        if ((ws = workQueues) != null && (wl = ws.length) > 0) {
-            int m = wl - 1, origin = r & m;
-            for (int k = origin, oldSum = 0, checkSum = 0;;) {
-                WorkQueue q; int b;
-                if ((q = ws[k]) != null) {
-                    if ((b = q.base) - q.top < 0)
-                        return q;
-                    checkSum += b;
+    private ForkJoinTask<?> pollScan(boolean submissionsOnly) {
+        WorkQueue[] ws; int n;
+        rescan: while ((mode & STOP) == 0 && (ws = workQueues) != null &&
+                      (n = ws.length) > 0) {
+            int m = n - 1;
+            int r = ThreadLocalRandom.nextSecondarySeed();
+            int h = r >>> 16;
+            int origin, step;
+            if (submissionsOnly) {
+                origin = (r & ~1) & m;         // even indices and steps
+                step = (h & ~1) | 2;
+            }
+            else {
+                origin = r & m;
+                step = h | 1;
+            }
+            boolean nonempty = false;
+            for (int i = origin, oldSum = 0, checkSum = 0;;) {
+                WorkQueue q;
+                if ((q = ws[i]) != null) {
+                    int b; ForkJoinTask<?> t;
+                    if (q.top - (b = q.base) > 0) {
+                        nonempty = true;
+                        if ((t = q.poll()) != null)
+                            return t;
+                    }
+                    else
+                        checkSum += b + q.id;
                 }
-                if ((k = (k + 1) & m) == origin) {
-                    if (oldSum == (oldSum = checkSum))
-                        break;
+                if ((i = (i + step) & m) == origin) {
+                    if (!nonempty && oldSum == (oldSum = checkSum))
+                        break rescan;
                     checkSum = 0;
+                    nonempty = false;
                 }
             }
         }
@@ -2288,58 +1850,129 @@
     }
 
     /**
-     * Runs tasks until {@code isQuiescent()}. We piggyback on
-     * active count ctl maintenance, but rather than blocking
-     * when tasks cannot be found, we rescan until all others cannot
-     * find tasks either.
-     */
-    final void helpQuiescePool(WorkQueue w) {
-        ForkJoinTask<?> ps = w.currentSteal; // save context
-        int wc = w.config;
-        for (boolean active = true;;) {
-            long c; WorkQueue q; ForkJoinTask<?> t;
-            if (wc >= 0 && (t = w.pop()) != null) { // run locals if LIFO
-                (w.currentSteal = t).doExec();
-                w.currentSteal = ps;
-            }
-            else if ((q = findNonEmptyStealQueue()) != null) {
-                if (!active) {      // re-establish active count
-                    active = true;
-                    U.getAndAddLong(this, CTL, AC_UNIT);
-                }
-                if ((t = q.pollAt(q.base)) != null) {
-                    (w.currentSteal = t).doExec();
-                    w.currentSteal = ps;
-                    if (++w.nsteals < 0)
-                        w.transferStealCount(this);
-                }
-            }
-            else if (active) {      // decrement active count without queuing
-                long nc = (AC_MASK & ((c = ctl) - AC_UNIT)) | (~AC_MASK & c);
-                if (U.compareAndSwapLong(this, CTL, c, nc))
-                    active = false;
-            }
-            else if ((int)((c = ctl) >> AC_SHIFT) + (config & SMASK) <= 0 &&
-                     U.compareAndSwapLong(this, CTL, c, c + AC_UNIT))
-                break;
-        }
-    }
-
-    /**
      * Gets and removes a local or stolen task for the given worker.
      *
      * @return a task, if available
      */
     final ForkJoinTask<?> nextTaskFor(WorkQueue w) {
-        for (ForkJoinTask<?> t;;) {
-            WorkQueue q;
-            if ((t = w.nextLocalTask()) != null)
-                return t;
-            if ((q = findNonEmptyStealQueue()) == null)
-                return null;
-            if ((t = q.pollAt(q.base)) != null)
-                return t;
+        ForkJoinTask<?> t;
+        if (w == null || (t = w.nextLocalTask()) == null)
+            t = pollScan(false);
+        return t;
+    }
+
+    // External operations
+
+    /**
+     * Adds the given task to a submission queue at submitter's
+     * current queue, creating one if null or contended.
+     *
+     * @param task the task. Caller must ensure non-null.
+     */
+    final void externalPush(ForkJoinTask<?> task) {
+        int r;                                // initialize caller's probe
+        if ((r = ThreadLocalRandom.getProbe()) == 0) {
+            ThreadLocalRandom.localInit();
+            r = ThreadLocalRandom.getProbe();
         }
+        for (;;) {
+            WorkQueue q;
+            int md = mode, n;
+            WorkQueue[] ws = workQueues;
+            if ((md & SHUTDOWN) != 0 || ws == null || (n = ws.length) <= 0)
+                throw new RejectedExecutionException();
+            else if ((q = ws[(n - 1) & r & SQMASK]) == null) { // add queue
+                int qid = (r | QUIET) & ~(FIFO | OWNED);
+                Object lock = workerNamePrefix;
+                ForkJoinTask<?>[] qa =
+                    new ForkJoinTask<?>[INITIAL_QUEUE_CAPACITY];
+                q = new WorkQueue(this, null);
+                q.array = qa;
+                q.id = qid;
+                q.source = QUIET;
+                if (lock != null) {     // unless disabled, lock pool to install
+                    synchronized (lock) {
+                        WorkQueue[] vs; int i, vn;
+                        if ((vs = workQueues) != null && (vn = vs.length) > 0 &&
+                            vs[i = qid & (vn - 1) & SQMASK] == null)
+                            vs[i] = q;  // else another thread already installed
+                    }
+                }
+            }
+            else if (!q.tryLockPhase()) // move if busy
+                r = ThreadLocalRandom.advanceProbe(r);
+            else {
+                if (q.lockedPush(task))
+                    signalWork();
+                return;
+            }
+        }
+    }
+
+    /**
+     * Pushes a possibly-external submission.
+     */
+    private <T> ForkJoinTask<T> externalSubmit(ForkJoinTask<T> task) {
+        Thread t; ForkJoinWorkerThread w; WorkQueue q;
+        if (task == null)
+            throw new NullPointerException();
+        if (((t = Thread.currentThread()) instanceof ForkJoinWorkerThread) &&
+            (w = (ForkJoinWorkerThread)t).pool == this &&
+            (q = w.workQueue) != null)
+            q.push(task);
+        else
+            externalPush(task);
+        return task;
+    }
+
+    /**
+     * Returns common pool queue for an external thread.
+     */
+    static WorkQueue commonSubmitterQueue() {
+        ForkJoinPool p = common;
+        int r = ThreadLocalRandom.getProbe();
+        WorkQueue[] ws; int n;
+        return (p != null && (ws = p.workQueues) != null &&
+                (n = ws.length) > 0) ?
+            ws[(n - 1) & r & SQMASK] : null;
+    }
+
+    /**
+     * Performs tryUnpush for an external submitter.
+     */
+    final boolean tryExternalUnpush(ForkJoinTask<?> task) {
+        int r = ThreadLocalRandom.getProbe();
+        WorkQueue[] ws; WorkQueue w; int n;
+        return ((ws = workQueues) != null &&
+                (n = ws.length) > 0 &&
+                (w = ws[(n - 1) & r & SQMASK]) != null &&
+                w.tryLockedUnpush(task));
+    }
+
+    /**
+     * Performs helpComplete for an external submitter.
+     */
+    final int externalHelpComplete(CountedCompleter<?> task, int maxTasks) {
+        int r = ThreadLocalRandom.getProbe();
+        WorkQueue[] ws; WorkQueue w; int n;
+        return ((ws = workQueues) != null && (n = ws.length) > 0 &&
+                (w = ws[(n - 1) & r & SQMASK]) != null) ?
+            w.helpCC(task, maxTasks, true) : 0;
+    }
+
+    /**
+     * Tries to steal and run tasks within the target's computation.
+     * The maxTasks argument supports external usages; internal calls
+     * use zero, allowing unbounded steps (external calls trap
+     * non-positive values).
+     *
+     * @param w caller
+     * @param maxTasks if non-zero, the maximum number of other tasks to run
+     * @return task status on exit
+     */
+    final int helpComplete(WorkQueue w, CountedCompleter<?> task,
+                           int maxTasks) {
+        return (w == null) ? 0 : w.helpCC(task, maxTasks, false);
     }
 
     /**
@@ -2386,10 +2019,12 @@
      */
     static int getSurplusQueuedTaskCount() {
         Thread t; ForkJoinWorkerThread wt; ForkJoinPool pool; WorkQueue q;
-        if ((t = Thread.currentThread()) instanceof ForkJoinWorkerThread) {
-            int p = (pool = (wt = (ForkJoinWorkerThread)t).pool).config & SMASK;
-            int n = (q = wt.workQueue).top - q.base;
-            int a = (int)(pool.ctl >> AC_SHIFT) + p;
+        if (((t = Thread.currentThread()) instanceof ForkJoinWorkerThread) &&
+            (pool = (wt = (ForkJoinWorkerThread)t).pool) != null &&
+            (q = wt.workQueue) != null) {
+            int p = pool.mode & SMASK;
+            int a = p + (int)(pool.ctl >> RC_SHIFT);
+            int n = q.top - q.base;
             return n - (a > (p >>>= 1) ? 0 :
                         a > (p >>>= 1) ? 1 :
                         a > (p >>>= 1) ? 2 :
@@ -2399,7 +2034,7 @@
         return 0;
     }
 
-    //  Termination
+    // Termination
 
     /**
      * Possibly initiates and/or completes termination.
@@ -2407,198 +2042,89 @@
      * @param now if true, unconditionally terminate, else only
      * if no work and no active workers
      * @param enable if true, terminate when next possible
-     * @return -1: terminating/terminated, 0: retry if internal caller, else 1
+     * @return true if terminating or terminated
      */
-    private int tryTerminate(boolean now, boolean enable) {
-        int rs; // 3 phases: try to set SHUTDOWN, then STOP, then TERMINATED
+    private boolean tryTerminate(boolean now, boolean enable) {
+        int md; // 3 phases: try to set SHUTDOWN, then STOP, then TERMINATED
 
-        while ((rs = runState) >= 0) {
+        while (((md = mode) & SHUTDOWN) == 0) {
             if (!enable || this == common)        // cannot shutdown
-                return 1;
-            else if (rs == 0)
-                tryInitialize(false);             // ensure initialized
+                return false;
             else
-                U.compareAndSwapInt(this, RUNSTATE, rs, rs | SHUTDOWN);
+                MODE.compareAndSet(this, md, md | SHUTDOWN);
         }
 
-        if ((rs & STOP) == 0) {                   // try to initiate termination
-            if (!now) {                           // check quiescence
+        while (((md = mode) & STOP) == 0) {       // try to initiate termination
+            if (!now) {                           // check if quiescent & empty
                 for (long oldSum = 0L;;) {        // repeat until stable
-                    WorkQueue[] ws; WorkQueue w; int b;
+                    boolean running = false;
                     long checkSum = ctl;
-                    if ((int)(checkSum >> AC_SHIFT) + (config & SMASK) > 0)
-                        return 0;                 // still active workers
-                    if ((ws = workQueues) != null) {
+                    WorkQueue[] ws = workQueues;
+                    if ((md & SMASK) + (int)(checkSum >> RC_SHIFT) > 0)
+                        running = true;
+                    else if (ws != null) {
+                        WorkQueue w;
                         for (int i = 0; i < ws.length; ++i) {
                             if ((w = ws[i]) != null) {
-                                checkSum += (b = w.base);
-                                if (w.currentSteal != null || b != w.top)
-                                    return 0;     // retry if internal caller
+                                int s = w.source, p = w.phase;
+                                int d = w.id, b = w.base;
+                                if (b != w.top ||
+                                    ((d & 1) == 1 && (s >= 0 || p >= 0))) {
+                                    running = true;
+                                    break;     // working, scanning, or have work
+                                }
+                                checkSum += (((long)s << 48) + ((long)p << 32) +
+                                             ((long)b << 16) + (long)d);
                             }
                         }
                     }
-                    if (oldSum == (oldSum = checkSum))
+                    if (((md = mode) & STOP) != 0)
+                        break;                 // already triggered
+                    else if (running)
+                        return false;
+                    else if (workQueues == ws && oldSum == (oldSum = checkSum))
                         break;
                 }
             }
-            do {} while (!U.compareAndSwapInt(this, RUNSTATE,
-                                              rs = runState, rs | STOP));
+            if ((md & STOP) == 0)
+                MODE.compareAndSet(this, md, md | STOP);
         }
 
-        for (long oldSum = 0L;;) {                // repeat until stable
-            WorkQueue[] ws; WorkQueue w; ForkJoinWorkerThread wt;
-            long checkSum = ctl;
-            if ((ws = workQueues) != null) {      // help terminate others
-                for (int i = 0; i < ws.length; ++i) {
-                    if ((w = ws[i]) != null) {
-                        w.cancelAll();            // clear queues
-                        checkSum += w.base;
-                        if (w.qlock >= 0) {
-                            w.qlock = -1;         // racy set OK
-                            if ((wt = w.owner) != null) {
+        while (((md = mode) & TERMINATED) == 0) { // help terminate others
+            for (long oldSum = 0L;;) {            // repeat until stable
+                WorkQueue[] ws; WorkQueue w;
+                long checkSum = ctl;
+                if ((ws = workQueues) != null) {
+                    for (int i = 0; i < ws.length; ++i) {
+                        if ((w = ws[i]) != null) {
+                            ForkJoinWorkerThread wt = w.owner;
+                            w.cancelAll();        // clear queues
+                            if (wt != null) {
                                 try {             // unblock join or park
                                     wt.interrupt();
                                 } catch (Throwable ignore) {
                                 }
                             }
+                            checkSum += ((long)w.phase << 32) + w.base;
                         }
                     }
                 }
+                if (((md = mode) & TERMINATED) != 0 ||
+                    (workQueues == ws && oldSum == (oldSum = checkSum)))
+                    break;
             }
-            if (oldSum == (oldSum = checkSum))
+            if ((md & TERMINATED) != 0)
                 break;
-        }
-
-        if ((short)(ctl >>> TC_SHIFT) + (config & SMASK) <= 0) {
-            runState = (STARTED | SHUTDOWN | STOP | TERMINATED); // final write
-            synchronized (this) {
-                notifyAll();                      // for awaitTermination
-            }
-        }
-
-        return -1;
-    }
-
-    // External operations
-
-    /**
-     * Constructs and tries to install a new external queue,
-     * failing if the workQueues array already has a queue at
-     * the given index.
-     *
-     * @param index the index of the new queue
-     */
-    private void tryCreateExternalQueue(int index) {
-        AuxState aux;
-        if ((aux = auxState) != null && index >= 0) {
-            WorkQueue q = new WorkQueue(this, null);
-            q.config = index;
-            q.scanState = ~UNSIGNALLED;
-            q.qlock = 1;                   // lock queue
-            boolean installed = false;
-            aux.lock();
-            try {                          // lock pool to install
-                WorkQueue[] ws;
-                if ((ws = workQueues) != null && index < ws.length &&
-                    ws[index] == null) {
-                    ws[index] = q;         // else throw away
-                    installed = true;
+            else if ((md & SMASK) + (short)(ctl >>> TC_SHIFT) > 0)
+                break;
+            else if (MODE.compareAndSet(this, md, md | TERMINATED)) {
+                synchronized (this) {
+                    notifyAll();                  // for awaitTermination
                 }
-            } finally {
-                aux.unlock();
-            }
-            if (installed) {
-                try {
-                    q.growArray();
-                } finally {
-                    q.qlock = 0;
-                }
-            }
-        }
-    }
-
-    /**
-     * Adds the given task to a submission queue at submitter's
-     * current queue. Also performs secondary initialization upon the
-     * first submission of the first task to the pool, and detects
-     * first submission by an external thread and creates a new shared
-     * queue if the one at index if empty or contended.
-     *
-     * @param task the task. Caller must ensure non-null.
-     */
-    final void externalPush(ForkJoinTask<?> task) {
-        int r;                            // initialize caller's probe
-        if ((r = ThreadLocalRandom.getProbe()) == 0) {
-            ThreadLocalRandom.localInit();
-            r = ThreadLocalRandom.getProbe();
-        }
-        for (;;) {
-            WorkQueue q; int wl, k, stat;
-            int rs = runState;
-            WorkQueue[] ws = workQueues;
-            if (rs <= 0 || ws == null || (wl = ws.length) <= 0)
-                tryInitialize(true);
-            else if ((q = ws[k = (wl - 1) & r & SQMASK]) == null)
-                tryCreateExternalQueue(k);
-            else if ((stat = q.sharedPush(task)) < 0)
-                break;
-            else if (stat == 0) {
-                signalWork();
                 break;
             }
-            else                          // move if busy
-                r = ThreadLocalRandom.advanceProbe(r);
         }
-    }
-
-    /**
-     * Pushes a possibly-external submission.
-     */
-    private <T> ForkJoinTask<T> externalSubmit(ForkJoinTask<T> task) {
-        Thread t; ForkJoinWorkerThread w; WorkQueue q;
-        if (task == null)
-            throw new NullPointerException();
-        if (((t = Thread.currentThread()) instanceof ForkJoinWorkerThread) &&
-            (w = (ForkJoinWorkerThread)t).pool == this &&
-            (q = w.workQueue) != null)
-            q.push(task);
-        else
-            externalPush(task);
-        return task;
-    }
-
-    /**
-     * Returns common pool queue for an external thread.
-     */
-    static WorkQueue commonSubmitterQueue() {
-        ForkJoinPool p = common;
-        int r = ThreadLocalRandom.getProbe();
-        WorkQueue[] ws; int wl;
-        return (p != null && (ws = p.workQueues) != null &&
-                (wl = ws.length) > 0) ?
-            ws[(wl - 1) & r & SQMASK] : null;
-    }
-
-    /**
-     * Performs tryUnpush for an external submitter.
-     */
-    final boolean tryExternalUnpush(ForkJoinTask<?> task) {
-        int r = ThreadLocalRandom.getProbe();
-        WorkQueue[] ws; WorkQueue w; int wl;
-        return ((ws = workQueues) != null &&
-                (wl = ws.length) > 0 &&
-                (w = ws[(wl - 1) & r & SQMASK]) != null &&
-                w.trySharedUnpush(task));
-    }
-
-    /**
-     * Performs helpComplete for an external submitter.
-     */
-    final int externalHelpComplete(CountedCompleter<?> task, int maxTasks) {
-        WorkQueue[] ws; int wl;
-        int r = ThreadLocalRandom.getProbe();
-        return ((ws = workQueues) != null && (wl = ws.length) > 0) ?
-            helpComplete(ws[(wl - 1) & r & SQMASK], task, maxTasks) : 0;
+        return true;
     }
 
     // Exported methods
@@ -2607,9 +2133,10 @@
 
     /**
      * Creates a {@code ForkJoinPool} with parallelism equal to {@link
-     * java.lang.Runtime#availableProcessors}, using the {@linkplain
-     * #defaultForkJoinWorkerThreadFactory default thread factory},
-     * no UncaughtExceptionHandler, and non-async LIFO processing mode.
+     * java.lang.Runtime#availableProcessors}, using defaults for all
+     * other parameters (see {@link #ForkJoinPool(int,
+     * ForkJoinWorkerThreadFactory, UncaughtExceptionHandler, boolean,
+     * int, int, int, Predicate, long, TimeUnit)}).
      *
      * @throws SecurityException if a security manager exists and
      *         the caller is not permitted to modify threads
@@ -2618,14 +2145,16 @@
      */
     public ForkJoinPool() {
         this(Math.min(MAX_CAP, Runtime.getRuntime().availableProcessors()),
-             defaultForkJoinWorkerThreadFactory, null, false);
+             defaultForkJoinWorkerThreadFactory, null, false,
+             0, MAX_CAP, 1, null, DEFAULT_KEEPALIVE, TimeUnit.MILLISECONDS);
     }
 
     /**
      * Creates a {@code ForkJoinPool} with the indicated parallelism
-     * level, the {@linkplain
-     * #defaultForkJoinWorkerThreadFactory default thread factory},
-     * no UncaughtExceptionHandler, and non-async LIFO processing mode.
+     * level, using defaults for all other parameters (see {@link
+     * #ForkJoinPool(int, ForkJoinWorkerThreadFactory,
+     * UncaughtExceptionHandler, boolean, int, int, int, Predicate,
+     * long, TimeUnit)}).
      *
      * @param parallelism the parallelism level
      * @throws IllegalArgumentException if parallelism less than or
@@ -2636,11 +2165,15 @@
      *         java.lang.RuntimePermission}{@code ("modifyThread")}
      */
     public ForkJoinPool(int parallelism) {
-        this(parallelism, defaultForkJoinWorkerThreadFactory, null, false);
+        this(parallelism, defaultForkJoinWorkerThreadFactory, null, false,
+             0, MAX_CAP, 1, null, DEFAULT_KEEPALIVE, TimeUnit.MILLISECONDS);
     }
 
     /**
-     * Creates a {@code ForkJoinPool} with the given parameters.
+     * Creates a {@code ForkJoinPool} with the given parameters (using
+     * defaults for others -- see {@link #ForkJoinPool(int,
+     * ForkJoinWorkerThreadFactory, UncaughtExceptionHandler, boolean,
+     * int, int, int, Predicate, long, TimeUnit)}).
      *
      * @param parallelism the parallelism level. For default value,
      * use {@link java.lang.Runtime#availableProcessors}.
@@ -2667,43 +2200,186 @@
                         ForkJoinWorkerThreadFactory factory,
                         UncaughtExceptionHandler handler,
                         boolean asyncMode) {
-        this(checkParallelism(parallelism),
-             checkFactory(factory),
-             handler,
-             asyncMode ? FIFO_QUEUE : LIFO_QUEUE,
-             "ForkJoinPool-" + nextPoolId() + "-worker-");
-        checkPermission();
-    }
-
-    private static int checkParallelism(int parallelism) {
-        if (parallelism <= 0 || parallelism > MAX_CAP)
-            throw new IllegalArgumentException();
-        return parallelism;
-    }
-
-    private static ForkJoinWorkerThreadFactory checkFactory
-        (ForkJoinWorkerThreadFactory factory) {
-        if (factory == null)
-            throw new NullPointerException();
-        return factory;
+        this(parallelism, factory, handler, asyncMode,
+             0, MAX_CAP, 1, null, DEFAULT_KEEPALIVE, TimeUnit.MILLISECONDS);
     }
 
     /**
-     * Creates a {@code ForkJoinPool} with the given parameters, without
-     * any security checks or parameter validation.  Invoked directly by
-     * makeCommonPool.
+     * Creates a {@code ForkJoinPool} with the given parameters.
+     *
+     * @param parallelism the parallelism level. For default value,
+     * use {@link java.lang.Runtime#availableProcessors}.
+     *
+     * @param factory the factory for creating new threads. For
+     * default value, use {@link #defaultForkJoinWorkerThreadFactory}.
+     *
+     * @param handler the handler for internal worker threads that
+     * terminate due to unrecoverable errors encountered while
+     * executing tasks. For default value, use {@code null}.
+     *
+     * @param asyncMode if true, establishes local first-in-first-out
+     * scheduling mode for forked tasks that are never joined. This
+     * mode may be more appropriate than default locally stack-based
+     * mode in applications in which worker threads only process
+     * event-style asynchronous tasks.  For default value, use {@code
+     * false}.
+     *
+     * @param corePoolSize the number of threads to keep in the pool
+     * (unless timed out after an elapsed keep-alive). Normally (and
+     * by default) this is the same value as the parallelism level,
+     * but may be set to a larger value to reduce dynamic overhead if
+     * tasks regularly block. Using a smaller value (for example
+     * {@code 0}) has the same effect as the default.
+     *
+     * @param maximumPoolSize the maximum number of threads allowed.
+     * When the maximum is reached, attempts to replace blocked
+     * threads fail.  (However, because creation and termination of
+     * different threads may overlap, and may be managed by the given
+     * thread factory, this value may be transiently exceeded.)  To
+     * arrange the same value as is used by default for the common
+     * pool, use {@code 256} plus the {@code parallelism} level. (By
+     * default, the common pool allows a maximum of 256 spare
+     * threads.)  Using a value (for example {@code
+     * Integer.MAX_VALUE}) larger than the implementation's total
+     * thread limit has the same effect as using this limit (which is
+     * the default).
+     *
+     * @param minimumRunnable the minimum allowed number of core
+     * threads not blocked by a join or {@link ManagedBlocker}.  To
+     * ensure progress, when too few unblocked threads exist and
+     * unexecuted tasks may exist, new threads are constructed, up to
+     * the given maximumPoolSize.  For the default value, use {@code
+     * 1}, that ensures liveness.  A larger value might improve
+     * throughput in the presence of blocked activities, but might
+     * not, due to increased overhead.  A value of zero may be
+     * acceptable when submitted tasks cannot have dependencies
+     * requiring additional threads.
+     *
+     * @param saturate if non-null, a predicate invoked upon attempts
+     * to create more than the maximum total allowed threads.  By
+     * default, when a thread is about to block on a join or {@link
+     * ManagedBlocker}, but cannot be replaced because the
+     * maximumPoolSize would be exceeded, a {@link
+     * RejectedExecutionException} is thrown.  But if this predicate
+     * returns {@code true}, then no exception is thrown, so the pool
+     * continues to operate with fewer than the target number of
+     * runnable threads, which might not ensure progress.
+     *
+     * @param keepAliveTime the elapsed time since last use before
+     * a thread is terminated (and then later replaced if needed).
+     * For the default value, use {@code 60, TimeUnit.SECONDS}.
+     *
+     * @param unit the time unit for the {@code keepAliveTime} argument
+     *
+     * @throws IllegalArgumentException if parallelism is less than or
+     *         equal to zero, or is greater than implementation limit,
+     *         or if maximumPoolSize is less than parallelism,
+     *         of if the keepAliveTime is less than or equal to zero.
+     * @throws NullPointerException if the factory is null
+     * @throws SecurityException if a security manager exists and
+     *         the caller is not permitted to modify threads
+     *         because it does not hold {@link
+     *         java.lang.RuntimePermission}{@code ("modifyThread")}
+     * @since 9
      */
-    private ForkJoinPool(int parallelism,
-                         ForkJoinWorkerThreadFactory factory,
-                         UncaughtExceptionHandler handler,
-                         int mode,
-                         String workerNamePrefix) {
-        this.workerNamePrefix = workerNamePrefix;
+    public ForkJoinPool(int parallelism,
+                        ForkJoinWorkerThreadFactory factory,
+                        UncaughtExceptionHandler handler,
+                        boolean asyncMode,
+                        int corePoolSize,
+                        int maximumPoolSize,
+                        int minimumRunnable,
+                        Predicate<? super ForkJoinPool> saturate,
+                        long keepAliveTime,
+                        TimeUnit unit) {
+        // check, encode, pack parameters
+        if (parallelism <= 0 || parallelism > MAX_CAP ||
+            maximumPoolSize < parallelism || keepAliveTime <= 0L)
+            throw new IllegalArgumentException();
+        if (factory == null)
+            throw new NullPointerException();
+        long ms = Math.max(unit.toMillis(keepAliveTime), TIMEOUT_SLOP);
+
+        int corep = Math.min(Math.max(corePoolSize, parallelism), MAX_CAP);
+        long c = ((((long)(-corep)       << TC_SHIFT) & TC_MASK) |
+                  (((long)(-parallelism) << RC_SHIFT) & RC_MASK));
+        int m = parallelism | (asyncMode ? FIFO : 0);
+        int maxSpares = Math.min(maximumPoolSize, MAX_CAP) - parallelism;
+        int minAvail = Math.min(Math.max(minimumRunnable, 0), MAX_CAP);
+        int b = ((minAvail - parallelism) & SMASK) | (maxSpares << SWIDTH);
+        int n = (parallelism > 1) ? parallelism - 1 : 1; // at least 2 slots
+        n |= n >>> 1; n |= n >>> 2; n |= n >>> 4; n |= n >>> 8; n |= n >>> 16;
+        n = (n + 1) << 1; // power of two, including space for submission queues
+
+        this.workerNamePrefix = "ForkJoinPool-" + nextPoolId() + "-worker-";
+        this.workQueues = new WorkQueue[n];
         this.factory = factory;
         this.ueh = handler;
-        this.config = (parallelism & SMASK) | mode;
-        long np = (long)(-parallelism); // offset ctl counts
-        this.ctl = ((np << AC_SHIFT) & AC_MASK) | ((np << TC_SHIFT) & TC_MASK);
+        this.saturate = saturate;
+        this.keepAlive = ms;
+        this.bounds = b;
+        this.mode = m;
+        this.ctl = c;
+        checkPermission();
+    }
+
+    private static Object newInstanceFromSystemProperty(String property)
+        throws ReflectiveOperationException {
+        String className = System.getProperty(property);
+        return (className == null)
+            ? null
+            : ClassLoader.getSystemClassLoader().loadClass(className)
+            .getConstructor().newInstance();
+    }
+
+    /**
+     * Constructor for common pool using parameters possibly
+     * overridden by system properties
+     */
+    private ForkJoinPool(byte forCommonPoolOnly) {
+        int parallelism = -1;
+        ForkJoinWorkerThreadFactory fac = null;
+        UncaughtExceptionHandler handler = null;
+        try {  // ignore exceptions in accessing/parsing properties
+            String pp = System.getProperty
+                ("java.util.concurrent.ForkJoinPool.common.parallelism");
+            if (pp != null)
+                parallelism = Integer.parseInt(pp);
+            fac = (ForkJoinWorkerThreadFactory) newInstanceFromSystemProperty(
+                "java.util.concurrent.ForkJoinPool.common.threadFactory");
+            handler = (UncaughtExceptionHandler) newInstanceFromSystemProperty(
+                "java.util.concurrent.ForkJoinPool.common.exceptionHandler");
+        } catch (Exception ignore) {
+        }
+
+        if (fac == null) {
+            if (System.getSecurityManager() == null)
+                fac = defaultForkJoinWorkerThreadFactory;
+            else // use security-managed default
+                fac = new InnocuousForkJoinWorkerThreadFactory();
+        }
+        if (parallelism < 0 && // default 1 less than #cores
+            (parallelism = Runtime.getRuntime().availableProcessors() - 1) <= 0)
+            parallelism = 1;
+        if (parallelism > MAX_CAP)
+            parallelism = MAX_CAP;
+
+        long c = ((((long)(-parallelism) << TC_SHIFT) & TC_MASK) |
+                  (((long)(-parallelism) << RC_SHIFT) & RC_MASK));
+        int b = ((1 - parallelism) & SMASK) | (COMMON_MAX_SPARES << SWIDTH);
+        int n = (parallelism > 1) ? parallelism - 1 : 1;
+        n |= n >>> 1; n |= n >>> 2; n |= n >>> 4; n |= n >>> 8; n |= n >>> 16;
+        n = (n + 1) << 1;
+
+        this.workerNamePrefix = "ForkJoinPool.commonPool-worker-";
+        this.workQueues = new WorkQueue[n];
+        this.factory = fac;
+        this.ueh = handler;
+        this.saturate = null;
+        this.keepAlive = DEFAULT_KEEPALIVE;
+        this.bounds = b;
+        this.mode = parallelism;
+        this.ctl = c;
     }
 
     /**
@@ -2817,15 +2493,13 @@
      * @throws RejectedExecutionException if the task cannot be
      *         scheduled for execution
      */
+    @SuppressWarnings("unchecked")
     public ForkJoinTask<?> submit(Runnable task) {
         if (task == null)
             throw new NullPointerException();
-        ForkJoinTask<?> job;
-        if (task instanceof ForkJoinTask<?>) // avoid re-wrap
-            job = (ForkJoinTask<?>) task;
-        else
-            job = new ForkJoinTask.AdaptedRunnableAction(task);
-        return externalSubmit(job);
+        return externalSubmit((task instanceof ForkJoinTask<?>)
+            ? (ForkJoinTask<Void>) task // avoid re-wrap
+            : new ForkJoinTask.AdaptedRunnableAction(task));
     }
 
     /**
@@ -2879,8 +2553,8 @@
      * @return the targeted parallelism level of this pool
      */
     public int getParallelism() {
-        int par;
-        return ((par = config & SMASK) > 0) ? par : 1;
+        int par = mode & SMASK;
+        return (par > 0) ? par : 1;
     }
 
     /**
@@ -2902,7 +2576,7 @@
      * @return the number of worker threads
      */
     public int getPoolSize() {
-        return (config & SMASK) + (short)(ctl >>> TC_SHIFT);
+        return ((mode & SMASK) + (short)(ctl >>> TC_SHIFT));
     }
 
     /**
@@ -2912,7 +2586,7 @@
      * @return {@code true} if this pool uses async mode
      */
     public boolean getAsyncMode() {
-        return (config & FIFO_QUEUE) != 0;
+        return (mode & FIFO) != 0;
     }
 
     /**
@@ -2924,8 +2598,9 @@
      * @return the number of worker threads
      */
     public int getRunningThreadCount() {
-        int rc = 0;
         WorkQueue[] ws; WorkQueue w;
+        VarHandle.acquireFence();
+        int rc = 0;
         if ((ws = workQueues) != null) {
             for (int i = 1; i < ws.length; i += 2) {
                 if ((w = ws[i]) != null && w.isApparentlyUnblocked())
@@ -2943,7 +2618,7 @@
      * @return the number of active threads
      */
     public int getActiveThreadCount() {
-        int r = (config & SMASK) + (int)(ctl >> AC_SHIFT);
+        int r = (mode & SMASK) + (int)(ctl >> RC_SHIFT);
         return (r <= 0) ? 0 : r; // suppress momentarily negative values
     }
 
@@ -2959,7 +2634,30 @@
      * @return {@code true} if all threads are currently idle
      */
     public boolean isQuiescent() {
-        return (config & SMASK) + (int)(ctl >> AC_SHIFT) <= 0;
+        for (;;) {
+            long c = ctl;
+            int md = mode, pc = md & SMASK;
+            int tc = pc + (short)(c >>> TC_SHIFT);
+            int rc = pc + (int)(c >> RC_SHIFT);
+            if ((md & (STOP | TERMINATED)) != 0)
+                return true;
+            else if (rc > 0)
+                return false;
+            else {
+                WorkQueue[] ws; WorkQueue v;
+                if ((ws = workQueues) != null) {
+                    for (int i = 1; i < ws.length; i += 2) {
+                        if ((v = ws[i]) != null) {
+                            if (v.source > 0)
+                                return false;
+                            --tc;
+                        }
+                    }
+                }
+                if (tc == 0 && ctl == c)
+                    return true;
+            }
+        }
     }
 
     /**
@@ -2974,13 +2672,12 @@
      * @return the number of steals
      */
     public long getStealCount() {
-        AuxState sc = auxState;
-        long count = (sc == null) ? 0L : sc.stealCount;
+        long count = stealCount;
         WorkQueue[] ws; WorkQueue w;
         if ((ws = workQueues) != null) {
             for (int i = 1; i < ws.length; i += 2) {
                 if ((w = ws[i]) != null)
-                    count += w.nsteals;
+                    count += (long)w.nsteals & 0xffffffffL;
             }
         }
         return count;
@@ -2997,8 +2694,9 @@
      * @return the number of queued tasks
      */
     public long getQueuedTaskCount() {
-        long count = 0;
         WorkQueue[] ws; WorkQueue w;
+        VarHandle.acquireFence();
+        int count = 0;
         if ((ws = workQueues) != null) {
             for (int i = 1; i < ws.length; i += 2) {
                 if ((w = ws[i]) != null)
@@ -3016,8 +2714,9 @@
      * @return the number of queued submissions
      */
     public int getQueuedSubmissionCount() {
-        int count = 0;
         WorkQueue[] ws; WorkQueue w;
+        VarHandle.acquireFence();
+        int count = 0;
         if ((ws = workQueues) != null) {
             for (int i = 0; i < ws.length; i += 2) {
                 if ((w = ws[i]) != null)
@@ -3035,6 +2734,7 @@
      */
     public boolean hasQueuedSubmissions() {
         WorkQueue[] ws; WorkQueue w;
+        VarHandle.acquireFence();
         if ((ws = workQueues) != null) {
             for (int i = 0; i < ws.length; i += 2) {
                 if ((w = ws[i]) != null && !w.isEmpty())
@@ -3052,15 +2752,7 @@
      * @return the next submission, or {@code null} if none
      */
     protected ForkJoinTask<?> pollSubmission() {
-        WorkQueue[] ws; int wl; WorkQueue w; ForkJoinTask<?> t;
-        int r = ThreadLocalRandom.nextSecondarySeed();
-        if ((ws = workQueues) != null && (wl = ws.length) > 0) {
-            for (int m = wl - 1, i = 0; i < wl; ++i) {
-                if ((w = ws[(i << 1) & m]) != null && (t = w.poll()) != null)
-                    return t;
-            }
-        }
-        return null;
+        return pollScan(true);
     }
 
     /**
@@ -3081,8 +2773,9 @@
      * @return the number of elements transferred
      */
     protected int drainTasksTo(Collection<? super ForkJoinTask<?>> c) {
-        int count = 0;
         WorkQueue[] ws; WorkQueue w; ForkJoinTask<?> t;
+        VarHandle.acquireFence();
+        int count = 0;
         if ((ws = workQueues) != null) {
             for (int i = 0; i < ws.length; ++i) {
                 if ((w = ws[i]) != null) {
@@ -3105,10 +2798,10 @@
      */
     public String toString() {
         // Use a single pass through workQueues to collect counts
-        long qt = 0L, qs = 0L; int rc = 0;
-        AuxState sc = auxState;
-        long st = (sc == null) ? 0L : sc.stealCount;
+        int md = mode; // read volatile fields first
         long c = ctl;
+        long st = stealCount;
+        long qt = 0L, qs = 0L; int rc = 0;
         WorkQueue[] ws; WorkQueue w;
         if ((ws = workQueues) != null) {
             for (int i = 0; i < ws.length; ++i) {
@@ -3118,22 +2811,22 @@
                         qs += size;
                     else {
                         qt += size;
-                        st += w.nsteals;
+                        st += (long)w.nsteals & 0xffffffffL;
                         if (w.isApparentlyUnblocked())
                             ++rc;
                     }
                 }
             }
         }
-        int pc = (config & SMASK);
+
+        int pc = (md & SMASK);
         int tc = pc + (short)(c >>> TC_SHIFT);
-        int ac = pc + (int)(c >> AC_SHIFT);
+        int ac = pc + (int)(c >> RC_SHIFT);
         if (ac < 0) // ignore transient negative
             ac = 0;
-        int rs = runState;
-        String level = ((rs & TERMINATED) != 0 ? "Terminated" :
-                        (rs & STOP)       != 0 ? "Terminating" :
-                        (rs & SHUTDOWN)   != 0 ? "Shutting down" :
+        String level = ((md & TERMINATED) != 0 ? "Terminated" :
+                        (md & STOP)       != 0 ? "Terminating" :
+                        (md & SHUTDOWN)   != 0 ? "Shutting down" :
                         "Running");
         return super.toString() +
             "[" + level +
@@ -3196,7 +2889,7 @@
      * @return {@code true} if all tasks have completed following shut down
      */
     public boolean isTerminated() {
-        return (runState & TERMINATED) != 0;
+        return (mode & TERMINATED) != 0;
     }
 
     /**
@@ -3213,8 +2906,8 @@
      * @return {@code true} if terminating but not yet terminated
      */
     public boolean isTerminating() {
-        int rs = runState;
-        return (rs & STOP) != 0 && (rs & TERMINATED) == 0;
+        int md = mode;
+        return (md & STOP) != 0 && (md & TERMINATED) == 0;
     }
 
     /**
@@ -3223,7 +2916,7 @@
      * @return {@code true} if this pool has been shut down
      */
     public boolean isShutdown() {
-        return (runState & SHUTDOWN) != 0;
+        return (mode & SHUTDOWN) != 0;
     }
 
     /**
@@ -3287,30 +2980,19 @@
             helpQuiescePool(wt.workQueue);
             return true;
         }
-        long startTime = System.nanoTime();
-        WorkQueue[] ws;
-        int r = 0, wl;
-        boolean found = true;
-        while (!isQuiescent() && (ws = workQueues) != null &&
-               (wl = ws.length) > 0) {
-            if (!found) {
-                if ((System.nanoTime() - startTime) > nanos)
+        else {
+            for (long startTime = System.nanoTime();;) {
+                ForkJoinTask<?> t;
+                if ((t = pollScan(false)) != null)
+                    t.doExec();
+                else if (isQuiescent())
+                    return true;
+                else if ((System.nanoTime() - startTime) > nanos)
                     return false;
-                Thread.yield(); // cannot block
-            }
-            found = false;
-            for (int m = wl - 1, j = (m + 1) << 2; j >= 0; --j) {
-                ForkJoinTask<?> t; WorkQueue q; int b, k;
-                if ((k = r++ & m) <= m && k >= 0 && (q = ws[k]) != null &&
-                    (b = q.base) - q.top < 0) {
-                    found = true;
-                    if ((t = q.pollAt(b)) != null)
-                        t.doExec();
-                    break;
-                }
+                else
+                    Thread.yield(); // cannot block
             }
         }
-        return true;
     }
 
     /**
@@ -3423,19 +3105,22 @@
      */
     public static void managedBlock(ManagedBlocker blocker)
         throws InterruptedException {
+        if (blocker == null) throw new NullPointerException();
         ForkJoinPool p;
         ForkJoinWorkerThread wt;
+        WorkQueue w;
         Thread t = Thread.currentThread();
         if ((t instanceof ForkJoinWorkerThread) &&
-            (p = (wt = (ForkJoinWorkerThread)t).pool) != null) {
-            WorkQueue w = wt.workQueue;
+            (p = (wt = (ForkJoinWorkerThread)t).pool) != null &&
+            (w = wt.workQueue) != null) {
+            int block;
             while (!blocker.isReleasable()) {
-                if (p.tryCompensate(w)) {
+                if ((block = p.tryCompensate(w)) != 0) {
                     try {
                         do {} while (!blocker.isReleasable() &&
                                      !blocker.block());
                     } finally {
-                        U.getAndAddLong(p, CTL, AC_UNIT);
+                        CTL.getAndAdd(p, (block > 0) ? RC_UNIT : 0L);
                     }
                     break;
                 }
@@ -3447,6 +3132,29 @@
         }
     }
 
+    /**
+     * If the given executor is a ForkJoinPool, poll and execute
+     * AsynchronousCompletionTasks from worker's queue until none are
+     * available or blocker is released.
+     */
+    static void helpAsyncBlocker(Executor e, ManagedBlocker blocker) {
+        if (e instanceof ForkJoinPool) {
+            WorkQueue w; ForkJoinWorkerThread wt; WorkQueue[] ws; int r, n;
+            ForkJoinPool p = (ForkJoinPool)e;
+            Thread thread = Thread.currentThread();
+            if (thread instanceof ForkJoinWorkerThread &&
+                (wt = (ForkJoinWorkerThread)thread).pool == p)
+                w = wt.workQueue;
+            else if ((r = ThreadLocalRandom.getProbe()) != 0 &&
+                     (ws = p.workQueues) != null && (n = ws.length) > 0)
+                w = ws[(n - 1) & r & SQMASK];
+            else
+                w = null;
+            if (w != null)
+                w.helpAsyncBlocker(blocker);
+        }
+    }
+
     // AbstractExecutorService overrides.  These rely on undocumented
     // fact that ForkJoinTask.adapt returns ForkJoinTasks that also
     // implement RunnableFuture.
@@ -3459,26 +3167,19 @@
         return new ForkJoinTask.AdaptedCallable<T>(callable);
     }
 
-    // Unsafe mechanics
-    private static final sun.misc.Unsafe U = sun.misc.Unsafe.getUnsafe();
-    private static final long CTL;
-    private static final long RUNSTATE;
-    private static final int ABASE;
-    private static final int ASHIFT;
+    // VarHandle mechanics
+    private static final VarHandle CTL;
+    private static final VarHandle MODE;
+    static final VarHandle QA;
 
     static {
         try {
-            CTL = U.objectFieldOffset
-                (ForkJoinPool.class.getDeclaredField("ctl"));
-            RUNSTATE = U.objectFieldOffset
-                (ForkJoinPool.class.getDeclaredField("runState"));
-            ABASE = U.arrayBaseOffset(ForkJoinTask[].class);
-            int scale = U.arrayIndexScale(ForkJoinTask[].class);
-            if ((scale & (scale - 1)) != 0)
-                throw new Error("array index scale not a power of two");
-            ASHIFT = 31 - Integer.numberOfLeadingZeros(scale);
+            MethodHandles.Lookup l = MethodHandles.lookup();
+            CTL = l.findVarHandle(ForkJoinPool.class, "ctl", long.class);
+            MODE = l.findVarHandle(ForkJoinPool.class, "mode", int.class);
+            QA = MethodHandles.arrayElementVarHandle(ForkJoinTask[].class);
         } catch (ReflectiveOperationException e) {
-            throw new Error(e);
+            throw new ExceptionInInitializerError(e);
         }
 
         // Reduce the risk of rare disastrous classloading in first call to
@@ -3498,52 +3199,11 @@
             new DefaultForkJoinWorkerThreadFactory();
         modifyThreadPermission = new RuntimePermission("modifyThread");
 
-        common = java.security.AccessController.doPrivileged
-            (new java.security.PrivilegedAction<ForkJoinPool>() {
-                public ForkJoinPool run() { return makeCommonPool(); }});
+        common = AccessController.doPrivileged(new PrivilegedAction<>() {
+            public ForkJoinPool run() {
+                return new ForkJoinPool((byte)0); }});
 
-        // report 1 even if threads disabled
-        COMMON_PARALLELISM = Math.max(common.config & SMASK, 1);
-    }
-
-    /**
-     * Creates and returns the common pool, respecting user settings
-     * specified via system properties.
-     */
-    static ForkJoinPool makeCommonPool() {
-        int parallelism = -1;
-        ForkJoinWorkerThreadFactory factory = null;
-        UncaughtExceptionHandler handler = null;
-        try {  // ignore exceptions in accessing/parsing properties
-            String pp = System.getProperty
-                ("java.util.concurrent.ForkJoinPool.common.parallelism");
-            String fp = System.getProperty
-                ("java.util.concurrent.ForkJoinPool.common.threadFactory");
-            String hp = System.getProperty
-                ("java.util.concurrent.ForkJoinPool.common.exceptionHandler");
-            if (pp != null)
-                parallelism = Integer.parseInt(pp);
-            if (fp != null)
-                factory = ((ForkJoinWorkerThreadFactory)ClassLoader.
-                           getSystemClassLoader().loadClass(fp).newInstance());
-            if (hp != null)
-                handler = ((UncaughtExceptionHandler)ClassLoader.
-                           getSystemClassLoader().loadClass(hp).newInstance());
-        } catch (Exception ignore) {
-        }
-        if (factory == null) {
-            if (System.getSecurityManager() == null)
-                factory = defaultForkJoinWorkerThreadFactory;
-            else // use security-managed default
-                factory = new InnocuousForkJoinWorkerThreadFactory();
-        }
-        if (parallelism < 0 && // default 1 less than #cores
-            (parallelism = Runtime.getRuntime().availableProcessors() - 1) <= 0)
-            parallelism = 1;
-        if (parallelism > MAX_CAP)
-            parallelism = MAX_CAP;
-        return new ForkJoinPool(parallelism, factory, handler, LIFO_QUEUE,
-                                "ForkJoinPool.commonPool-worker-");
+        COMMON_PARALLELISM = Math.max(common.mode & SMASK, 1);
     }
 
     /**
@@ -3556,27 +3216,20 @@
          * An ACC to restrict permissions for the factory itself.
          * The constructed workers have no permissions set.
          */
-        private static final AccessControlContext innocuousAcc;
-        static {
-            Permissions innocuousPerms = new Permissions();
-            innocuousPerms.add(modifyThreadPermission);
-            innocuousPerms.add(new RuntimePermission(
-                                   "enableContextClassLoaderOverride"));
-            innocuousPerms.add(new RuntimePermission(
-                                   "modifyThreadGroup"));
-            innocuousAcc = new AccessControlContext(new ProtectionDomain[] {
-                    new ProtectionDomain(null, innocuousPerms)
-                });
-        }
+        private static final AccessControlContext ACC = contextWithPermissions(
+            modifyThreadPermission,
+            new RuntimePermission("enableContextClassLoaderOverride"),
+            new RuntimePermission("modifyThreadGroup"),
+            new RuntimePermission("getClassLoader"),
+            new RuntimePermission("setContextClassLoader"));
 
         public final ForkJoinWorkerThread newThread(ForkJoinPool pool) {
-            return java.security.AccessController.doPrivileged(
-                new java.security.PrivilegedAction<ForkJoinWorkerThread>() {
+            return AccessController.doPrivileged(
+                new PrivilegedAction<>() {
                     public ForkJoinWorkerThread run() {
                         return new ForkJoinWorkerThread.
-                            InnocuousForkJoinWorkerThread(pool);
-                    }}, innocuousAcc);
+                            InnocuousForkJoinWorkerThread(pool); }},
+                ACC);
         }
     }
-
 }
diff --git a/ojluni/src/main/java/java/util/concurrent/ForkJoinTask.java b/ojluni/src/main/java/java/util/concurrent/ForkJoinTask.java
index efccfa5..fd28e84 100644
--- a/ojluni/src/main/java/java/util/concurrent/ForkJoinTask.java
+++ b/ojluni/src/main/java/java/util/concurrent/ForkJoinTask.java
@@ -36,6 +36,8 @@
 package java.util.concurrent;
 
 import java.io.Serializable;
+import java.lang.invoke.MethodHandles;
+import java.lang.invoke.VarHandle;
 import java.lang.ref.ReferenceQueue;
 import java.lang.ref.WeakReference;
 import java.lang.reflect.Constructor;
@@ -96,7 +98,7 @@
  * encountering the exception; minimally only the latter.
  *
  * <p>It is possible to define and use ForkJoinTasks that may block,
- * but doing do requires three further considerations: (1) Completion
+ * but doing so requires three further considerations: (1) Completion
  * of few if any <em>other</em> tasks should be dependent on a task
  * that blocks on external synchronization or I/O. Event-style async
  * tasks that are never joined (for example, those subclassing {@link
@@ -136,11 +138,11 @@
  * {@link #isCompletedNormally} is true if a task completed without
  * cancellation or encountering an exception; {@link #isCancelled} is
  * true if the task was cancelled (in which case {@link #getException}
- * returns a {@link java.util.concurrent.CancellationException}); and
+ * returns a {@link CancellationException}); and
  * {@link #isCompletedAbnormally} is true if a task was either
  * cancelled or encountered an exception, in which case {@link
  * #getException} will return either the encountered exception or
- * {@link java.util.concurrent.CancellationException}.
+ * {@link CancellationException}.
  *
  * <p>The ForkJoinTask class is not usually directly subclassed.
  * Instead, you subclass one of the abstract classes that support a
@@ -221,52 +223,59 @@
      * methods in a way that flows well in javadocs.
      */
 
-    /*
+    /**
      * The status field holds run control status bits packed into a
-     * single int to minimize footprint and to ensure atomicity (via
-     * CAS).  Status is initially zero, and takes on nonnegative
-     * values until completed, upon which status (anded with
-     * DONE_MASK) holds value NORMAL, CANCELLED, or EXCEPTIONAL. Tasks
-     * undergoing blocking waits by other threads have the SIGNAL bit
-     * set.  Completion of a stolen task with SIGNAL set awakens any
-     * waiters via notifyAll. Even though suboptimal for some
-     * purposes, we use basic builtin wait/notify to take advantage of
-     * "monitor inflation" in JVMs that we would otherwise need to
-     * emulate to avoid adding further per-task bookkeeping overhead.
-     * We want these monitors to be "fat", i.e., not use biasing or
-     * thin-lock techniques, so use some odd coding idioms that tend
-     * to avoid them, mainly by arranging that every synchronized
-     * block performs a wait, notifyAll or both.
+     * single int to ensure atomicity.  Status is initially zero, and
+     * takes on nonnegative values until completed, upon which it
+     * holds (sign bit) DONE, possibly with ABNORMAL (cancelled or
+     * exceptional) and THROWN (in which case an exception has been
+     * stored). Tasks with dependent blocked waiting joiners have the
+     * SIGNAL bit set.  Completion of a task with SIGNAL set awakens
+     * any waiters via notifyAll. (Waiters also help signal others
+     * upon completion.)
      *
      * These control bits occupy only (some of) the upper half (16
      * bits) of status field. The lower bits are used for user-defined
      * tags.
      */
-
-    /** The run status of this task */
     volatile int status; // accessed directly by pool and workers
-    static final int DONE_MASK   = 0xf0000000;  // mask out non-completion bits
-    static final int NORMAL      = 0xf0000000;  // must be negative
-    static final int CANCELLED   = 0xc0000000;  // must be < NORMAL
-    static final int EXCEPTIONAL = 0x80000000;  // must be < CANCELLED
-    static final int SIGNAL      = 0x00010000;  // must be >= 1 << 16
-    static final int SMASK       = 0x0000ffff;  // short bits for tags
+
+    private static final int DONE     = 1 << 31; // must be negative
+    private static final int ABNORMAL = 1 << 18; // set atomically with DONE
+    private static final int THROWN   = 1 << 17; // set atomically with ABNORMAL
+    private static final int SIGNAL   = 1 << 16; // true if joiner waiting
+    private static final int SMASK    = 0xffff;  // short bits for tags
+
+    static boolean isExceptionalStatus(int s) {  // needed by subclasses
+        return (s & THROWN) != 0;
+    }
 
     /**
-     * Marks completion and wakes up threads waiting to join this
-     * task.
+     * Sets DONE status and wakes up threads waiting to join this task.
      *
-     * @param completion one of NORMAL, CANCELLED, EXCEPTIONAL
-     * @return completion status on exit
+     * @return status on exit
      */
-    private int setCompletion(int completion) {
-        for (int s;;) {
+    private int setDone() {
+        int s;
+        if (((s = (int)STATUS.getAndBitwiseOr(this, DONE)) & SIGNAL) != 0)
+            synchronized (this) { notifyAll(); }
+        return s | DONE;
+    }
+
+    /**
+     * Marks cancelled or exceptional completion unless already done.
+     *
+     * @param completion must be DONE | ABNORMAL, ORed with THROWN if exceptional
+     * @return status on exit
+     */
+    private int abnormalCompletion(int completion) {
+        for (int s, ns;;) {
             if ((s = status) < 0)
                 return s;
-            if (U.compareAndSwapInt(this, STATUS, s, s | completion)) {
-                if ((s >>> 16) != 0)
+            else if (STATUS.weakCompareAndSet(this, s, ns = s | completion)) {
+                if ((s & SIGNAL) != 0)
                     synchronized (this) { notifyAll(); }
-                return completion;
+                return ns;
             }
         }
     }
@@ -284,10 +293,11 @@
             try {
                 completed = exec();
             } catch (Throwable rex) {
-                return setExceptionalCompletion(rex);
+                completed = false;
+                s = setExceptionalCompletion(rex);
             }
             if (completed)
-                s = setCompletion(NORMAL);
+                s = setDone();
         }
         return s;
     }
@@ -299,9 +309,7 @@
      * @param timeout using Object.wait conventions.
      */
     final void internalWait(long timeout) {
-        int s;
-        if ((s = status) >= 0 && // force completer to issue notify
-            U.compareAndSwapInt(this, STATUS, s, s | SIGNAL)) {
+        if ((int)STATUS.getAndBitwiseOr(this, SIGNAL) >= 0) {
             synchronized (this) {
                 if (status >= 0)
                     try { wait(timeout); } catch (InterruptedException ie) { }
@@ -316,27 +324,24 @@
      * @return status upon completion
      */
     private int externalAwaitDone() {
-        int s = ((this instanceof CountedCompleter) ? // try helping
-                 ForkJoinPool.common.externalHelpComplete(
-                     (CountedCompleter<?>)this, 0) :
-                 ForkJoinPool.common.tryExternalUnpush(this) ? doExec() : 0);
-        if (s >= 0 && (s = status) >= 0) {
+        int s = tryExternalHelp();
+        if (s >= 0 && (s = (int)STATUS.getAndBitwiseOr(this, SIGNAL)) >= 0) {
             boolean interrupted = false;
-            do {
-                if (U.compareAndSwapInt(this, STATUS, s, s | SIGNAL)) {
-                    synchronized (this) {
-                        if (status >= 0) {
-                            try {
-                                wait(0L);
-                            } catch (InterruptedException ie) {
-                                interrupted = true;
-                            }
+            synchronized (this) {
+                for (;;) {
+                    if ((s = status) >= 0) {
+                        try {
+                            wait(0L);
+                        } catch (InterruptedException ie) {
+                            interrupted = true;
                         }
-                        else
-                            notifyAll();
+                    }
+                    else {
+                        notifyAll();
+                        break;
                     }
                 }
-            } while ((s = status) >= 0);
+            }
             if (interrupted)
                 Thread.currentThread().interrupt();
         }
@@ -347,30 +352,40 @@
      * Blocks a non-worker-thread until completion or interruption.
      */
     private int externalInterruptibleAwaitDone() throws InterruptedException {
-        int s;
-        if (Thread.interrupted())
-            throw new InterruptedException();
-        if ((s = status) >= 0 &&
-            (s = ((this instanceof CountedCompleter) ?
-                  ForkJoinPool.common.externalHelpComplete(
-                      (CountedCompleter<?>)this, 0) :
-                  ForkJoinPool.common.tryExternalUnpush(this) ? doExec() :
-                  0)) >= 0) {
-            while ((s = status) >= 0) {
-                if (U.compareAndSwapInt(this, STATUS, s, s | SIGNAL)) {
-                    synchronized (this) {
-                        if (status >= 0)
-                            wait(0L);
-                        else
-                            notifyAll();
+        int s = tryExternalHelp();
+        if (s >= 0 && (s = (int)STATUS.getAndBitwiseOr(this, SIGNAL)) >= 0) {
+            synchronized (this) {
+                for (;;) {
+                    if ((s = status) >= 0)
+                        wait(0L);
+                    else {
+                        notifyAll();
+                        break;
                     }
                 }
             }
         }
+        else if (Thread.interrupted())
+            throw new InterruptedException();
         return s;
     }
 
     /**
+     * Tries to help with tasks allowed for external callers.
+     *
+     * @return current status
+     */
+    private int tryExternalHelp() {
+        int s;
+        return ((s = status) < 0 ? s:
+                (this instanceof CountedCompleter) ?
+                ForkJoinPool.common.externalHelpComplete(
+                    (CountedCompleter<?>)this, 0) :
+                ForkJoinPool.common.tryExternalUnpush(this) ?
+                doExec() : 0);
+    }
+
+    /**
      * Implementation for join, get, quietlyJoin. Directly handles
      * only cases of already-completed, external wait, and
      * unfork+exec.  Others are relayed to ForkJoinPool.awaitJoin.
@@ -404,22 +419,24 @@
     // Exception table support
 
     /**
-     * Table of exceptions thrown by tasks, to enable reporting by
-     * callers. Because exceptions are rare, we don't directly keep
+     * Hash table of exceptions thrown by tasks, to enable reporting
+     * by callers. Because exceptions are rare, we don't directly keep
      * them with task objects, but instead use a weak ref table.  Note
      * that cancellation exceptions don't appear in the table, but are
      * instead recorded as status values.
      *
-     * Note: These statics are initialized below in static block.
+     * The exception table has a fixed capacity.
      */
-    private static final ExceptionNode[] exceptionTable;
-    private static final ReentrantLock exceptionTableLock;
-    private static final ReferenceQueue<Object> exceptionTableRefQueue;
+    private static final ExceptionNode[] exceptionTable
+        = new ExceptionNode[32];
 
-    /**
-     * Fixed capacity for exceptionTable.
-     */
-    private static final int EXCEPTION_MAP_CAPACITY = 32;
+    /** Lock protecting access to exceptionTable. */
+    private static final ReentrantLock exceptionTableLock
+        = new ReentrantLock();
+
+    /** Reference queue of stale exceptionally completed tasks. */
+    private static final ReferenceQueue<ForkJoinTask<?>> exceptionTableRefQueue
+        = new ReferenceQueue<>();
 
     /**
      * Key-value nodes for exception table.  The chained hash table
@@ -439,7 +456,7 @@
         final long thrower;  // use id not ref to avoid weak cycles
         final int hashCode;  // store task hashCode before weak ref disappears
         ExceptionNode(ForkJoinTask<?> task, Throwable ex, ExceptionNode next,
-                      ReferenceQueue<Object> exceptionTableRefQueue) {
+                      ReferenceQueue<ForkJoinTask<?>> exceptionTableRefQueue) {
             super(task, exceptionTableRefQueue);
             this.ex = ex;
             this.next = next;
@@ -475,7 +492,7 @@
             } finally {
                 lock.unlock();
             }
-            s = setCompletion(EXCEPTIONAL);
+            s = abnormalCompletion(DONE | ABNORMAL | THROWN);
         }
         return s;
     }
@@ -487,7 +504,7 @@
      */
     private int setExceptionalCompletion(Throwable ex) {
         int s = recordExceptionalCompletion(ex);
-        if ((s & DONE_MASK) == EXCEPTIONAL)
+        if ((s & THROWN) != 0)
             internalPropagateException(ex);
         return s;
     }
@@ -603,9 +620,8 @@
     private static void expungeStaleExceptions() {
         for (Object x; (x = exceptionTableRefQueue.poll()) != null;) {
             if (x instanceof ExceptionNode) {
-                int hashCode = ((ExceptionNode)x).hashCode;
                 ExceptionNode[] t = exceptionTable;
-                int i = hashCode & (t.length - 1);
+                int i = ((ExceptionNode)x).hashCode & (t.length - 1);
                 ExceptionNode e = t[i];
                 ExceptionNode pred = null;
                 while (e != null) {
@@ -663,10 +679,8 @@
      * Throws exception, if any, associated with the given status.
      */
     private void reportException(int s) {
-        if (s == CANCELLED)
-            throw new CancellationException();
-        if (s == EXCEPTIONAL)
-            rethrow(getThrowableException());
+        rethrow((s & THROWN) != 0 ? getThrowableException() :
+                new CancellationException());
     }
 
     // public methods
@@ -696,19 +710,19 @@
     }
 
     /**
-     * Returns the result of the computation when it {@link #isDone is
-     * done}.  This method differs from {@link #get()} in that
-     * abnormal completion results in {@code RuntimeException} or
-     * {@code Error}, not {@code ExecutionException}, and that
-     * interrupts of the calling thread do <em>not</em> cause the
-     * method to abruptly return by throwing {@code
-     * InterruptedException}.
+     * Returns the result of the computation when it
+     * {@linkplain #isDone is done}.
+     * This method differs from {@link #get()} in that abnormal
+     * completion results in {@code RuntimeException} or {@code Error},
+     * not {@code ExecutionException}, and that interrupts of the
+     * calling thread do <em>not</em> cause the method to abruptly
+     * return by throwing {@code InterruptedException}.
      *
      * @return the computed result
      */
     public final V join() {
         int s;
-        if ((s = doJoin() & DONE_MASK) != NORMAL)
+        if (((s = doJoin()) & ABNORMAL) != 0)
             reportException(s);
         return getRawResult();
     }
@@ -723,7 +737,7 @@
      */
     public final V invoke() {
         int s;
-        if ((s = doInvoke() & DONE_MASK) != NORMAL)
+        if (((s = doInvoke()) & ABNORMAL) != 0)
             reportException(s);
         return getRawResult();
     }
@@ -748,9 +762,9 @@
     public static void invokeAll(ForkJoinTask<?> t1, ForkJoinTask<?> t2) {
         int s1, s2;
         t2.fork();
-        if ((s1 = t1.doInvoke() & DONE_MASK) != NORMAL)
+        if (((s1 = t1.doInvoke()) & ABNORMAL) != 0)
             t1.reportException(s1);
-        if ((s2 = t2.doJoin() & DONE_MASK) != NORMAL)
+        if (((s2 = t2.doJoin()) & ABNORMAL) != 0)
             t2.reportException(s2);
     }
 
@@ -780,7 +794,7 @@
             }
             else if (i != 0)
                 t.fork();
-            else if (t.doInvoke() < NORMAL && ex == null)
+            else if ((t.doInvoke() & ABNORMAL) != 0 && ex == null)
                 ex = t.getException();
         }
         for (int i = 1; i <= last; ++i) {
@@ -788,7 +802,7 @@
             if (t != null) {
                 if (ex != null)
                     t.cancel(false);
-                else if (t.doJoin() < NORMAL)
+                else if ((t.doJoin() & ABNORMAL) != 0)
                     ex = t.getException();
             }
         }
@@ -816,7 +830,7 @@
      */
     public static <T extends ForkJoinTask<?>> Collection<T> invokeAll(Collection<T> tasks) {
         if (!(tasks instanceof RandomAccess) || !(tasks instanceof List<?>)) {
-            invokeAll(tasks.toArray(new ForkJoinTask<?>[tasks.size()]));
+            invokeAll(tasks.toArray(new ForkJoinTask<?>[0]));
             return tasks;
         }
         @SuppressWarnings("unchecked")
@@ -832,7 +846,7 @@
             }
             else if (i != 0)
                 t.fork();
-            else if (t.doInvoke() < NORMAL && ex == null)
+            else if ((t.doInvoke() & ABNORMAL) != 0 && ex == null)
                 ex = t.getException();
         }
         for (int i = 1; i <= last; ++i) {
@@ -840,7 +854,7 @@
             if (t != null) {
                 if (ex != null)
                     t.cancel(false);
-                else if (t.doJoin() < NORMAL)
+                else if ((t.doJoin() & ABNORMAL) != 0)
                     ex = t.getException();
             }
         }
@@ -877,7 +891,8 @@
      * @return {@code true} if this task is now cancelled
      */
     public boolean cancel(boolean mayInterruptIfRunning) {
-        return (setCompletion(CANCELLED) & DONE_MASK) == CANCELLED;
+        int s = abnormalCompletion(DONE | ABNORMAL);
+        return (s & (ABNORMAL | THROWN)) == ABNORMAL;
     }
 
     public final boolean isDone() {
@@ -885,7 +900,7 @@
     }
 
     public final boolean isCancelled() {
-        return (status & DONE_MASK) == CANCELLED;
+        return (status & (ABNORMAL | THROWN)) == ABNORMAL;
     }
 
     /**
@@ -894,7 +909,7 @@
      * @return {@code true} if this task threw an exception or was cancelled
      */
     public final boolean isCompletedAbnormally() {
-        return status < NORMAL;
+        return (status & ABNORMAL) != 0;
     }
 
     /**
@@ -905,7 +920,7 @@
      * exception and was not cancelled
      */
     public final boolean isCompletedNormally() {
-        return (status & DONE_MASK) == NORMAL;
+        return (status & (DONE | ABNORMAL)) == DONE;
     }
 
     /**
@@ -916,9 +931,9 @@
      * @return the exception, or {@code null} if none
      */
     public final Throwable getException() {
-        int s = status & DONE_MASK;
-        return ((s >= NORMAL)    ? null :
-                (s == CANCELLED) ? new CancellationException() :
+        int s = status;
+        return ((s & ABNORMAL) == 0 ? null :
+                (s & THROWN)   == 0 ? new CancellationException() :
                 getThrowableException());
     }
 
@@ -962,7 +977,7 @@
             setExceptionalCompletion(rex);
             return;
         }
-        setCompletion(NORMAL);
+        setDone();
     }
 
     /**
@@ -974,7 +989,7 @@
      * @since 1.8
      */
     public final void quietlyComplete() {
-        setCompletion(NORMAL);
+        setDone();
     }
 
     /**
@@ -991,11 +1006,12 @@
     public final V get() throws InterruptedException, ExecutionException {
         int s = (Thread.currentThread() instanceof ForkJoinWorkerThread) ?
             doJoin() : externalInterruptibleAwaitDone();
-        if ((s &= DONE_MASK) == CANCELLED)
-            throw new CancellationException();
-        if (s == EXCEPTIONAL)
+        if ((s & THROWN) != 0)
             throw new ExecutionException(getThrowableException());
-        return getRawResult();
+        else if ((s & ABNORMAL) != 0)
+            throw new CancellationException();
+        else
+            return getRawResult();
     }
 
     /**
@@ -1035,7 +1051,7 @@
                 while ((s = status) >= 0 &&
                        (ns = deadline - System.nanoTime()) > 0L) {
                     if ((ms = TimeUnit.NANOSECONDS.toMillis(ns)) > 0L &&
-                        U.compareAndSwapInt(this, STATUS, s, s | SIGNAL)) {
+                        (s = (int)STATUS.getAndBitwiseOr(this, SIGNAL)) >= 0) {
                         synchronized (this) {
                             if (status >= 0)
                                 wait(ms); // OK to throw InterruptedException
@@ -1047,15 +1063,13 @@
             }
         }
         if (s >= 0)
-            s = status;
-        if ((s &= DONE_MASK) != NORMAL) {
-            if (s == CANCELLED)
-                throw new CancellationException();
-            if (s != EXCEPTIONAL)
-                throw new TimeoutException();
+            throw new TimeoutException();
+        else if ((s & THROWN) != 0)
             throw new ExecutionException(getThrowableException());
-        }
-        return getRawResult();
+        else if ((s & ABNORMAL) != 0)
+            throw new CancellationException();
+        else
+            return getRawResult();
     }
 
     /**
@@ -1111,7 +1125,7 @@
      * setRawResult(null)}.
      */
     public void reinitialize() {
-        if ((status & DONE_MASK) == EXCEPTIONAL)
+        if ((status & THROWN) != 0)
             clearExceptionalCompletion();
         else
             status = 0;
@@ -1329,8 +1343,8 @@
      */
     public final short setForkJoinTaskTag(short newValue) {
         for (int s;;) {
-            if (U.compareAndSwapInt(this, STATUS, s = status,
-                                    (s & ~SMASK) | (newValue & SMASK)))
+            if (STATUS.weakCompareAndSet(this, s = status,
+                                         (s & ~SMASK) | (newValue & SMASK)))
                 return (short)s;
         }
     }
@@ -1353,8 +1367,8 @@
         for (int s;;) {
             if ((short)(s = status) != expect)
                 return false;
-            if (U.compareAndSwapInt(this, STATUS, s,
-                                    (s & ~SMASK) | (update & SMASK)))
+            if (STATUS.weakCompareAndSet(this, s,
+                                         (s & ~SMASK) | (update & SMASK)))
                 return true;
         }
     }
@@ -1377,6 +1391,9 @@
         public final void setRawResult(T v) { result = v; }
         public final boolean exec() { runnable.run(); return true; }
         public final void run() { invoke(); }
+        public String toString() {
+            return super.toString() + "[Wrapped task = " + runnable + "]";
+        }
         private static final long serialVersionUID = 5232453952276885070L;
     }
 
@@ -1394,6 +1411,9 @@
         public final void setRawResult(Void v) { }
         public final boolean exec() { runnable.run(); return true; }
         public final void run() { invoke(); }
+        public String toString() {
+            return super.toString() + "[Wrapped task = " + runnable + "]";
+        }
         private static final long serialVersionUID = 5232453952276885070L;
     }
 
@@ -1439,6 +1459,9 @@
             }
         }
         public final void run() { invoke(); }
+        public String toString() {
+            return super.toString() + "[Wrapped task = " + callable + "]";
+        }
         private static final long serialVersionUID = 2838392045355241008L;
     }
 
@@ -1515,19 +1538,14 @@
             setExceptionalCompletion((Throwable)ex);
     }
 
-    // Unsafe mechanics
-    private static final sun.misc.Unsafe U = sun.misc.Unsafe.getUnsafe();
-    private static final long STATUS;
-
+    // VarHandle mechanics
+    private static final VarHandle STATUS;
     static {
-        exceptionTableLock = new ReentrantLock();
-        exceptionTableRefQueue = new ReferenceQueue<Object>();
-        exceptionTable = new ExceptionNode[EXCEPTION_MAP_CAPACITY];
         try {
-            STATUS = U.objectFieldOffset
-                (ForkJoinTask.class.getDeclaredField("status"));
+            MethodHandles.Lookup l = MethodHandles.lookup();
+            STATUS = l.findVarHandle(ForkJoinTask.class, "status", int.class);
         } catch (ReflectiveOperationException e) {
-            throw new Error(e);
+            throw new ExceptionInInitializerError(e);
         }
     }
 
diff --git a/ojluni/src/main/java/java/util/concurrent/ForkJoinWorkerThread.java b/ojluni/src/main/java/java/util/concurrent/ForkJoinWorkerThread.java
index e98ba99..fa47c22 100644
--- a/ojluni/src/main/java/java/util/concurrent/ForkJoinWorkerThread.java
+++ b/ojluni/src/main/java/java/util/concurrent/ForkJoinWorkerThread.java
@@ -36,6 +36,8 @@
 package java.util.concurrent;
 
 import java.security.AccessControlContext;
+import java.security.AccessController;
+import java.security.PrivilegedAction;
 import java.security.ProtectionDomain;
 
 /**
@@ -47,7 +49,9 @@
  * and termination methods surrounding the main task processing loop.
  * If you do create such a subclass, you will also need to supply a
  * custom {@link ForkJoinPool.ForkJoinWorkerThreadFactory} to
- * {@linkplain ForkJoinPool#ForkJoinPool use it} in a {@code ForkJoinPool}.
+ * {@linkplain ForkJoinPool#ForkJoinPool(int, ForkJoinWorkerThreadFactory,
+ * UncaughtExceptionHandler, boolean, int, int, int, Predicate, long, TimeUnit)
+ * use it} in a {@code ForkJoinPool}.
  *
  * @since 1.7
  * @author Doug Lea
@@ -66,8 +70,9 @@
      * owning thread.
      *
      * Support for (non-public) subclass InnocuousForkJoinWorkerThread
-     * requires that we break quite a lot of encapsulation (via Unsafe)
-     * both here and in the subclass to access and set Thread fields.
+     * requires that we break quite a lot of encapsulation (via helper
+     * methods in ThreadLocalRandom) both here and in the subclass to
+     * access and set Thread fields.
      */
 
     final ForkJoinPool pool;                // the pool this thread works in
@@ -87,13 +92,28 @@
     }
 
     /**
+     * Version for use by the default pool.  Supports setting the
+     * context class loader.  This is a separate constructor to avoid
+     * affecting the protected constructor.
+     */
+    ForkJoinWorkerThread(ForkJoinPool pool, ClassLoader ccl) {
+        super("aForkJoinWorkerThread");
+        super.setContextClassLoader(ccl);
+        this.pool = pool;
+        this.workQueue = pool.registerWorker(this);
+    }
+
+    /**
      * Version for InnocuousForkJoinWorkerThread.
      */
-    ForkJoinWorkerThread(ForkJoinPool pool, ThreadGroup threadGroup,
+    ForkJoinWorkerThread(ForkJoinPool pool,
+                         ClassLoader ccl,
+                         ThreadGroup threadGroup,
                          AccessControlContext acc) {
         super(threadGroup, null, "aForkJoinWorkerThread");
-        U.putOrderedObject(this, INHERITEDACCESSCONTROLCONTEXT, acc);
-        eraseThreadLocals(); // clear before registering
+        super.setContextClassLoader(ccl);
+        ThreadLocalRandom.setInheritedAccessControlContext(this, acc);
+        ThreadLocalRandom.eraseThreadLocals(this); // clear before registering
         this.pool = pool;
         this.workQueue = pool.registerWorker(this);
     }
@@ -171,66 +191,44 @@
     }
 
     /**
-     * Erases ThreadLocals by nulling out Thread maps.
-     */
-    final void eraseThreadLocals() {
-        U.putObject(this, THREADLOCALS, null);
-        U.putObject(this, INHERITABLETHREADLOCALS, null);
-    }
-
-    /**
      * Non-public hook method for InnocuousForkJoinWorkerThread.
      */
     void afterTopLevelExec() {
     }
 
-    // Set up to allow setting thread fields in constructor
-    private static final sun.misc.Unsafe U = sun.misc.Unsafe.getUnsafe();
-    private static final long THREADLOCALS;
-    private static final long INHERITABLETHREADLOCALS;
-    private static final long INHERITEDACCESSCONTROLCONTEXT;
-    static {
-        try {
-            THREADLOCALS = U.objectFieldOffset
-                (Thread.class.getDeclaredField("threadLocals"));
-            INHERITABLETHREADLOCALS = U.objectFieldOffset
-                (Thread.class.getDeclaredField("inheritableThreadLocals"));
-            INHERITEDACCESSCONTROLCONTEXT = U.objectFieldOffset
-                (Thread.class.getDeclaredField("inheritedAccessControlContext"));
-        } catch (ReflectiveOperationException e) {
-            throw new Error(e);
-        }
-    }
-
     /**
      * A worker thread that has no permissions, is not a member of any
-     * user-defined ThreadGroup, and erases all ThreadLocals after
+     * user-defined ThreadGroup, uses the system class loader as
+     * thread context class loader, and erases all ThreadLocals after
      * running each top-level task.
      */
     static final class InnocuousForkJoinWorkerThread extends ForkJoinWorkerThread {
         /** The ThreadGroup for all InnocuousForkJoinWorkerThreads */
         private static final ThreadGroup innocuousThreadGroup =
-            createThreadGroup();
+            AccessController.doPrivileged(new PrivilegedAction<>() {
+                public ThreadGroup run() {
+                    ThreadGroup group = Thread.currentThread().getThreadGroup();
+                    for (ThreadGroup p; (p = group.getParent()) != null; )
+                        group = p;
+                    return new ThreadGroup(
+                        group, "InnocuousForkJoinWorkerThreadGroup");
+                }});
 
         /** An AccessControlContext supporting no privileges */
         private static final AccessControlContext INNOCUOUS_ACC =
             new AccessControlContext(
-                new ProtectionDomain[] {
-                    new ProtectionDomain(null, null)
-                });
+                new ProtectionDomain[] { new ProtectionDomain(null, null) });
 
         InnocuousForkJoinWorkerThread(ForkJoinPool pool) {
-            super(pool, innocuousThreadGroup, INNOCUOUS_ACC);
+            super(pool,
+                  ClassLoader.getSystemClassLoader(),
+                  innocuousThreadGroup,
+                  INNOCUOUS_ACC);
         }
 
         @Override // to erase ThreadLocals
         void afterTopLevelExec() {
-            eraseThreadLocals();
-        }
-
-        @Override // to always report system loader
-        public ClassLoader getContextClassLoader() {
-            return ClassLoader.getSystemClassLoader();
+            ThreadLocalRandom.eraseThreadLocals(this);
         }
 
         @Override // to silently fail
@@ -240,34 +238,5 @@
         public void setContextClassLoader(ClassLoader cl) {
             throw new SecurityException("setContextClassLoader");
         }
-
-        /**
-         * Returns a new group with the system ThreadGroup (the
-         * topmost, parent-less group) as parent.  Uses Unsafe to
-         * traverse Thread.group and ThreadGroup.parent fields.
-         */
-        private static ThreadGroup createThreadGroup() {
-            try {
-                sun.misc.Unsafe u = sun.misc.Unsafe.getUnsafe();
-                long tg = u.objectFieldOffset
-                    (Thread.class.getDeclaredField("group"));
-                long gp = u.objectFieldOffset
-                    (ThreadGroup.class.getDeclaredField("parent"));
-                ThreadGroup group = (ThreadGroup)
-                    u.getObject(Thread.currentThread(), tg);
-                while (group != null) {
-                    ThreadGroup parent = (ThreadGroup)u.getObject(group, gp);
-                    if (parent == null)
-                        return new ThreadGroup(group,
-                                               "InnocuousForkJoinWorkerThreadGroup");
-                    group = parent;
-                }
-            } catch (ReflectiveOperationException e) {
-                throw new Error(e);
-            }
-            // fall through if null as cannot-happen safeguard
-            throw new Error("Cannot create ThreadGroup");
-        }
     }
-
 }
diff --git a/ojluni/src/main/java/java/util/concurrent/Future.java b/ojluni/src/main/java/java/util/concurrent/Future.java
index 9bd05d6..6099c7c 100644
--- a/ojluni/src/main/java/java/util/concurrent/Future.java
+++ b/ojluni/src/main/java/java/util/concurrent/Future.java
@@ -50,8 +50,7 @@
  * declare types of the form {@code Future<?>} and
  * return {@code null} as a result of the underlying task.
  *
- * <p>
- * <b>Sample Usage</b> (Note that the following classes are all
+ * <p><b>Sample Usage</b> (Note that the following classes are all
  * made-up.)
  *
  * <pre> {@code
@@ -59,13 +58,9 @@
  * class App {
  *   ExecutorService executor = ...
  *   ArchiveSearcher searcher = ...
- *   void showSearch(final String target)
- *       throws InterruptedException {
- *     Future<String> future
- *       = executor.submit(new Callable<String>() {
- *         public String call() {
- *             return searcher.search(target);
- *         }});
+ *   void showSearch(String target) throws InterruptedException {
+ *     Callable<String> task = () -> searcher.search(target);
+ *     Future<String> future = executor.submit(task);
  *     displayOtherThings(); // do other things while searching
  *     try {
  *       displayText(future.get()); // use future
@@ -77,11 +72,7 @@
  * implements {@code Runnable}, and so may be executed by an {@code Executor}.
  * For example, the above construction with {@code submit} could be replaced by:
  * <pre> {@code
- * FutureTask<String> future =
- *   new FutureTask<>(new Callable<String>() {
- *     public String call() {
- *       return searcher.search(target);
- *   }});
+ * FutureTask<String> future = new FutureTask<>(task);
  * executor.execute(future);}</pre>
  *
  * <p>Memory consistency effects: Actions taken by the asynchronous computation
diff --git a/ojluni/src/main/java/java/util/concurrent/FutureTask.java b/ojluni/src/main/java/java/util/concurrent/FutureTask.java
index 62c2bfc..e913ef3 100644
--- a/ojluni/src/main/java/java/util/concurrent/FutureTask.java
+++ b/ojluni/src/main/java/java/util/concurrent/FutureTask.java
@@ -35,6 +35,8 @@
 
 package java.util.concurrent;
 
+import java.lang.invoke.MethodHandles;
+import java.lang.invoke.VarHandle;
 import java.util.concurrent.locks.LockSupport;
 
 /**
@@ -69,9 +71,6 @@
      * cancellation races. Sync control in the current design relies
      * on a "state" field updated via CAS to track completion, along
      * with a simple Treiber stack to hold waiting threads.
-     *
-     * Style note: As usual, we bypass overhead of using
-     * AtomicXFieldUpdaters and instead directly use Unsafe intrinsics.
      */
 
     /**
@@ -163,9 +162,8 @@
     }
 
     public boolean cancel(boolean mayInterruptIfRunning) {
-        if (!(state == NEW &&
-              U.compareAndSwapInt(this, STATE, NEW,
-                  mayInterruptIfRunning ? INTERRUPTING : CANCELLED)))
+        if (!(state == NEW && STATE.compareAndSet
+              (this, NEW, mayInterruptIfRunning ? INTERRUPTING : CANCELLED)))
             return false;
         try {    // in case call to interrupt throws exception
             if (mayInterruptIfRunning) {
@@ -174,7 +172,7 @@
                     if (t != null)
                         t.interrupt();
                 } finally { // final state
-                    U.putOrderedInt(this, STATE, INTERRUPTED);
+                    STATE.setRelease(this, INTERRUPTED);
                 }
             }
         } finally {
@@ -228,9 +226,9 @@
      * @param v the value
      */
     protected void set(V v) {
-        if (U.compareAndSwapInt(this, STATE, NEW, COMPLETING)) {
+        if (STATE.compareAndSet(this, NEW, COMPLETING)) {
             outcome = v;
-            U.putOrderedInt(this, STATE, NORMAL); // final state
+            STATE.setRelease(this, NORMAL); // final state
             finishCompletion();
         }
     }
@@ -246,16 +244,16 @@
      * @param t the cause of failure
      */
     protected void setException(Throwable t) {
-        if (U.compareAndSwapInt(this, STATE, NEW, COMPLETING)) {
+        if (STATE.compareAndSet(this, NEW, COMPLETING)) {
             outcome = t;
-            U.putOrderedInt(this, STATE, EXCEPTIONAL); // final state
+            STATE.setRelease(this, EXCEPTIONAL); // final state
             finishCompletion();
         }
     }
 
     public void run() {
         if (state != NEW ||
-            !U.compareAndSwapObject(this, RUNNER, null, Thread.currentThread()))
+            !RUNNER.compareAndSet(this, null, Thread.currentThread()))
             return;
         try {
             Callable<V> c = callable;
@@ -296,7 +294,7 @@
      */
     protected boolean runAndReset() {
         if (state != NEW ||
-            !U.compareAndSwapObject(this, RUNNER, null, Thread.currentThread()))
+            !RUNNER.compareAndSet(this, null, Thread.currentThread()))
             return false;
         boolean ran = false;
         int s = state;
@@ -363,7 +361,7 @@
     private void finishCompletion() {
         // assert state > COMPLETING;
         for (WaitNode q; (q = waiters) != null;) {
-            if (U.compareAndSwapObject(this, WAITERS, q, null)) {
+            if (WAITERS.weakCompareAndSet(this, q, null)) {
                 for (;;) {
                     Thread t = q.thread;
                     if (t != null) {
@@ -425,8 +423,7 @@
                 q = new WaitNode();
             }
             else if (!queued)
-                queued = U.compareAndSwapObject(this, WAITERS,
-                                                q.next = waiters, q);
+                queued = WAITERS.weakCompareAndSet(this, q.next = waiters, q);
             else if (timed) {
                 final long parkNanos;
                 if (startTime == 0L) { // first time
@@ -475,7 +472,7 @@
                         if (pred.thread == null) // check for race
                             continue retry;
                     }
-                    else if (!U.compareAndSwapObject(this, WAITERS, q, s))
+                    else if (!WAITERS.compareAndSet(this, q, s))
                         continue retry;
                 }
                 break;
@@ -483,21 +480,53 @@
         }
     }
 
-    // Unsafe mechanics
-    private static final sun.misc.Unsafe U = sun.misc.Unsafe.getUnsafe();
-    private static final long STATE;
-    private static final long RUNNER;
-    private static final long WAITERS;
+    /**
+     * Returns a string representation of this FutureTask.
+     *
+     * @implSpec
+     * The default implementation returns a string identifying this
+     * FutureTask, as well as its completion state.  The state, in
+     * brackets, contains one of the strings {@code "Completed Normally"},
+     * {@code "Completed Exceptionally"}, {@code "Cancelled"}, or {@code
+     * "Not completed"}.
+     *
+     * @return a string representation of this FutureTask
+     */
+    public String toString() {
+        final String status;
+        switch (state) {
+        case NORMAL:
+            status = "[Completed normally]";
+            break;
+        case EXCEPTIONAL:
+            status = "[Completed exceptionally: " + outcome + "]";
+            break;
+        case CANCELLED:
+        case INTERRUPTING:
+        case INTERRUPTED:
+            status = "[Cancelled]";
+            break;
+        default:
+            final Callable<?> callable = this.callable;
+            status = (callable == null)
+                ? "[Not completed]"
+                : "[Not completed, task = " + callable + "]";
+        }
+        return super.toString() + status;
+    }
+
+    // VarHandle mechanics
+    private static final VarHandle STATE;
+    private static final VarHandle RUNNER;
+    private static final VarHandle WAITERS;
     static {
         try {
-            STATE = U.objectFieldOffset
-                (FutureTask.class.getDeclaredField("state"));
-            RUNNER = U.objectFieldOffset
-                (FutureTask.class.getDeclaredField("runner"));
-            WAITERS = U.objectFieldOffset
-                (FutureTask.class.getDeclaredField("waiters"));
+            MethodHandles.Lookup l = MethodHandles.lookup();
+            STATE = l.findVarHandle(FutureTask.class, "state", int.class);
+            RUNNER = l.findVarHandle(FutureTask.class, "runner", Thread.class);
+            WAITERS = l.findVarHandle(FutureTask.class, "waiters", WaitNode.class);
         } catch (ReflectiveOperationException e) {
-            throw new Error(e);
+            throw new ExceptionInInitializerError(e);
         }
 
         // Reduce the risk of rare disastrous classloading in first call to
diff --git a/ojluni/src/main/java/java/util/concurrent/LinkedBlockingDeque.java b/ojluni/src/main/java/java/util/concurrent/LinkedBlockingDeque.java
index 9829c9c..f55998e 100644
--- a/ojluni/src/main/java/java/util/concurrent/LinkedBlockingDeque.java
+++ b/ojluni/src/main/java/java/util/concurrent/LinkedBlockingDeque.java
@@ -39,15 +39,13 @@
 import java.util.Collection;
 import java.util.Iterator;
 import java.util.NoSuchElementException;
+import java.util.Objects;
 import java.util.Spliterator;
 import java.util.Spliterators;
 import java.util.concurrent.locks.Condition;
 import java.util.concurrent.locks.ReentrantLock;
 import java.util.function.Consumer;
-
-// BEGIN android-note
-// removed link to collections framework docs
-// END android-note
+import java.util.function.Predicate;
 
 /**
  * An optionally-bounded {@linkplain BlockingDeque blocking deque} based on
@@ -66,9 +64,12 @@
  * contains}, {@link #iterator iterator.remove()}, and the bulk
  * operations, all of which run in linear time.
  *
- * <p>This class and its iterator implement all of the
- * <em>optional</em> methods of the {@link Collection} and {@link
- * Iterator} interfaces.
+ * <p>This class and its iterator implement all of the <em>optional</em>
+ * methods of the {@link Collection} and {@link Iterator} interfaces.
+ *
+ * <p>This class is a member of the
+ * <a href="{@docRoot}/java.base/java/util/package-summary.html#CollectionsFramework">
+ * Java Collections Framework</a>.
  *
  * @since 1.6
  * @author  Doug Lea
@@ -194,18 +195,7 @@
      */
     public LinkedBlockingDeque(Collection<? extends E> c) {
         this(Integer.MAX_VALUE);
-        final ReentrantLock lock = this.lock;
-        lock.lock(); // Never contended, but necessary for visibility
-        try {
-            for (E e : c) {
-                if (e == null)
-                    throw new NullPointerException();
-                if (!linkLast(new Node<E>(e)))
-                    throw new IllegalStateException("Deque full");
-            }
-        } finally {
-            lock.unlock();
-        }
+        addAll(c);
     }
 
 
@@ -298,6 +288,7 @@
      */
     void unlink(Node<E> x) {
         // assert lock.isHeldByCurrentThread();
+        // assert x.item != null;
         Node<E> p = x.prev;
         Node<E> n = x.next;
         if (p == null) {
@@ -660,7 +651,7 @@
 
     /**
      * Retrieves and removes the head of the queue represented by this deque.
-     * This method differs from {@link #poll poll} only in that it throws an
+     * This method differs from {@link #poll() poll()} only in that it throws an
      * exception if this deque is empty.
      *
      * <p>This method is equivalent to {@link #removeFirst() removeFirst}.
@@ -686,7 +677,7 @@
 
     /**
      * Retrieves, but does not remove, the head of the queue represented by
-     * this deque.  This method differs from {@link #peek peek} only in that
+     * this deque.  This method differs from {@link #peek() peek()} only in that
      * it throws an exception if this deque is empty.
      *
      * <p>This method is equivalent to {@link #getFirst() getFirst}.
@@ -740,8 +731,7 @@
      * @throws IllegalArgumentException      {@inheritDoc}
      */
     public int drainTo(Collection<? super E> c, int maxElements) {
-        if (c == null)
-            throw new NullPointerException();
+        Objects.requireNonNull(c);
         if (c == this)
             throw new IllegalArgumentException();
         if (maxElements <= 0)
@@ -834,46 +824,65 @@
         }
     }
 
-    /*
-     * TODO: Add support for more efficient bulk operations.
+    /**
+     * Appends all of the elements in the specified collection to the end of
+     * this deque, in the order that they are returned by the specified
+     * collection's iterator.  Attempts to {@code addAll} of a deque to
+     * itself result in {@code IllegalArgumentException}.
      *
-     * We don't want to acquire the lock for every iteration, but we
-     * also want other threads a chance to interact with the
-     * collection, especially when count is close to capacity.
+     * @param c the elements to be inserted into this deque
+     * @return {@code true} if this deque changed as a result of the call
+     * @throws NullPointerException if the specified collection or any
+     *         of its elements are null
+     * @throws IllegalArgumentException if the collection is this deque
+     * @throws IllegalStateException if this deque is full
+     * @see #add(Object)
      */
+    public boolean addAll(Collection<? extends E> c) {
+        if (c == this)
+            // As historically specified in AbstractQueue#addAll
+            throw new IllegalArgumentException();
 
-//     /**
-//      * Adds all of the elements in the specified collection to this
-//      * queue.  Attempts to addAll of a queue to itself result in
-//      * {@code IllegalArgumentException}. Further, the behavior of
-//      * this operation is undefined if the specified collection is
-//      * modified while the operation is in progress.
-//      *
-//      * @param c collection containing elements to be added to this queue
-//      * @return {@code true} if this queue changed as a result of the call
-//      * @throws ClassCastException            {@inheritDoc}
-//      * @throws NullPointerException          {@inheritDoc}
-//      * @throws IllegalArgumentException      {@inheritDoc}
-//      * @throws IllegalStateException if this deque is full
-//      * @see #add(Object)
-//      */
-//     public boolean addAll(Collection<? extends E> c) {
-//         if (c == null)
-//             throw new NullPointerException();
-//         if (c == this)
-//             throw new IllegalArgumentException();
-//         final ReentrantLock lock = this.lock;
-//         lock.lock();
-//         try {
-//             boolean modified = false;
-//             for (E e : c)
-//                 if (linkLast(e))
-//                     modified = true;
-//             return modified;
-//         } finally {
-//             lock.unlock();
-//         }
-//     }
+        // Copy c into a private chain of Nodes
+        Node<E> beg = null, end = null;
+        int n = 0;
+        for (E e : c) {
+            Objects.requireNonNull(e);
+            n++;
+            Node<E> newNode = new Node<E>(e);
+            if (beg == null)
+                beg = end = newNode;
+            else {
+                end.next = newNode;
+                newNode.prev = end;
+                end = newNode;
+            }
+        }
+        if (beg == null)
+            return false;
+
+        // Atomically append the chain at the end
+        final ReentrantLock lock = this.lock;
+        lock.lock();
+        try {
+            if (count + n <= capacity) {
+                beg.prev = last;
+                if (first == null)
+                    first = beg;
+                else
+                    last.next = beg;
+                last = end;
+                count += n;
+                notEmpty.signalAll();
+                return true;
+            }
+        } finally {
+            lock.unlock();
+        }
+        // Fall back to historic non-atomic implementation, failing
+        // with IllegalStateException when the capacity is exceeded.
+        return super.addAll(c);
+    }
 
     /**
      * Returns an array containing all of the elements in this deque, in
@@ -986,6 +995,18 @@
     }
 
     /**
+     * Used for any element traversal that is not entirely under lock.
+     * Such traversals must handle both:
+     * - dequeued nodes (p.next == p)
+     * - (possibly multiple) interior removed nodes (p.item == null)
+     */
+    Node<E> succ(Node<E> p) {
+        if (p == (p = p.next))
+            p = first;
+        return p;
+    }
+
+    /**
      * Returns an iterator over the elements in this deque in proper sequence.
      * The elements will be returned in order from first (head) to last (tail).
      *
@@ -1024,8 +1045,8 @@
         /**
          * nextItem holds on to item fields because once we claim that
          * an element exists in hasNext(), we must return item read
-         * under lock (in advance()) even if it was in the process of
-         * being removed when hasNext() was called.
+         * under lock even if it was in the process of being removed
+         * when hasNext() was called.
          */
         E nextItem;
 
@@ -1038,48 +1059,19 @@
         abstract Node<E> firstNode();
         abstract Node<E> nextNode(Node<E> n);
 
+        private Node<E> succ(Node<E> p) {
+            if (p == (p = nextNode(p)))
+                p = firstNode();
+            return p;
+        }
+
         AbstractItr() {
             // set to initial position
             final ReentrantLock lock = LinkedBlockingDeque.this.lock;
             lock.lock();
             try {
-                next = firstNode();
-                nextItem = (next == null) ? null : next.item;
-            } finally {
-                lock.unlock();
-            }
-        }
-
-        /**
-         * Returns the successor node of the given non-null, but
-         * possibly previously deleted, node.
-         */
-        private Node<E> succ(Node<E> n) {
-            // Chains of deleted nodes ending in null or self-links
-            // are possible if multiple interior nodes are removed.
-            for (;;) {
-                Node<E> s = nextNode(n);
-                if (s == null)
-                    return null;
-                else if (s.item != null)
-                    return s;
-                else if (s == n)
-                    return firstNode();
-                else
-                    n = s;
-            }
-        }
-
-        /**
-         * Advances next.
-         */
-        void advance() {
-            final ReentrantLock lock = LinkedBlockingDeque.this.lock;
-            lock.lock();
-            try {
-                // assert next != null;
-                next = succ(next);
-                nextItem = (next == null) ? null : next.item;
+                if ((next = firstNode()) != null)
+                    nextItem = next.item;
             } finally {
                 lock.unlock();
             }
@@ -1090,14 +1082,65 @@
         }
 
         public E next() {
-            if (next == null)
+            Node<E> p;
+            if ((p = next) == null)
                 throw new NoSuchElementException();
-            lastRet = next;
+            lastRet = p;
             E x = nextItem;
-            advance();
+            final ReentrantLock lock = LinkedBlockingDeque.this.lock;
+            lock.lock();
+            try {
+                E e = null;
+                for (p = nextNode(p); p != null && (e = p.item) == null; )
+                    p = succ(p);
+                next = p;
+                nextItem = e;
+            } finally {
+                lock.unlock();
+            }
             return x;
         }
 
+        public void forEachRemaining(Consumer<? super E> action) {
+            // A variant of forEachFrom
+            Objects.requireNonNull(action);
+            Node<E> p;
+            if ((p = next) == null) return;
+            lastRet = p;
+            next = null;
+            final ReentrantLock lock = LinkedBlockingDeque.this.lock;
+            final int batchSize = 64;
+            Object[] es = null;
+            int n, len = 1;
+            do {
+                lock.lock();
+                try {
+                    if (es == null) {
+                        p = nextNode(p);
+                        for (Node<E> q = p; q != null; q = succ(q))
+                            if (q.item != null && ++len == batchSize)
+                                break;
+                        es = new Object[len];
+                        es[0] = nextItem;
+                        nextItem = null;
+                        n = 1;
+                    } else
+                        n = 0;
+                    for (; p != null && n < len; p = succ(p))
+                        if ((es[n] = p.item) != null) {
+                            lastRet = p;
+                            n++;
+                        }
+                } finally {
+                    lock.unlock();
+                }
+                for (int i = 0; i < n; i++) {
+                    @SuppressWarnings("unchecked") E e = (E) es[i];
+                    action.accept(e);
+                }
+            } while (n > 0 && p != null);
+        }
+
         public void remove() {
             Node<E> n = lastRet;
             if (n == null)
@@ -1116,51 +1159,49 @@
 
     /** Forward iterator */
     private class Itr extends AbstractItr {
+        Itr() {}                        // prevent access constructor creation
         Node<E> firstNode() { return first; }
         Node<E> nextNode(Node<E> n) { return n.next; }
     }
 
     /** Descending iterator */
     private class DescendingItr extends AbstractItr {
+        DescendingItr() {}              // prevent access constructor creation
         Node<E> firstNode() { return last; }
         Node<E> nextNode(Node<E> n) { return n.prev; }
     }
 
-    /** A customized variant of Spliterators.IteratorSpliterator */
-    static final class LBDSpliterator<E> implements Spliterator<E> {
+    /**
+     * A customized variant of Spliterators.IteratorSpliterator.
+     * Keep this class in sync with (very similar) LBQSpliterator.
+     */
+    private final class LBDSpliterator implements Spliterator<E> {
         static final int MAX_BATCH = 1 << 25;  // max batch array size;
-        final LinkedBlockingDeque<E> queue;
         Node<E> current;    // current node; null until initialized
         int batch;          // batch size for splits
         boolean exhausted;  // true when no more nodes
-        long est;           // size estimate
-        LBDSpliterator(LinkedBlockingDeque<E> queue) {
-            this.queue = queue;
-            this.est = queue.size();
-        }
+        long est = size();  // size estimate
+
+        LBDSpliterator() {}
 
         public long estimateSize() { return est; }
 
         public Spliterator<E> trySplit() {
             Node<E> h;
-            final LinkedBlockingDeque<E> q = this.queue;
-            int b = batch;
-            int n = (b <= 0) ? 1 : (b >= MAX_BATCH) ? MAX_BATCH : b + 1;
             if (!exhausted &&
-                ((h = current) != null || (h = q.first) != null) &&
-                h.next != null) {
+                ((h = current) != null || (h = first) != null)
+                && h.next != null) {
+                int n = batch = Math.min(batch + 1, MAX_BATCH);
                 Object[] a = new Object[n];
-                final ReentrantLock lock = q.lock;
+                final ReentrantLock lock = LinkedBlockingDeque.this.lock;
                 int i = 0;
                 Node<E> p = current;
                 lock.lock();
                 try {
-                    if (p != null || (p = q.first) != null) {
-                        do {
+                    if (p != null || (p = first) != null)
+                        for (; p != null && i < n; p = succ(p))
                             if ((a[i] = p.item) != null)
-                                ++i;
-                        } while ((p = p.next) != null && i < n);
-                    }
+                                i++;
                 } finally {
                     lock.unlock();
                 }
@@ -1170,66 +1211,33 @@
                 }
                 else if ((est -= i) < 0L)
                     est = 0L;
-                if (i > 0) {
-                    batch = i;
+                if (i > 0)
                     return Spliterators.spliterator
                         (a, 0, i, (Spliterator.ORDERED |
                                    Spliterator.NONNULL |
                                    Spliterator.CONCURRENT));
-                }
             }
             return null;
         }
 
-        public void forEachRemaining(Consumer<? super E> action) {
-            if (action == null) throw new NullPointerException();
-            final LinkedBlockingDeque<E> q = this.queue;
-            final ReentrantLock lock = q.lock;
-            if (!exhausted) {
-                exhausted = true;
-                Node<E> p = current;
-                do {
-                    E e = null;
-                    lock.lock();
-                    try {
-                        if (p == null)
-                            p = q.first;
-                        while (p != null) {
-                            e = p.item;
-                            p = p.next;
-                            if (e != null)
-                                break;
-                        }
-                    } finally {
-                        lock.unlock();
-                    }
-                    if (e != null)
-                        action.accept(e);
-                } while (p != null);
-            }
-        }
-
         public boolean tryAdvance(Consumer<? super E> action) {
-            if (action == null) throw new NullPointerException();
-            final LinkedBlockingDeque<E> q = this.queue;
-            final ReentrantLock lock = q.lock;
+            Objects.requireNonNull(action);
             if (!exhausted) {
                 E e = null;
+                final ReentrantLock lock = LinkedBlockingDeque.this.lock;
                 lock.lock();
                 try {
-                    if (current == null)
-                        current = q.first;
-                    while (current != null) {
-                        e = current.item;
-                        current = current.next;
-                        if (e != null)
-                            break;
-                    }
+                    Node<E> p;
+                    if ((p = current) != null || (p = first) != null)
+                        do {
+                            e = p.item;
+                            p = succ(p);
+                        } while (e == null && p != null);
+                    if ((current = p) == null)
+                        exhausted = true;
                 } finally {
                     lock.unlock();
                 }
-                if (current == null)
-                    exhausted = true;
                 if (e != null) {
                     action.accept(e);
                     return true;
@@ -1238,9 +1246,20 @@
             return false;
         }
 
+        public void forEachRemaining(Consumer<? super E> action) {
+            Objects.requireNonNull(action);
+            if (!exhausted) {
+                exhausted = true;
+                Node<E> p = current;
+                current = null;
+                forEachFrom(action, p);
+            }
+        }
+
         public int characteristics() {
-            return Spliterator.ORDERED | Spliterator.NONNULL |
-                Spliterator.CONCURRENT;
+            return (Spliterator.ORDERED |
+                    Spliterator.NONNULL |
+                    Spliterator.CONCURRENT);
         }
     }
 
@@ -1261,7 +1280,127 @@
      * @since 1.8
      */
     public Spliterator<E> spliterator() {
-        return new LBDSpliterator<E>(this);
+        return new LBDSpliterator();
+    }
+
+    /**
+     * @throws NullPointerException {@inheritDoc}
+     */
+    public void forEach(Consumer<? super E> action) {
+        Objects.requireNonNull(action);
+        forEachFrom(action, null);
+    }
+
+    /**
+     * Runs action on each element found during a traversal starting at p.
+     * If p is null, traversal starts at head.
+     */
+    void forEachFrom(Consumer<? super E> action, Node<E> p) {
+        // Extract batches of elements while holding the lock; then
+        // run the action on the elements while not
+        final ReentrantLock lock = this.lock;
+        final int batchSize = 64;       // max number of elements per batch
+        Object[] es = null;             // container for batch of elements
+        int n, len = 0;
+        do {
+            lock.lock();
+            try {
+                if (es == null) {
+                    if (p == null) p = first;
+                    for (Node<E> q = p; q != null; q = succ(q))
+                        if (q.item != null && ++len == batchSize)
+                            break;
+                    es = new Object[len];
+                }
+                for (n = 0; p != null && n < len; p = succ(p))
+                    if ((es[n] = p.item) != null)
+                        n++;
+            } finally {
+                lock.unlock();
+            }
+            for (int i = 0; i < n; i++) {
+                @SuppressWarnings("unchecked") E e = (E) es[i];
+                action.accept(e);
+            }
+        } while (n > 0 && p != null);
+    }
+
+    /**
+     * @throws NullPointerException {@inheritDoc}
+     */
+    public boolean removeIf(Predicate<? super E> filter) {
+        Objects.requireNonNull(filter);
+        return bulkRemove(filter);
+    }
+
+    /**
+     * @throws NullPointerException {@inheritDoc}
+     */
+    public boolean removeAll(Collection<?> c) {
+        Objects.requireNonNull(c);
+        return bulkRemove(e -> c.contains(e));
+    }
+
+    /**
+     * @throws NullPointerException {@inheritDoc}
+     */
+    public boolean retainAll(Collection<?> c) {
+        Objects.requireNonNull(c);
+        return bulkRemove(e -> !c.contains(e));
+    }
+
+    /** Implementation of bulk remove methods. */
+    @SuppressWarnings("unchecked")
+    private boolean bulkRemove(Predicate<? super E> filter) {
+        boolean removed = false;
+        final ReentrantLock lock = this.lock;
+        Node<E> p = null;
+        Node<E>[] nodes = null;
+        int n, len = 0;
+        do {
+            // 1. Extract batch of up to 64 elements while holding the lock.
+            lock.lock();
+            try {
+                if (nodes == null) {  // first batch; initialize
+                    p = first;
+                    for (Node<E> q = p; q != null; q = succ(q))
+                        if (q.item != null && ++len == 64)
+                            break;
+                    nodes = (Node<E>[]) new Node<?>[len];
+                }
+                for (n = 0; p != null && n < len; p = succ(p))
+                    nodes[n++] = p;
+            } finally {
+                lock.unlock();
+            }
+
+            // 2. Run the filter on the elements while lock is free.
+            long deathRow = 0L;       // "bitset" of size 64
+            for (int i = 0; i < n; i++) {
+                final E e;
+                if ((e = nodes[i].item) != null && filter.test(e))
+                    deathRow |= 1L << i;
+            }
+
+            // 3. Remove any filtered elements while holding the lock.
+            if (deathRow != 0) {
+                lock.lock();
+                try {
+                    for (int i = 0; i < n; i++) {
+                        final Node<E> q;
+                        if ((deathRow & (1L << i)) != 0L
+                            && (q = nodes[i]).item != null) {
+                            unlink(q);
+                            removed = true;
+                        }
+                        nodes[i] = null; // help GC
+                    }
+                } finally {
+                    lock.unlock();
+                }
+            }
+        } while (n > 0 && p != null);
+        return removed;
     }
 
     /**
@@ -1304,12 +1443,21 @@
         last = null;
         // Read in all elements and place in queue
         for (;;) {
-            @SuppressWarnings("unchecked")
-            E item = (E)s.readObject();
+            @SuppressWarnings("unchecked") E item = (E)s.readObject();
             if (item == null)
                 break;
             add(item);
         }
     }
 
+    void checkInvariants() {
+        // assert lock.isHeldByCurrentThread();
+        // Nodes may get self-linked or lose their item, but only
+        // after being unlinked and becoming unreachable from first.
+        for (Node<E> p = first; p != null; p = p.next) {
+            // assert p.next != p;
+            // assert p.item != null;
+        }
+    }
+
 }
diff --git a/ojluni/src/main/java/java/util/concurrent/LinkedBlockingQueue.java b/ojluni/src/main/java/java/util/concurrent/LinkedBlockingQueue.java
index cf2d447..4ba6c1e 100644
--- a/ojluni/src/main/java/java/util/concurrent/LinkedBlockingQueue.java
+++ b/ojluni/src/main/java/java/util/concurrent/LinkedBlockingQueue.java
@@ -39,16 +39,14 @@
 import java.util.Collection;
 import java.util.Iterator;
 import java.util.NoSuchElementException;
+import java.util.Objects;
 import java.util.Spliterator;
 import java.util.Spliterators;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.concurrent.locks.Condition;
 import java.util.concurrent.locks.ReentrantLock;
 import java.util.function.Consumer;
-
-// BEGIN android-note
-// removed link to collections framework docs
-// END android-note
+import java.util.function.Predicate;
 
 /**
  * An optionally-bounded {@linkplain BlockingQueue blocking queue} based on
@@ -69,9 +67,12 @@
  * dynamically created upon each insertion unless this would bring the
  * queue above capacity.
  *
- * <p>This class and its iterator implement all of the
- * <em>optional</em> methods of the {@link Collection} and {@link
- * Iterator} interfaces.
+ * <p>This class and its iterator implement all of the <em>optional</em>
+ * methods of the {@link Collection} and {@link Iterator} interfaces.
+ *
+ * <p>This class is a member of the
+ * <a href="{@docRoot}/java.base/java/util/package-summary.html#CollectionsFramework">
+ * Java Collections Framework</a>.
  *
  * @since 1.5
  * @author Doug Lea
@@ -234,14 +235,6 @@
         putLock.unlock();
     }
 
-//     /**
-//      * Tells whether both locks are held by current thread.
-//      */
-//     boolean isFullyLocked() {
-//         return (putLock.isHeldByCurrentThread() &&
-//                 takeLock.isHeldByCurrentThread());
-//     }
-
     /**
      * Creates a {@code LinkedBlockingQueue} with a capacity of
      * {@link Integer#MAX_VALUE}.
@@ -330,10 +323,8 @@
      */
     public void put(E e) throws InterruptedException {
         if (e == null) throw new NullPointerException();
-        // Note: convention in all put/take/etc is to preset local var
-        // holding count negative to indicate failure unless set.
-        int c = -1;
-        Node<E> node = new Node<E>(e);
+        final int c;
+        final Node<E> node = new Node<E>(e);
         final ReentrantLock putLock = this.putLock;
         final AtomicInteger count = this.count;
         putLock.lockInterruptibly();
@@ -374,7 +365,7 @@
 
         if (e == null) throw new NullPointerException();
         long nanos = unit.toNanos(timeout);
-        int c = -1;
+        final int c;
         final ReentrantLock putLock = this.putLock;
         final AtomicInteger count = this.count;
         putLock.lockInterruptibly();
@@ -412,28 +403,28 @@
         final AtomicInteger count = this.count;
         if (count.get() == capacity)
             return false;
-        int c = -1;
-        Node<E> node = new Node<E>(e);
+        final int c;
+        final Node<E> node = new Node<E>(e);
         final ReentrantLock putLock = this.putLock;
         putLock.lock();
         try {
-            if (count.get() < capacity) {
-                enqueue(node);
-                c = count.getAndIncrement();
-                if (c + 1 < capacity)
-                    notFull.signal();
-            }
+            if (count.get() == capacity)
+                return false;
+            enqueue(node);
+            c = count.getAndIncrement();
+            if (c + 1 < capacity)
+                notFull.signal();
         } finally {
             putLock.unlock();
         }
         if (c == 0)
             signalNotEmpty();
-        return c >= 0;
+        return true;
     }
 
     public E take() throws InterruptedException {
-        E x;
-        int c = -1;
+        final E x;
+        final int c;
         final AtomicInteger count = this.count;
         final ReentrantLock takeLock = this.takeLock;
         takeLock.lockInterruptibly();
@@ -454,8 +445,8 @@
     }
 
     public E poll(long timeout, TimeUnit unit) throws InterruptedException {
-        E x = null;
-        int c = -1;
+        final E x;
+        final int c;
         long nanos = unit.toNanos(timeout);
         final AtomicInteger count = this.count;
         final ReentrantLock takeLock = this.takeLock;
@@ -482,17 +473,17 @@
         final AtomicInteger count = this.count;
         if (count.get() == 0)
             return null;
-        E x = null;
-        int c = -1;
+        final E x;
+        final int c;
         final ReentrantLock takeLock = this.takeLock;
         takeLock.lock();
         try {
-            if (count.get() > 0) {
-                x = dequeue();
-                c = count.getAndDecrement();
-                if (c > 1)
-                    notEmpty.signal();
-            }
+            if (count.get() == 0)
+                return null;
+            x = dequeue();
+            c = count.getAndDecrement();
+            if (c > 1)
+                notEmpty.signal();
         } finally {
             takeLock.unlock();
         }
@@ -502,6 +493,7 @@
     }
 
     public E peek() {
+        final AtomicInteger count = this.count;
         if (count.get() == 0)
             return null;
         final ReentrantLock takeLock = this.takeLock;
@@ -514,16 +506,17 @@
     }
 
     /**
-     * Unlinks interior Node p with predecessor trail.
+     * Unlinks interior Node p with predecessor pred.
      */
-    void unlink(Node<E> p, Node<E> trail) {
-        // assert isFullyLocked();
+    void unlink(Node<E> p, Node<E> pred) {
+        // assert putLock.isHeldByCurrentThread();
+        // assert takeLock.isHeldByCurrentThread();
         // p.next is not changed, to allow iterators that are
         // traversing p to maintain their weak-consistency guarantee.
         p.item = null;
-        trail.next = p.next;
+        pred.next = p.next;
         if (last == p)
-            last = trail;
+            last = pred;
         if (count.getAndDecrement() == capacity)
             notFull.signal();
     }
@@ -543,11 +536,11 @@
         if (o == null) return false;
         fullyLock();
         try {
-            for (Node<E> trail = head, p = trail.next;
+            for (Node<E> pred = head, p = pred.next;
                  p != null;
-                 trail = p, p = p.next) {
+                 pred = p, p = p.next) {
                 if (o.equals(p.item)) {
-                    unlink(p, trail);
+                    unlink(p, pred);
                     return true;
                 }
             }
@@ -701,8 +694,7 @@
      * @throws IllegalArgumentException      {@inheritDoc}
      */
     public int drainTo(Collection<? super E> c, int maxElements) {
-        if (c == null)
-            throw new NullPointerException();
+        Objects.requireNonNull(c);
         if (c == this)
             throw new IllegalArgumentException();
         if (maxElements <= 0)
@@ -741,6 +733,18 @@
     }
 
     /**
+     * Used for any element traversal that is not entirely under lock.
+     * Such traversals must handle both:
+     * - dequeued nodes (p.next == p)
+     * - (possibly multiple) interior removed nodes (p.item == null)
+     */
+    Node<E> succ(Node<E> p) {
+        if (p == (p = p.next))
+            p = head.next;
+        return p;
+    }
+
+    /**
      * Returns an iterator over the elements in this queue in proper sequence.
      * The elements will be returned in order from first (head) to last (tail).
      *
@@ -753,71 +757,103 @@
         return new Itr();
     }
 
+    /**
+     * Weakly-consistent iterator.
+     *
+     * Lazily updated ancestor field provides expected O(1) remove(),
+     * but still O(n) in the worst case, whenever the saved ancestor
+     * is concurrently deleted.
+     */
     private class Itr implements Iterator<E> {
-        /*
-         * Basic weakly-consistent iterator.  At all times hold the next
-         * item to hand out so that if hasNext() reports true, we will
-         * still have it to return even if lost race with a take etc.
-         */
-
-        private Node<E> current;
+        private Node<E> next;           // Node holding nextItem
+        private E nextItem;             // next item to hand out
         private Node<E> lastRet;
-        private E currentElement;
+        private Node<E> ancestor;       // Helps unlink lastRet on remove()
 
         Itr() {
             fullyLock();
             try {
-                current = head.next;
-                if (current != null)
-                    currentElement = current.item;
+                if ((next = head.next) != null)
+                    nextItem = next.item;
             } finally {
                 fullyUnlock();
             }
         }
 
         public boolean hasNext() {
-            return current != null;
+            return next != null;
         }
 
         public E next() {
+            Node<E> p;
+            if ((p = next) == null)
+                throw new NoSuchElementException();
+            lastRet = p;
+            E x = nextItem;
             fullyLock();
             try {
-                if (current == null)
-                    throw new NoSuchElementException();
-                lastRet = current;
-                E item = null;
-                // Unlike other traversal methods, iterators must handle both:
-                // - dequeued nodes (p.next == p)
-                // - (possibly multiple) interior removed nodes (p.item == null)
-                for (Node<E> p = current, q;; p = q) {
-                    if ((q = p.next) == p)
-                        q = head.next;
-                    if (q == null || (item = q.item) != null) {
-                        current = q;
-                        E x = currentElement;
-                        currentElement = item;
-                        return x;
-                    }
-                }
+                E e = null;
+                for (p = p.next; p != null && (e = p.item) == null; )
+                    p = succ(p);
+                next = p;
+                nextItem = e;
             } finally {
                 fullyUnlock();
             }
+            return x;
+        }
+
+        public void forEachRemaining(Consumer<? super E> action) {
+            // A variant of forEachFrom
+            Objects.requireNonNull(action);
+            Node<E> p;
+            if ((p = next) == null) return;
+            lastRet = p;
+            next = null;
+            final int batchSize = 64;
+            Object[] es = null;
+            int n, len = 1;
+            do {
+                fullyLock();
+                try {
+                    if (es == null) {
+                        p = p.next;
+                        for (Node<E> q = p; q != null; q = succ(q))
+                            if (q.item != null && ++len == batchSize)
+                                break;
+                        es = new Object[len];
+                        es[0] = nextItem;
+                        nextItem = null;
+                        n = 1;
+                    } else
+                        n = 0;
+                    for (; p != null && n < len; p = succ(p))
+                        if ((es[n] = p.item) != null) {
+                            lastRet = p;
+                            n++;
+                        }
+                } finally {
+                    fullyUnlock();
+                }
+                for (int i = 0; i < n; i++) {
+                    @SuppressWarnings("unchecked") E e = (E) es[i];
+                    action.accept(e);
+                }
+            } while (n > 0 && p != null);
         }
 
         public void remove() {
-            if (lastRet == null)
+            Node<E> p = lastRet;
+            if (p == null)
                 throw new IllegalStateException();
+            lastRet = null;
             fullyLock();
             try {
-                Node<E> node = lastRet;
-                lastRet = null;
-                for (Node<E> trail = head, p = trail.next;
-                     p != null;
-                     trail = p, p = p.next) {
-                    if (p == node) {
-                        unlink(p, trail);
-                        break;
-                    }
+                if (p.item != null) {
+                    if (ancestor == null)
+                        ancestor = head;
+                    ancestor = findPred(p, ancestor);
+                    unlink(p, ancestor);
                 }
             } finally {
                 fullyUnlock();
@@ -825,42 +861,38 @@
         }
     }
 
-    /** A customized variant of Spliterators.IteratorSpliterator */
-    static final class LBQSpliterator<E> implements Spliterator<E> {
+    /**
+     * A customized variant of Spliterators.IteratorSpliterator.
+     * Keep this class in sync with (very similar) LBDSpliterator.
+     */
+    private final class LBQSpliterator implements Spliterator<E> {
         static final int MAX_BATCH = 1 << 25;  // max batch array size;
-        final LinkedBlockingQueue<E> queue;
         Node<E> current;    // current node; null until initialized
         int batch;          // batch size for splits
         boolean exhausted;  // true when no more nodes
-        long est;           // size estimate
-        LBQSpliterator(LinkedBlockingQueue<E> queue) {
-            this.queue = queue;
-            this.est = queue.size();
-        }
+        long est = size();  // size estimate
+
+        LBQSpliterator() {}
 
         public long estimateSize() { return est; }
 
         public Spliterator<E> trySplit() {
             Node<E> h;
-            final LinkedBlockingQueue<E> q = this.queue;
-            int b = batch;
-            int n = (b <= 0) ? 1 : (b >= MAX_BATCH) ? MAX_BATCH : b + 1;
             if (!exhausted &&
-                ((h = current) != null || (h = q.head.next) != null) &&
-                h.next != null) {
+                ((h = current) != null || (h = head.next) != null)
+                && h.next != null) {
+                int n = batch = Math.min(batch + 1, MAX_BATCH);
                 Object[] a = new Object[n];
                 int i = 0;
                 Node<E> p = current;
-                q.fullyLock();
+                fullyLock();
                 try {
-                    if (p != null || (p = q.head.next) != null) {
-                        do {
+                    if (p != null || (p = head.next) != null)
+                        for (; p != null && i < n; p = succ(p))
                             if ((a[i] = p.item) != null)
-                                ++i;
-                        } while ((p = p.next) != null && i < n);
-                    }
+                                i++;
                 } finally {
-                    q.fullyUnlock();
+                    fullyUnlock();
                 }
                 if ((current = p) == null) {
                     est = 0L;
@@ -868,64 +900,32 @@
                 }
                 else if ((est -= i) < 0L)
                     est = 0L;
-                if (i > 0) {
-                    batch = i;
+                if (i > 0)
                     return Spliterators.spliterator
                         (a, 0, i, (Spliterator.ORDERED |
                                    Spliterator.NONNULL |
                                    Spliterator.CONCURRENT));
-                }
             }
             return null;
         }
 
-        public void forEachRemaining(Consumer<? super E> action) {
-            if (action == null) throw new NullPointerException();
-            final LinkedBlockingQueue<E> q = this.queue;
-            if (!exhausted) {
-                exhausted = true;
-                Node<E> p = current;
-                do {
-                    E e = null;
-                    q.fullyLock();
-                    try {
-                        if (p == null)
-                            p = q.head.next;
-                        while (p != null) {
-                            e = p.item;
-                            p = p.next;
-                            if (e != null)
-                                break;
-                        }
-                    } finally {
-                        q.fullyUnlock();
-                    }
-                    if (e != null)
-                        action.accept(e);
-                } while (p != null);
-            }
-        }
-
         public boolean tryAdvance(Consumer<? super E> action) {
-            if (action == null) throw new NullPointerException();
-            final LinkedBlockingQueue<E> q = this.queue;
+            Objects.requireNonNull(action);
             if (!exhausted) {
                 E e = null;
-                q.fullyLock();
+                fullyLock();
                 try {
-                    if (current == null)
-                        current = q.head.next;
-                    while (current != null) {
-                        e = current.item;
-                        current = current.next;
-                        if (e != null)
-                            break;
-                    }
+                    Node<E> p;
+                    if ((p = current) != null || (p = head.next) != null)
+                        do {
+                            e = p.item;
+                            p = succ(p);
+                        } while (e == null && p != null);
+                    if ((current = p) == null)
+                        exhausted = true;
                 } finally {
-                    q.fullyUnlock();
+                    fullyUnlock();
                 }
-                if (current == null)
-                    exhausted = true;
                 if (e != null) {
                     action.accept(e);
                     return true;
@@ -934,9 +934,20 @@
             return false;
         }
 
+        public void forEachRemaining(Consumer<? super E> action) {
+            Objects.requireNonNull(action);
+            if (!exhausted) {
+                exhausted = true;
+                Node<E> p = current;
+                current = null;
+                forEachFrom(action, p);
+            }
+        }
+
         public int characteristics() {
-            return Spliterator.ORDERED | Spliterator.NONNULL |
-                Spliterator.CONCURRENT;
+            return (Spliterator.ORDERED |
+                    Spliterator.NONNULL |
+                    Spliterator.CONCURRENT);
         }
     }
 
@@ -957,7 +968,140 @@
      * @since 1.8
      */
     public Spliterator<E> spliterator() {
-        return new LBQSpliterator<E>(this);
+        return new LBQSpliterator();
+    }
+
+    /**
+     * @throws NullPointerException {@inheritDoc}
+     */
+    public void forEach(Consumer<? super E> action) {
+        Objects.requireNonNull(action);
+        forEachFrom(action, null);
+    }
+
+    /**
+     * Runs action on each element found during a traversal starting at p.
+     * If p is null, traversal starts at head.
+     */
+    void forEachFrom(Consumer<? super E> action, Node<E> p) {
+        // Extract batches of elements while holding the lock; then
+        // run the action on the elements while not
+        final int batchSize = 64;       // max number of elements per batch
+        Object[] es = null;             // container for batch of elements
+        int n, len = 0;
+        do {
+            fullyLock();
+            try {
+                if (es == null) {
+                    if (p == null) p = head.next;
+                    for (Node<E> q = p; q != null; q = succ(q))
+                        if (q.item != null && ++len == batchSize)
+                            break;
+                    es = new Object[len];
+                }
+                for (n = 0; p != null && n < len; p = succ(p))
+                    if ((es[n] = p.item) != null)
+                        n++;
+            } finally {
+                fullyUnlock();
+            }
+            for (int i = 0; i < n; i++) {
+                @SuppressWarnings("unchecked") E e = (E) es[i];
+                action.accept(e);
+            }
+        } while (n > 0 && p != null);
+    }
+
+    /**
+     * @throws NullPointerException {@inheritDoc}
+     */
+    public boolean removeIf(Predicate<? super E> filter) {
+        Objects.requireNonNull(filter);
+        return bulkRemove(filter);
+    }
+
+    /**
+     * @throws NullPointerException {@inheritDoc}
+     */
+    public boolean removeAll(Collection<?> c) {
+        Objects.requireNonNull(c);
+        return bulkRemove(e -> c.contains(e));
+    }
+
+    /**
+     * @throws NullPointerException {@inheritDoc}
+     */
+    public boolean retainAll(Collection<?> c) {
+        Objects.requireNonNull(c);
+        return bulkRemove(e -> !c.contains(e));
+    }
+
+    /**
+     * Returns the predecessor of live node p, given a node that was
+     * once a live ancestor of p (or head); allows unlinking of p.
+     */
+    Node<E> findPred(Node<E> p, Node<E> ancestor) {
+        // assert p.item != null;
+        if (ancestor.item == null)
+            ancestor = head;
+        // Fails with NPE if precondition not satisfied
+        for (Node<E> q; (q = ancestor.next) != p; )
+            ancestor = q;
+        return ancestor;
+    }
+
+    /** Implementation of bulk remove methods. */
+    @SuppressWarnings("unchecked")
+    private boolean bulkRemove(Predicate<? super E> filter) {
+        boolean removed = false;
+        Node<E> p = null, ancestor = head;
+        Node<E>[] nodes = null;
+        int n, len = 0;
+        do {
+            // 1. Extract batch of up to 64 elements while holding the lock.
+            fullyLock();
+            try {
+                if (nodes == null) {  // first batch; initialize
+                    p = head.next;
+                    for (Node<E> q = p; q != null; q = succ(q))
+                        if (q.item != null && ++len == 64)
+                            break;
+                    nodes = (Node<E>[]) new Node<?>[len];
+                }
+                for (n = 0; p != null && n < len; p = succ(p))
+                    nodes[n++] = p;
+            } finally {
+                fullyUnlock();
+            }
+
+            // 2. Run the filter on the elements while lock is free.
+            long deathRow = 0L;       // "bitset" of size 64
+            for (int i = 0; i < n; i++) {
+                final E e;
+                if ((e = nodes[i].item) != null && filter.test(e))
+                    deathRow |= 1L << i;
+            }
+
+            // 3. Remove any filtered elements while holding the lock.
+            if (deathRow != 0) {
+                fullyLock();
+                try {
+                    for (int i = 0; i < n; i++) {
+                        final Node<E> q;
+                        if ((deathRow & (1L << i)) != 0L
+                            && (q = nodes[i]).item != null) {
+                            ancestor = findPred(q, ancestor);
+                            unlink(q, ancestor);
+                            removed = true;
+                        }
+                        nodes[i] = null; // help GC
+                    }
+                } finally {
+                    fullyUnlock();
+                }
+            }
+        } while (n > 0 && p != null);
+        return removed;
     }
 
     /**
diff --git a/ojluni/src/main/java/java/util/concurrent/LinkedTransferQueue.java b/ojluni/src/main/java/java/util/concurrent/LinkedTransferQueue.java
index e282b42..06d04e22 100644
--- a/ojluni/src/main/java/java/util/concurrent/LinkedTransferQueue.java
+++ b/ojluni/src/main/java/java/util/concurrent/LinkedTransferQueue.java
@@ -35,20 +35,20 @@
 
 package java.util.concurrent;
 
+import java.lang.invoke.MethodHandles;
+import java.lang.invoke.VarHandle;
 import java.util.AbstractQueue;
 import java.util.Arrays;
 import java.util.Collection;
 import java.util.Iterator;
 import java.util.NoSuchElementException;
+import java.util.Objects;
 import java.util.Queue;
 import java.util.Spliterator;
 import java.util.Spliterators;
 import java.util.concurrent.locks.LockSupport;
 import java.util.function.Consumer;
-
-// BEGIN android-note
-// removed link to collections framework docs
-// END android-note
+import java.util.function.Predicate;
 
 /**
  * An unbounded {@link TransferQueue} based on linked nodes.
@@ -63,16 +63,15 @@
  * asynchronous nature of these queues, determining the current number
  * of elements requires a traversal of the elements, and so may report
  * inaccurate results if this collection is modified during traversal.
- * Additionally, the bulk operations {@code addAll},
- * {@code removeAll}, {@code retainAll}, {@code containsAll},
- * {@code equals}, and {@code toArray} are <em>not</em> guaranteed
- * to be performed atomically. For example, an iterator operating
- * concurrently with an {@code addAll} operation might view only some
- * of the added elements.
  *
- * <p>This class and its iterator implement all of the
- * <em>optional</em> methods of the {@link Collection} and {@link
- * Iterator} interfaces.
+ * <p>Bulk operations that add, remove, or examine multiple elements,
+ * such as {@link #addAll}, {@link #removeIf} or {@link #forEach},
+ * are <em>not</em> guaranteed to be performed atomically.
+ * For example, a {@code forEach} traversal concurrent with an {@code
+ * addAll} operation might observe only some of the added elements.
+ *
+ * <p>This class and its iterator implement all of the <em>optional</em>
+ * methods of the {@link Collection} and {@link Iterator} interfaces.
  *
  * <p>Memory consistency effects: As with other concurrent
  * collections, actions in a thread prior to placing an object into a
@@ -81,6 +80,10 @@
  * actions subsequent to the access or removal of that element from
  * the {@code LinkedTransferQueue} in another thread.
  *
+ * <p>This class is a member of the
+ * <a href="{@docRoot}/java.base/java/util/package-summary.html#CollectionsFramework">
+ * Java Collections Framework</a>.
+ *
  * @since 1.7
  * @author Doug Lea
  * @param <E> the type of elements held in this queue
@@ -93,8 +96,8 @@
      * *** Overview of Dual Queues with Slack ***
      *
      * Dual Queues, introduced by Scherer and Scott
-     * (http://www.cs.rice.edu/~wns1/papers/2004-DISC-DDS.pdf) are
-     * (linked) queues in which nodes may represent either data or
+     * (http://www.cs.rochester.edu/~scott/papers/2004_DISC_dual_DS.pdf)
+     * are (linked) queues in which nodes may represent either data or
      * requests.  When a thread tries to enqueue a data node, but
      * encounters a request node, it instead "matches" and removes it;
      * and vice versa for enqueuing requests. Blocking Dual Queues
@@ -156,9 +159,8 @@
      * correctly perform enqueue and dequeue operations by traversing
      * from a pointer to the initial node; CASing the item of the
      * first unmatched node on match and CASing the next field of the
-     * trailing node on appends. (Plus some special-casing when
-     * initially empty).  While this would be a terrible idea in
-     * itself, it does have the benefit of not requiring ANY atomic
+     * trailing node on appends.  While this would be a terrible idea
+     * in itself, it does have the benefit of not requiring ANY atomic
      * updates on head/tail fields.
      *
      * We introduce here an approach that lies between the extremes of
@@ -194,15 +196,15 @@
      * with a given probability per traversal step.
      *
      * In any strategy along these lines, because CASes updating
-     * fields may fail, the actual slack may exceed targeted
-     * slack. However, they may be retried at any time to maintain
-     * targets.  Even when using very small slack values, this
-     * approach works well for dual queues because it allows all
-     * operations up to the point of matching or appending an item
-     * (hence potentially allowing progress by another thread) to be
-     * read-only, thus not introducing any further contention. As
-     * described below, we implement this by performing slack
-     * maintenance retries only after these points.
+     * fields may fail, the actual slack may exceed targeted slack.
+     * However, they may be retried at any time to maintain targets.
+     * Even when using very small slack values, this approach works
+     * well for dual queues because it allows all operations up to the
+     * point of matching or appending an item (hence potentially
+     * allowing progress by another thread) to be read-only, thus not
+     * introducing any further contention.  As described below, we
+     * implement this by performing slack maintenance retries only
+     * after these points.
      *
      * As an accompaniment to such techniques, traversal overhead can
      * be further reduced without increasing contention of head
@@ -221,7 +223,7 @@
      * (Similar issues arise in non-GC environments.)  To cope with
      * this in our implementation, upon CASing to advance the head
      * pointer, we set the "next" link of the previous head to point
-     * only to itself; thus limiting the length of connected dead lists.
+     * only to itself; thus limiting the length of chains of dead nodes.
      * (We also take similar care to wipe out possibly garbage
      * retaining values held in other Node fields.)  However, doing so
      * adds some further complexity to traversal: If any "next"
@@ -264,15 +266,6 @@
      * interior nodes) except in the case of cancellation/removal (see
      * below).
      *
-     * We allow both the head and tail fields to be null before any
-     * nodes are enqueued; initializing upon first append.  This
-     * simplifies some other logic, as well as providing more
-     * efficient explicit control paths instead of letting JVMs insert
-     * implicit NullPointerExceptions when they are null.  While not
-     * currently fully implemented, we also leave open the possibility
-     * of re-nulling these fields when empty (which is complicated to
-     * arrange, for little benefit.)
-     *
      * All enqueue/dequeue operations are handled by the single method
      * "xfer" with parameters indicating whether to act as some form
      * of offer, put, poll, take, or transfer (each possibly with
@@ -280,44 +273,40 @@
      * method outweighs the code bulk and maintenance problems of
      * using separate methods for each case.
      *
-     * Operation consists of up to three phases. The first is
-     * implemented within method xfer, the second in tryAppend, and
-     * the third in method awaitMatch.
+     * Operation consists of up to two phases. The first is implemented
+     * in method xfer, the second in method awaitMatch.
      *
-     * 1. Try to match an existing node
+     * 1. Traverse until matching or appending (method xfer)
      *
-     *    Starting at head, skip already-matched nodes until finding
-     *    an unmatched node of opposite mode, if one exists, in which
-     *    case matching it and returning, also if necessary updating
-     *    head to one past the matched node (or the node itself if the
-     *    list has no other unmatched nodes). If the CAS misses, then
-     *    a loop retries advancing head by two steps until either
-     *    success or the slack is at most two. By requiring that each
-     *    attempt advances head by two (if applicable), we ensure that
-     *    the slack does not grow without bound. Traversals also check
-     *    if the initial head is now off-list, in which case they
-     *    start at the new head.
+     *    Conceptually, we simply traverse all nodes starting from head.
+     *    If we encounter an unmatched node of opposite mode, we match
+     *    it and return, also updating head (by at least 2 hops) to
+     *    one past the matched node (or the node itself if it's the
+     *    pinned trailing node).  Traversals also check for the
+     *    possibility of falling off-list, in which case they restart.
      *
-     *    If no candidates are found and the call was untimed
-     *    poll/offer, (argument "how" is NOW) return.
+     *    If the trailing node of the list is reached, a match is not
+     *    possible.  If this call was untimed poll or tryTransfer
+     *    (argument "how" is NOW), return empty-handed immediately.
+     *    Else a new node is CAS-appended.  On successful append, if
+     *    this call was ASYNC (e.g. offer), an element was
+     *    successfully added to the end of the queue and we return.
      *
-     * 2. Try to append a new node (method tryAppend)
+     *    Of course, this naive traversal is O(n) when no match is
+     *    possible.  We optimize the traversal by maintaining a tail
+     *    pointer, which is expected to be "near" the end of the list.
+     *    It is only safe to fast-forward to tail (in the presence of
+     *    arbitrary concurrent changes) if it is pointing to a node of
+     *    the same mode, even if it is dead (in this case no preceding
+     *    node could still be matchable by this traversal).  If we
+     *    need to restart due to falling off-list, we can again
+     *    fast-forward to tail, but only if it has changed since the
+     *    last traversal (else we might loop forever).  If tail cannot
+     *    be used, traversal starts at head (but in this case we
+     *    expect to be able to match near head).  As with head, we
+     *    CAS-advance the tail pointer by at least two hops.
      *
-     *    Starting at current tail pointer, find the actual last node
-     *    and try to append a new node (or if head was null, establish
-     *    the first node). Nodes can be appended only if their
-     *    predecessors are either already matched or are of the same
-     *    mode. If we detect otherwise, then a new node with opposite
-     *    mode must have been appended during traversal, so we must
-     *    restart at phase 1. The traversal and update steps are
-     *    otherwise similar to phase 1: Retrying upon CAS misses and
-     *    checking for staleness.  In particular, if a self-link is
-     *    encountered, then we can safely jump to a node on the list
-     *    by continuing the traversal at current head.
-     *
-     *    On successful append, if the call was ASYNC, return.
-     *
-     * 3. Await match or cancellation (method awaitMatch)
+     * 2. Await match or cancellation (method awaitMatch)
      *
      *    Wait for another thread to match node; instead cancelling if
      *    the current thread was interrupted or the wait timed out. On
@@ -371,12 +360,12 @@
      * from, the head of list.
      *
      * Without taking these into account, it would be possible for an
-     * unbounded number of supposedly removed nodes to remain
-     * reachable.  Situations leading to such buildup are uncommon but
-     * can occur in practice; for example when a series of short timed
-     * calls to poll repeatedly time out but never otherwise fall off
-     * the list because of an untimed call to take at the front of the
-     * queue.
+     * unbounded number of supposedly removed nodes to remain reachable.
+     * Situations leading to such buildup are uncommon but can occur
+     * in practice; for example when a series of short timed calls to
+     * poll repeatedly time out at the trailing node but otherwise
+     * never fall off the list because of an untimed call to take() at
+     * the front of the queue.
      *
      * When these cases arise, rather than always retraversing the
      * entire list to find an actual predecessor to unlink (which
@@ -389,10 +378,9 @@
      * We perform sweeps by the thread hitting threshold (rather than
      * background threads or by spreading work to other threads)
      * because in the main contexts in which removal occurs, the
-     * caller is already timed-out, cancelled, or performing a
-     * potentially O(n) operation (e.g. remove(x)), none of which are
-     * time-critical enough to warrant the overhead that alternatives
-     * would impose on other threads.
+     * caller is timed-out or cancelled, which are not time-critical
+     * enough to warrant the overhead that alternatives would impose
+     * on other threads.
      *
      * Because the sweepVotes estimate is conservative, and because
      * nodes become unlinked "naturally" as they fall off the head of
@@ -404,6 +392,13 @@
      * quiescent queues. The value defined below was chosen
      * empirically to balance these under various timeout scenarios.
      *
+     * Because traversal operations on the linked list of nodes are a
+     * natural opportunity to sweep dead nodes, we generally do so,
+     * including all the operations that might remove elements as they
+     * traverse, such as removeIf and Iterator.remove.  This largely
+     * eliminates long chains of dead interior nodes, except from
+     * cancelled or timed out blocking operations.
+     *
      * Note that we cannot self-link unlinked interior nodes during
      * sweeps. However, the associated garbage chains terminate when
      * some successor ultimately falls off the head of the list and is
@@ -444,55 +439,72 @@
 
     /**
      * Queue nodes. Uses Object, not E, for items to allow forgetting
-     * them after use.  Relies heavily on Unsafe mechanics to minimize
-     * unnecessary ordering constraints: Writes that are intrinsically
-     * ordered wrt other accesses or CASes use simple relaxed forms.
+     * them after use.  Writes that are intrinsically ordered wrt
+     * other accesses or CASes use simple relaxed forms.
      */
     static final class Node {
         final boolean isData;   // false if this is a request node
         volatile Object item;   // initially non-null if isData; CASed to match
         volatile Node next;
-        volatile Thread waiter; // null until waiting
+        volatile Thread waiter; // null when not waiting for a match
 
-        // CAS methods for fields
+        /**
+         * Constructs a data node holding item if item is non-null,
+         * else a request node.  Uses relaxed write because item can
+         * only be seen after piggy-backing publication via CAS.
+         */
+        Node(Object item) {
+            ITEM.set(this, item);
+            isData = (item != null);
+        }
+
+        /** Constructs a (matched data) dummy node. */
+        Node() {
+            isData = true;
+        }
+
         final boolean casNext(Node cmp, Node val) {
-            return U.compareAndSwapObject(this, NEXT, cmp, val);
+            // assert val != null;
+            return NEXT.compareAndSet(this, cmp, val);
         }
 
         final boolean casItem(Object cmp, Object val) {
-            // assert cmp == null || cmp.getClass() != Node.class;
-            return U.compareAndSwapObject(this, ITEM, cmp, val);
-        }
-
-        /**
-         * Constructs a new node.  Uses relaxed write because item can
-         * only be seen after publication via casNext.
-         */
-        Node(Object item, boolean isData) {
-            U.putObject(this, ITEM, item); // relaxed write
-            this.isData = isData;
+            // assert isData == (cmp != null);
+            // assert isData == (val == null);
+            // assert !(cmp instanceof Node);
+            return ITEM.compareAndSet(this, cmp, val);
         }
 
         /**
          * Links node to itself to avoid garbage retention.  Called
          * only after CASing head field, so uses relaxed write.
          */
-        final void forgetNext() {
-            U.putObject(this, NEXT, this);
+        final void selfLink() {
+            // assert isMatched();
+            NEXT.setRelease(this, this);
+        }
+
+        final void appendRelaxed(Node next) {
+            // assert next != null;
+            // assert this.next == null;
+            NEXT.set(this, next);
         }
 
         /**
-         * Sets item to self and waiter to null, to avoid garbage
-         * retention after matching or cancelling. Uses relaxed writes
-         * because order is already constrained in the only calling
-         * contexts: item is forgotten only after volatile/atomic
-         * mechanics that extract items.  Similarly, clearing waiter
-         * follows either CAS or return from park (if ever parked;
-         * else we don't care).
+         * Sets item (of a request node) to self and waiter to null,
+         * to avoid garbage retention after matching or cancelling.
+         * Uses relaxed writes because order is already constrained in
+         * the only calling contexts: item is forgotten only after
+         * volatile/atomic mechanics that extract items, and visitors
+         * of request nodes only ever check whether item is null.
+         * Similarly, clearing waiter follows either CAS or return
+         * from park (if ever parked; else we don't care).
          */
         final void forgetContents() {
-            U.putObject(this, ITEM, this);
-            U.putObject(this, WAITER, null);
+            // assert isMatched();
+            if (!isData)
+                ITEM.set(this, this);
+            WAITER.set(this, null);
         }
 
         /**
@@ -500,15 +512,16 @@
          * case of artificial matches due to cancellation.
          */
         final boolean isMatched() {
-            Object x = item;
-            return (x == this) || ((x == null) == isData);
+            return isData == (item == null);
         }
 
-        /**
-         * Returns true if this is an unmatched request node.
-         */
-        final boolean isUnmatchedRequest() {
-            return !isData && item == null;
+        /** Tries to CAS-match this node; if successful, wakes waiter. */
+        final boolean tryMatch(Object cmp, Object val) {
+            if (casItem(cmp, val)) {
+                LockSupport.unpark(waiter);
+                return true;
+            }
+            return false;
         }
 
         /**
@@ -518,69 +531,118 @@
          */
         final boolean cannotPrecede(boolean haveData) {
             boolean d = isData;
-            Object x;
-            return d != haveData && (x = item) != this && (x != null) == d;
-        }
-
-        /**
-         * Tries to artificially match a data node -- used by remove.
-         */
-        final boolean tryMatchData() {
-            // assert isData;
-            Object x = item;
-            if (x != null && x != this && casItem(x, null)) {
-                LockSupport.unpark(waiter);
-                return true;
-            }
-            return false;
+            return d != haveData && d != (item == null);
         }
 
         private static final long serialVersionUID = -3375979862319811754L;
-
-        // Unsafe mechanics
-        private static final sun.misc.Unsafe U = sun.misc.Unsafe.getUnsafe();
-        private static final long ITEM;
-        private static final long NEXT;
-        private static final long WAITER;
-        static {
-            try {
-                ITEM = U.objectFieldOffset
-                    (Node.class.getDeclaredField("item"));
-                NEXT = U.objectFieldOffset
-                    (Node.class.getDeclaredField("next"));
-                WAITER = U.objectFieldOffset
-                    (Node.class.getDeclaredField("waiter"));
-            } catch (ReflectiveOperationException e) {
-                throw new Error(e);
-            }
-        }
     }
 
-    /** head of the queue; null until first enqueue */
+    /**
+     * A node from which the first live (non-matched) node (if any)
+     * can be reached in O(1) time.
+     * Invariants:
+     * - all live nodes are reachable from head via .next
+     * - head != null
+     * - (tmp = head).next != tmp || tmp != head
+     * Non-invariants:
+     * - head may or may not be live
+     * - it is permitted for tail to lag behind head, that is, for tail
+     *   to not be reachable from head!
+     */
     transient volatile Node head;
 
-    /** tail of the queue; null until first append */
+    /**
+     * A node from which the last node on list (that is, the unique
+     * node with node.next == null) can be reached in O(1) time.
+     * Invariants:
+     * - the last node is always reachable from tail via .next
+     * - tail != null
+     * Non-invariants:
+     * - tail may or may not be live
+     * - it is permitted for tail to lag behind head, that is, for tail
+     *   to not be reachable from head!
+     * - tail.next may or may not be self-linked.
+     */
     private transient volatile Node tail;
 
-    /** The number of apparent failures to unsplice removed nodes */
+    /** The number of apparent failures to unsplice cancelled nodes */
     private transient volatile int sweepVotes;
 
-    // CAS methods for fields
     private boolean casTail(Node cmp, Node val) {
-        return U.compareAndSwapObject(this, TAIL, cmp, val);
+        // assert cmp != null;
+        // assert val != null;
+        return TAIL.compareAndSet(this, cmp, val);
     }
 
     private boolean casHead(Node cmp, Node val) {
-        return U.compareAndSwapObject(this, HEAD, cmp, val);
+        return HEAD.compareAndSet(this, cmp, val);
     }
 
-    private boolean casSweepVotes(int cmp, int val) {
-        return U.compareAndSwapInt(this, SWEEPVOTES, cmp, val);
+    /** Atomic version of ++sweepVotes. */
+    private int incSweepVotes() {
+        return (int) SWEEPVOTES.getAndAdd(this, 1) + 1;
     }
 
-    /*
-     * Possible values for "how" argument in xfer method.
+    /**
+     * Tries to CAS pred.next (or head, if pred is null) from c to p.
+     * Caller must ensure that we're not unlinking the trailing node.
      */
+    private boolean tryCasSuccessor(Node pred, Node c, Node p) {
+        // assert p != null;
+        // assert c.isData != (c.item != null);
+        // assert c != p;
+        if (pred != null)
+            return pred.casNext(c, p);
+        if (casHead(c, p)) {
+            c.selfLink();
+            return true;
+        }
+        return false;
+    }
+
+    /**
+     * Collapses dead (matched) nodes between pred and q.
+     * @param pred the last known live node, or null if none
+     * @param c the first dead node
+     * @param p the last dead node
+     * @param q p.next: the next live node, or null if at end
+     * @return pred if pred still alive and CAS succeeded; else p
+     */
+    private Node skipDeadNodes(Node pred, Node c, Node p, Node q) {
+        // assert pred != c;
+        // assert p != q;
+        // assert c.isMatched();
+        // assert p.isMatched();
+        if (q == null) {
+            // Never unlink trailing node.
+            if (c == p) return pred;
+            q = p;
+        }
+        return (tryCasSuccessor(pred, c, q)
+                && (pred == null || !pred.isMatched()))
+            ? pred : p;
+    }
+
+    /**
+     * Collapses dead (matched) nodes from h (which was once head) to p.
+     * Caller ensures all nodes from h up to and including p are dead.
+     */
+    private void skipDeadNodesNearHead(Node h, Node p) {
+        // assert h != null;
+        // assert h != p;
+        // assert p.isMatched();
+        for (;;) {
+            final Node q;
+            if ((q = p.next) == null) break;
+            else if (!q.isMatched()) { p = q; break; }
+            else if (p == (p = q)) return;
+        }
+        if (casHead(h, p))
+            h.selfLink();
+    }
+
+    /* Possible values for "how" argument in xfer method. */
+
     private static final int NOW   = 0; // for untimed poll, tryTransfer
     private static final int ASYNC = 1; // for offer, put, add
     private static final int SYNC  = 2; // for transfer, take
@@ -596,84 +658,32 @@
      * @return an item if matched, else e
      * @throws NullPointerException if haveData mode but e is null
      */
+    @SuppressWarnings("unchecked")
     private E xfer(E e, boolean haveData, int how, long nanos) {
         if (haveData && (e == null))
             throw new NullPointerException();
-        Node s = null;                        // the node to append, if needed
 
-        retry:
-        for (;;) {                            // restart on append race
-
-            for (Node h = head, p = h; p != null;) { // find & match first node
-                boolean isData = p.isData;
-                Object item = p.item;
-                if (item != p && (item != null) == isData) { // unmatched
-                    if (isData == haveData)   // can't match
-                        break;
-                    if (p.casItem(item, e)) { // match
-                        for (Node q = p; q != h;) {
-                            Node n = q.next;  // update by 2 unless singleton
-                            if (head == h && casHead(h, n == null ? q : n)) {
-                                h.forgetNext();
-                                break;
-                            }                 // advance and retry
-                            if ((h = head)   == null ||
-                                (q = h.next) == null || !q.isMatched())
-                                break;        // unless slack < 2
-                        }
-                        LockSupport.unpark(p.waiter);
-                        @SuppressWarnings("unchecked") E itemE = (E) item;
-                        return itemE;
+        restart: for (Node s = null, t = null, h = null;;) {
+            for (Node p = (t != (t = tail) && t.isData == haveData) ? t
+                     : (h = head);; ) {
+                final Node q; final Object item;
+                if (p.isData != haveData
+                    && haveData == ((item = p.item) == null)) {
+                    if (h == null) h = head;
+                    if (p.tryMatch(item, e)) {
+                        if (h != p) skipDeadNodesNearHead(h, p);
+                        return (E) item;
                     }
                 }
-                Node n = p.next;
-                p = (p != n) ? n : (h = head); // Use head if p offlist
-            }
-
-            if (how != NOW) {                 // No matches available
-                if (s == null)
-                    s = new Node(e, haveData);
-                Node pred = tryAppend(s, haveData);
-                if (pred == null)
-                    continue retry;           // lost race vs opposite mode
-                if (how != ASYNC)
-                    return awaitMatch(s, pred, e, (how == TIMED), nanos);
-            }
-            return e; // not waiting
-        }
-    }
-
-    /**
-     * Tries to append node s as tail.
-     *
-     * @param s the node to append
-     * @param haveData true if appending in data mode
-     * @return null on failure due to losing race with append in
-     * different mode, else s's predecessor, or s itself if no
-     * predecessor
-     */
-    private Node tryAppend(Node s, boolean haveData) {
-        for (Node t = tail, p = t;;) {        // move p to last node and append
-            Node n, u;                        // temps for reads of next & tail
-            if (p == null && (p = head) == null) {
-                if (casHead(null, s))
-                    return s;                 // initialize
-            }
-            else if (p.cannotPrecede(haveData))
-                return null;                  // lost race vs opposite mode
-            else if ((n = p.next) != null)    // not last; keep traversing
-                p = p != t && t != (u = tail) ? (t = u) : // stale tail
-                    (p != n) ? n : null;      // restart if off list
-            else if (!p.casNext(null, s))
-                p = p.next;                   // re-read on CAS failure
-            else {
-                if (p != t) {                 // update if slack now >= 2
-                    while ((tail != t || !casTail(t, s)) &&
-                           (t = tail)   != null &&
-                           (s = t.next) != null && // advance and retry
-                           (s = s.next) != null && s != t);
+                if ((q = p.next) == null) {
+                    if (how == NOW) return e;
+                    if (s == null) s = new Node(e);
+                    if (!p.casNext(null, s)) continue;
+                    if (p != t) casTail(t, s);
+                    if (how == ASYNC) return e;
+                    return awaitMatch(s, p, e, (how == TIMED), nanos);
                 }
-                return p;
+                if (p == (p = q)) continue restart;
             }
         }
     }
@@ -682,9 +692,9 @@
      * Spins/yields/blocks until node s is matched or caller gives up.
      *
      * @param s the waiting node
-     * @param pred the predecessor of s, or s itself if it has no
-     * predecessor, or null if unknown (the null case does not occur
-     * in any current calls but may in possible future extensions)
+     * @param pred the predecessor of s, or null if unknown (the null
+     * case does not occur in any current calls but may in possible
+     * future extensions)
      * @param e the comparison value for checking match
      * @param timed if true, wait only until timeout elapses
      * @param nanos timeout in nanosecs, used only if timed is true
@@ -697,17 +707,20 @@
         ThreadLocalRandom randomYields = null; // bound if needed
 
         for (;;) {
-            Object item = s.item;
-            if (item != e) {                  // matched
+            final Object item;
+            if ((item = s.item) != e) {       // matched
                 // assert item != s;
                 s.forgetContents();           // avoid garbage
                 @SuppressWarnings("unchecked") E itemE = (E) item;
                 return itemE;
             }
             else if (w.isInterrupted() || (timed && nanos <= 0L)) {
-                unsplice(pred, s);           // try to unlink and cancel
-                if (s.casItem(e, s))         // return normally if lost CAS
+                // try to cancel and unlink
+                if (s.casItem(e, s.isData ? null : s)) {
+                    unsplice(pred, s);
                     return e;
+                }
+                // return normally if lost CAS
             }
             else if (spins < 0) {            // establish spins at/near front
                 if ((spins = spinsFor(pred, s.isData)) > 0)
@@ -751,34 +764,32 @@
     /* -------------- Traversal methods -------------- */
 
     /**
-     * Returns the successor of p, or the head node if p.next has been
-     * linked to self, which will only be true if traversing with a
-     * stale pointer that is now off the list.
-     */
-    final Node succ(Node p) {
-        Node next = p.next;
-        return (p == next) ? head : next;
-    }
-
-    /**
      * Returns the first unmatched data node, or null if none.
-     * Callers must recheck if the returned node's item field is null
-     * or self-linked before using.
+     * Callers must recheck if the returned node is unmatched
+     * before using.
      */
     final Node firstDataNode() {
+        Node first = null;
         restartFromHead: for (;;) {
-            for (Node p = head; p != null;) {
-                Object item = p.item;
-                if (p.isData) {
-                    if (item != null && item != p)
-                        return p;
+            Node h = head, p = h;
+            while (p != null) {
+                if (p.item != null) {
+                    if (p.isData) {
+                        first = p;
+                        break;
+                    }
                 }
-                else if (item == null)
+                else if (!p.isData)
                     break;
-                if (p == (p = p.next))
+                final Node q;
+                if ((q = p.next) == null)
+                    break;
+                if (p == (p = q))
                     continue restartFromHead;
             }
-            return null;
+            if (p != h && casHead(h, p))
+                h.selfLink();
+            return first;
         }
     }
 
@@ -811,7 +822,7 @@
             for (Node p = head; p != null;) {
                 Object item = p.item;
                 if (p.isData) {
-                    if (item != null && item != p) {
+                    if (item != null) {
                         if (a == null)
                             a = new String[4];
                         else if (size == a.length)
@@ -840,7 +851,7 @@
             for (Node p = head; p != null;) {
                 Object item = p.item;
                 if (p.isData) {
-                    if (item != null && item != p) {
+                    if (item != null) {
                         if (x == null)
                             x = new Object[4];
                         else if (size == x.length)
@@ -919,76 +930,50 @@
      */
     @SuppressWarnings("unchecked")
     public <T> T[] toArray(T[] a) {
-        if (a == null) throw new NullPointerException();
+        Objects.requireNonNull(a);
         return (T[]) toArrayInternal(a);
     }
 
+    /**
+     * Weakly-consistent iterator.
+     *
+     * Lazily updated ancestor is expected to be amortized O(1) remove(),
+     * but O(n) in the worst case, when lastRet is concurrently deleted.
+     */
     final class Itr implements Iterator<E> {
         private Node nextNode;   // next node to return item for
         private E nextItem;      // the corresponding item
         private Node lastRet;    // last returned node, to support remove
-        private Node lastPred;   // predecessor to unlink lastRet
+        private Node ancestor;   // Helps unlink lastRet on remove()
 
         /**
-         * Moves to next node after prev, or first node if prev null.
+         * Moves to next node after pred, or first node if pred null.
          */
-        private void advance(Node prev) {
-            /*
-             * To track and avoid buildup of deleted nodes in the face
-             * of calls to both Queue.remove and Itr.remove, we must
-             * include variants of unsplice and sweep upon each
-             * advance: Upon Itr.remove, we may need to catch up links
-             * from lastPred, and upon other removes, we might need to
-             * skip ahead from stale nodes and unsplice deleted ones
-             * found while advancing.
-             */
-
-            Node r, b; // reset lastPred upon possible deletion of lastRet
-            if ((r = lastRet) != null && !r.isMatched())
-                lastPred = r;    // next lastPred is old lastRet
-            else if ((b = lastPred) == null || b.isMatched())
-                lastPred = null; // at start of list
-            else {
-                Node s, n;       // help with removal of lastPred.next
-                while ((s = b.next) != null &&
-                       s != b && s.isMatched() &&
-                       (n = s.next) != null && n != s)
-                    b.casNext(s, n);
-            }
-
-            this.lastRet = prev;
-
-            for (Node p = prev, s, n;;) {
-                s = (p == null) ? head : p.next;
-                if (s == null)
-                    break;
-                else if (s == p) {
-                    p = null;
-                    continue;
+        @SuppressWarnings("unchecked")
+        private void advance(Node pred) {
+            for (Node p = (pred == null) ? head : pred.next, c = p;
+                 p != null; ) {
+                final Object item;
+                if ((item = p.item) != null && p.isData) {
+                    nextNode = p;
+                    nextItem = (E) item;
+                    if (c != p)
+                        tryCasSuccessor(pred, c, p);
+                    return;
                 }
-                Object item = s.item;
-                if (s.isData) {
-                    if (item != null && item != s) {
-                        @SuppressWarnings("unchecked") E itemE = (E) item;
-                        nextItem = itemE;
-                        nextNode = s;
-                        return;
-                    }
+                else if (!p.isData && item == null)
+                    break;
+                if (c != p && !tryCasSuccessor(pred, c, c = p)) {
+                    pred = p;
+                    c = p = p.next;
                 }
-                else if (item == null)
-                    break;
-                // assert s.isMatched();
-                if (p == null)
-                    p = s;
-                else if ((n = s.next) == null)
-                    break;
-                else if (s == n)
-                    p = null;
-                else
-                    p.casNext(s, n);
+                else if (p == (p = p.next)) {
+                    pred = null;
+                    c = p = head;
+                }
             }
-            nextNode = null;
             nextItem = null;
+            nextNode = null;
         }
 
         Itr() {
@@ -1000,25 +985,67 @@
         }
 
         public final E next() {
-            Node p = nextNode;
-            if (p == null) throw new NoSuchElementException();
+            final Node p;
+            if ((p = nextNode) == null) throw new NoSuchElementException();
             E e = nextItem;
-            advance(p);
+            advance(lastRet = p);
             return e;
         }
 
+        public void forEachRemaining(Consumer<? super E> action) {
+            Objects.requireNonNull(action);
+            Node q = null;
+            for (Node p; (p = nextNode) != null; advance(q = p))
+                action.accept(nextItem);
+            if (q != null)
+                lastRet = q;
+        }
+
         public final void remove() {
             final Node lastRet = this.lastRet;
             if (lastRet == null)
                 throw new IllegalStateException();
             this.lastRet = null;
-            if (lastRet.tryMatchData())
-                unsplice(lastPred, lastRet);
+            if (lastRet.item == null)   // already deleted?
+                return;
+            // Advance ancestor, collapsing intervening dead nodes
+            Node pred = ancestor;
+            for (Node p = (pred == null) ? head : pred.next, c = p, q;
+                 p != null; ) {
+                if (p == lastRet) {
+                    final Object item;
+                    if ((item = p.item) != null)
+                        p.tryMatch(item, null);
+                    if ((q = p.next) == null) q = p;
+                    if (c != q) tryCasSuccessor(pred, c, q);
+                    ancestor = pred;
+                    return;
+                }
+                final Object item; final boolean pAlive;
+                if (pAlive = ((item = p.item) != null && p.isData)) {
+                    // exceptionally, nothing to do
+                }
+                else if (!p.isData && item == null)
+                    break;
+                if ((c != p && !tryCasSuccessor(pred, c, c = p)) || pAlive) {
+                    pred = p;
+                    c = p = p.next;
+                }
+                else if (p == (p = p.next)) {
+                    pred = null;
+                    c = p = head;
+                }
+            }
+            // traversal failed to find lastRet; must have been deleted;
+            // leave ancestor at original location to avoid overshoot;
+            // better luck next time!
+
+            // assert lastRet.isMatched();
         }
     }
 
     /** A customized variant of Spliterators.IteratorSpliterator */
-    final class LTQSpliterator<E> implements Spliterator<E> {
+    final class LTQSpliterator implements Spliterator<E> {
         static final int MAX_BATCH = 1 << 25;  // max batch array size;
         Node current;       // current node; null until initialized
         int batch;          // batch size for splits
@@ -1026,79 +1053,90 @@
         LTQSpliterator() {}
 
         public Spliterator<E> trySplit() {
-            Node p;
-            int b = batch;
-            int n = (b <= 0) ? 1 : (b >= MAX_BATCH) ? MAX_BATCH : b + 1;
-            if (!exhausted &&
-                ((p = current) != null || (p = firstDataNode()) != null) &&
-                p.next != null) {
-                Object[] a = new Object[n];
-                int i = 0;
-                do {
-                    Object e = p.item;
-                    if (e != p && (a[i] = e) != null)
-                        ++i;
-                    if (p == (p = p.next))
-                        p = firstDataNode();
-                } while (p != null && i < n && p.isData);
-                if ((current = p) == null)
-                    exhausted = true;
-                if (i > 0) {
-                    batch = i;
-                    return Spliterators.spliterator
-                        (a, 0, i, (Spliterator.ORDERED |
-                                   Spliterator.NONNULL |
-                                   Spliterator.CONCURRENT));
+            Node p, q;
+            if ((p = current()) == null || (q = p.next) == null)
+                return null;
+            int i = 0, n = batch = Math.min(batch + 1, MAX_BATCH);
+            Object[] a = null;
+            do {
+                final Object item = p.item;
+                if (p.isData) {
+                    if (item != null) {
+                        if (a == null)
+                            a = new Object[n];
+                        a[i++] = item;
+                    }
+                } else if (item == null) {
+                    p = null;
+                    break;
                 }
-            }
-            return null;
+                if (p == (p = q))
+                    p = firstDataNode();
+            } while (p != null && (q = p.next) != null && i < n);
+            setCurrent(p);
+            return (i == 0) ? null :
+                Spliterators.spliterator(a, 0, i, (Spliterator.ORDERED |
+                                                   Spliterator.NONNULL |
+                                                   Spliterator.CONCURRENT));
         }
 
-        @SuppressWarnings("unchecked")
         public void forEachRemaining(Consumer<? super E> action) {
-            Node p;
-            if (action == null) throw new NullPointerException();
-            if (!exhausted &&
-                ((p = current) != null || (p = firstDataNode()) != null)) {
+            Objects.requireNonNull(action);
+            final Node p;
+            if ((p = current()) != null) {
+                current = null;
                 exhausted = true;
-                do {
-                    Object e = p.item;
-                    if (e != null && e != p)
-                        action.accept((E)e);
-                    if (p == (p = p.next))
-                        p = firstDataNode();
-                } while (p != null && p.isData);
+                forEachFrom(action, p);
             }
         }
 
         @SuppressWarnings("unchecked")
         public boolean tryAdvance(Consumer<? super E> action) {
+            Objects.requireNonNull(action);
             Node p;
-            if (action == null) throw new NullPointerException();
-            if (!exhausted &&
-                ((p = current) != null || (p = firstDataNode()) != null)) {
-                Object e;
+            if ((p = current()) != null) {
+                E e = null;
                 do {
-                    if ((e = p.item) == p)
-                        e = null;
+                    final Object item = p.item;
+                    final boolean isData = p.isData;
                     if (p == (p = p.next))
-                        p = firstDataNode();
-                } while (e == null && p != null && p.isData);
-                if ((current = p) == null)
-                    exhausted = true;
+                        p = head;
+                    if (isData) {
+                        if (item != null) {
+                            e = (E) item;
+                            break;
+                        }
+                    }
+                    else if (item == null)
+                        p = null;
+                } while (p != null);
+                setCurrent(p);
                 if (e != null) {
-                    action.accept((E)e);
+                    action.accept(e);
                     return true;
                 }
             }
             return false;
         }
 
+        private void setCurrent(Node p) {
+            if ((current = p) == null)
+                exhausted = true;
+        }
+
+        private Node current() {
+            Node p;
+            if ((p = current) == null && !exhausted)
+                setCurrent(p = firstDataNode());
+            return p;
+        }
+
         public long estimateSize() { return Long.MAX_VALUE; }
 
         public int characteristics() {
-            return Spliterator.ORDERED | Spliterator.NONNULL |
-                Spliterator.CONCURRENT;
+            return (Spliterator.ORDERED |
+                    Spliterator.NONNULL |
+                    Spliterator.CONCURRENT);
         }
     }
 
@@ -1119,7 +1157,7 @@
      * @since 1.8
      */
     public Spliterator<E> spliterator() {
-        return new LTQSpliterator<E>();
+        return new LTQSpliterator();
     }
 
     /* -------------- Removal methods -------------- */
@@ -1129,10 +1167,15 @@
      * the given predecessor.
      *
      * @param pred a node that was at one time known to be the
-     * predecessor of s, or null or s itself if s is/was at head
+     * predecessor of s
      * @param s the node to be unspliced
      */
     final void unsplice(Node pred, Node s) {
+        // assert pred != null;
+        // assert pred != s;
+        // assert s != null;
+        // assert s.isMatched();
+        // assert (SWEEP_THRESHOLD & (SWEEP_THRESHOLD - 1)) == 0;
         s.waiter = null; // disable signals
         /*
          * See above for rationale. Briefly: if pred still points to
@@ -1141,13 +1184,13 @@
          * nor s are head or offlist, add to sweepVotes, and if enough
          * votes have accumulated, sweep.
          */
-        if (pred != null && pred != s && pred.next == s) {
+        if (pred != null && pred.next == s) {
             Node n = s.next;
             if (n == null ||
                 (n != s && pred.casNext(s, n) && pred.isMatched())) {
                 for (;;) {               // check if at, or could be, head
                     Node h = head;
-                    if (h == pred || h == s || h == null)
+                    if (h == pred || h == s)
                         return;          // at head or list empty
                     if (!h.isMatched())
                         break;
@@ -1155,21 +1198,12 @@
                     if (hn == null)
                         return;          // now empty
                     if (hn != h && casHead(h, hn))
-                        h.forgetNext();  // advance head
+                        h.selfLink();  // advance head
                 }
-                if (pred.next != pred && s.next != s) { // recheck if offlist
-                    for (;;) {           // sweep now if enough votes
-                        int v = sweepVotes;
-                        if (v < SWEEP_THRESHOLD) {
-                            if (casSweepVotes(v, v + 1))
-                                break;
-                        }
-                        else if (casSweepVotes(v, 0)) {
-                            sweep();
-                            break;
-                        }
-                    }
-                }
+                // sweep every SWEEP_THRESHOLD votes
+                if (pred.next != pred && s.next != s // recheck if offlist
+                    && (incSweepVotes() & (SWEEP_THRESHOLD - 1)) == 0)
+                    sweep();
             }
         }
     }
@@ -1194,35 +1228,10 @@
     }
 
     /**
-     * Main implementation of remove(Object)
-     */
-    private boolean findAndRemove(Object e) {
-        if (e != null) {
-            for (Node pred = null, p = head; p != null; ) {
-                Object item = p.item;
-                if (p.isData) {
-                    if (item != null && item != p && e.equals(item) &&
-                        p.tryMatchData()) {
-                        unsplice(pred, p);
-                        return true;
-                    }
-                }
-                else if (item == null)
-                    break;
-                pred = p;
-                if ((p = p.next) == pred) { // stale
-                    pred = null;
-                    p = head;
-                }
-            }
-        }
-        return false;
-    }
-
-    /**
      * Creates an initially empty {@code LinkedTransferQueue}.
      */
     public LinkedTransferQueue() {
+        head = tail = new Node();
     }
 
     /**
@@ -1235,8 +1244,18 @@
      *         of its elements are null
      */
     public LinkedTransferQueue(Collection<? extends E> c) {
-        this();
-        addAll(c);
+        Node h = null, t = null;
+        for (E e : c) {
+            Node newNode = new Node(Objects.requireNonNull(e));
+            if (h == null)
+                h = t = newNode;
+            else
+                t.appendRelaxed(t = newNode);
+        }
+        if (h == null)
+            h = t = new Node();
+        head = h;
+        tail = t;
     }
 
     /**
@@ -1255,8 +1274,7 @@
      * return {@code false}.
      *
      * @return {@code true} (as specified by
-     *  {@link java.util.concurrent.BlockingQueue#offer(Object,long,TimeUnit)
-     *  BlockingQueue.offer})
+     *  {@link BlockingQueue#offer(Object,long,TimeUnit) BlockingQueue.offer})
      * @throws NullPointerException if the specified element is null
      */
     public boolean offer(E e, long timeout, TimeUnit unit) {
@@ -1368,15 +1386,12 @@
      * @throws IllegalArgumentException {@inheritDoc}
      */
     public int drainTo(Collection<? super E> c) {
-        if (c == null)
-            throw new NullPointerException();
+        Objects.requireNonNull(c);
         if (c == this)
             throw new IllegalArgumentException();
         int n = 0;
-        for (E e; (e = poll()) != null;) {
+        for (E e; (e = poll()) != null; n++)
             c.add(e);
-            ++n;
-        }
         return n;
     }
 
@@ -1385,15 +1400,12 @@
      * @throws IllegalArgumentException {@inheritDoc}
      */
     public int drainTo(Collection<? super E> c, int maxElements) {
-        if (c == null)
-            throw new NullPointerException();
+        Objects.requireNonNull(c);
         if (c == this)
             throw new IllegalArgumentException();
         int n = 0;
-        for (E e; n < maxElements && (e = poll()) != null;) {
+        for (E e; n < maxElements && (e = poll()) != null; n++)
             c.add(e);
-            ++n;
-        }
         return n;
     }
 
@@ -1415,7 +1427,7 @@
             for (Node p = head; p != null;) {
                 Object item = p.item;
                 if (p.isData) {
-                    if (item != null && item != p) {
+                    if (item != null) {
                         @SuppressWarnings("unchecked") E e = (E) item;
                         return e;
                     }
@@ -1443,7 +1455,7 @@
             for (Node p = head; p != null;) {
                 Object item = p.item;
                 if (p.isData) {
-                    if (item != null && item != p)
+                    if (item != null)
                         break;
                 }
                 else if (item == null)
@@ -1487,7 +1499,31 @@
      * @return {@code true} if this queue changed as a result of the call
      */
     public boolean remove(Object o) {
-        return findAndRemove(o);
+        if (o == null) return false;
+        restartFromHead: for (;;) {
+            for (Node p = head, pred = null; p != null; ) {
+                Node q = p.next;
+                final Object item;
+                if ((item = p.item) != null) {
+                    if (p.isData) {
+                        if (o.equals(item) && p.tryMatch(item, null)) {
+                            skipDeadNodes(pred, p, p, q);
+                            return true;
+                        }
+                        pred = p; p = q; continue;
+                    }
+                }
+                else if (!p.isData)
+                    break;
+                for (Node c = p;; q = p.next) {
+                    if (q == null || !q.isMatched()) {
+                        pred = skipDeadNodes(pred, c, p, q); p = q; break;
+                    }
+                    if (p == (p = q)) continue restartFromHead;
+                }
+            }
+            return false;
+        }
     }
 
     /**
@@ -1499,18 +1535,29 @@
      * @return {@code true} if this queue contains the specified element
      */
     public boolean contains(Object o) {
-        if (o != null) {
-            for (Node p = head; p != null; p = succ(p)) {
-                Object item = p.item;
-                if (p.isData) {
-                    if (item != null && item != p && o.equals(item))
-                        return true;
+        if (o == null) return false;
+        restartFromHead: for (;;) {
+            for (Node p = head, pred = null; p != null; ) {
+                Node q = p.next;
+                final Object item;
+                if ((item = p.item) != null) {
+                    if (p.isData) {
+                        if (o.equals(item))
+                            return true;
+                        pred = p; p = q; continue;
+                    }
                 }
-                else if (item == null)
+                else if (!p.isData)
                     break;
+                for (Node c = p;; q = p.next) {
+                    if (q == null || !q.isMatched()) {
+                        pred = skipDeadNodes(pred, c, p, q); p = q; break;
+                    }
+                    if (p == (p = q)) continue restartFromHead;
+                }
             }
+            return false;
         }
-        return false;
     }
 
     /**
@@ -1518,8 +1565,7 @@
      * {@code LinkedTransferQueue} is not capacity constrained.
      *
      * @return {@code Integer.MAX_VALUE} (as specified by
-     *         {@link java.util.concurrent.BlockingQueue#remainingCapacity()
-     *         BlockingQueue.remainingCapacity})
+     *         {@link BlockingQueue#remainingCapacity()})
      */
     public int remainingCapacity() {
         return Integer.MAX_VALUE;
@@ -1551,33 +1597,149 @@
      */
     private void readObject(java.io.ObjectInputStream s)
         throws java.io.IOException, ClassNotFoundException {
-        s.defaultReadObject();
-        for (;;) {
-            @SuppressWarnings("unchecked")
-            E item = (E) s.readObject();
-            if (item == null)
-                break;
+
+        // Read in elements until trailing null sentinel found
+        Node h = null, t = null;
+        for (Object item; (item = s.readObject()) != null; ) {
+            Node newNode = new Node(item);
+            if (h == null)
+                h = t = newNode;
             else
-                offer(item);
+                t.appendRelaxed(t = newNode);
+        }
+        if (h == null)
+            h = t = new Node();
+        head = h;
+        tail = t;
+    }
+
+    /**
+     * @throws NullPointerException {@inheritDoc}
+     */
+    public boolean removeIf(Predicate<? super E> filter) {
+        Objects.requireNonNull(filter);
+        return bulkRemove(filter);
+    }
+
+    /**
+     * @throws NullPointerException {@inheritDoc}
+     */
+    public boolean removeAll(Collection<?> c) {
+        Objects.requireNonNull(c);
+        return bulkRemove(e -> c.contains(e));
+    }
+
+    /**
+     * @throws NullPointerException {@inheritDoc}
+     */
+    public boolean retainAll(Collection<?> c) {
+        Objects.requireNonNull(c);
+        return bulkRemove(e -> !c.contains(e));
+    }
+
+    public void clear() {
+        bulkRemove(e -> true);
+    }
+
+    /**
+     * Tolerate this many consecutive dead nodes before CAS-collapsing.
+     * Amortized cost of clear() is (1 + 1/MAX_HOPS) CASes per element.
+     */
+    private static final int MAX_HOPS = 8;
+
+    /** Implementation of bulk remove methods. */
+    @SuppressWarnings("unchecked")
+    private boolean bulkRemove(Predicate<? super E> filter) {
+        boolean removed = false;
+        restartFromHead: for (;;) {
+            int hops = MAX_HOPS;
+            // c will be CASed to collapse intervening dead nodes between
+            // pred (or head if null) and p.
+            for (Node p = head, c = p, pred = null, q; p != null; p = q) {
+                q = p.next;
+                final Object item; boolean pAlive;
+                if (pAlive = ((item = p.item) != null && p.isData)) {
+                    if (filter.test((E) item)) {
+                        if (p.tryMatch(item, null))
+                            removed = true;
+                        pAlive = false;
+                    }
+                }
+                else if (!p.isData && item == null)
+                    break;
+                if (pAlive || q == null || --hops == 0) {
+                    // p might already be self-linked here, but if so:
+                    // - CASing head will surely fail
+                    // - CASing pred's next will be useless but harmless.
+                    if ((c != p && !tryCasSuccessor(pred, c, c = p))
+                        || pAlive) {
+                        // if CAS failed or alive, abandon old pred
+                        hops = MAX_HOPS;
+                        pred = p;
+                        c = q;
+                    }
+                } else if (p == q)
+                    continue restartFromHead;
+            }
+            return removed;
         }
     }
 
-    // Unsafe mechanics
+    /**
+     * Runs action on each element found during a traversal starting at p.
+     * If p is null, the action is not run.
+     */
+    @SuppressWarnings("unchecked")
+    void forEachFrom(Consumer<? super E> action, Node p) {
+        for (Node pred = null; p != null; ) {
+            Node q = p.next;
+            final Object item;
+            if ((item = p.item) != null) {
+                if (p.isData) {
+                    action.accept((E) item);
+                    pred = p; p = q; continue;
+                }
+            }
+            else if (!p.isData)
+                break;
+            for (Node c = p;; q = p.next) {
+                if (q == null || !q.isMatched()) {
+                    pred = skipDeadNodes(pred, c, p, q); p = q; break;
+                }
+                if (p == (p = q)) { pred = null; p = head; break; }
+            }
+        }
+    }
 
-    private static final sun.misc.Unsafe U = sun.misc.Unsafe.getUnsafe();
-    private static final long HEAD;
-    private static final long TAIL;
-    private static final long SWEEPVOTES;
+    /**
+     * @throws NullPointerException {@inheritDoc}
+     */
+    public void forEach(Consumer<? super E> action) {
+        Objects.requireNonNull(action);
+        forEachFrom(action, head);
+    }
+
+    // VarHandle mechanics
+    private static final VarHandle HEAD;
+    private static final VarHandle TAIL;
+    private static final VarHandle SWEEPVOTES;
+    static final VarHandle ITEM;
+    static final VarHandle NEXT;
+    static final VarHandle WAITER;
     static {
         try {
-            HEAD = U.objectFieldOffset
-                (LinkedTransferQueue.class.getDeclaredField("head"));
-            TAIL = U.objectFieldOffset
-                (LinkedTransferQueue.class.getDeclaredField("tail"));
-            SWEEPVOTES = U.objectFieldOffset
-                (LinkedTransferQueue.class.getDeclaredField("sweepVotes"));
+            MethodHandles.Lookup l = MethodHandles.lookup();
+            HEAD = l.findVarHandle(LinkedTransferQueue.class, "head",
+                                   Node.class);
+            TAIL = l.findVarHandle(LinkedTransferQueue.class, "tail",
+                                   Node.class);
+            SWEEPVOTES = l.findVarHandle(LinkedTransferQueue.class, "sweepVotes",
+                                         int.class);
+            ITEM = l.findVarHandle(Node.class, "item", Object.class);
+            NEXT = l.findVarHandle(Node.class, "next", Node.class);
+            WAITER = l.findVarHandle(Node.class, "waiter", Thread.class);
         } catch (ReflectiveOperationException e) {
-            throw new Error(e);
+            throw new ExceptionInInitializerError(e);
         }
 
         // Reduce the risk of rare disastrous classloading in first call to
diff --git a/ojluni/src/main/java/java/util/concurrent/Phaser.java b/ojluni/src/main/java/java/util/concurrent/Phaser.java
index 9ef9936..d878e45 100644
--- a/ojluni/src/main/java/java/util/concurrent/Phaser.java
+++ b/ojluni/src/main/java/java/util/concurrent/Phaser.java
@@ -35,14 +35,15 @@
 
 package java.util.concurrent;
 
+import java.lang.invoke.MethodHandles;
+import java.lang.invoke.VarHandle;
 import java.util.concurrent.atomic.AtomicReference;
 import java.util.concurrent.locks.LockSupport;
 
 /**
  * A reusable synchronization barrier, similar in functionality to
- * {@link java.util.concurrent.CyclicBarrier CyclicBarrier} and
- * {@link java.util.concurrent.CountDownLatch CountDownLatch}
- * but supporting more flexible usage.
+ * {@link CyclicBarrier} and {@link CountDownLatch} but supporting
+ * more flexible usage.
  *
  * <p><b>Registration.</b> Unlike the case for other barriers, the
  * number of parties <em>registered</em> to synchronize on a phaser
@@ -152,49 +153,46 @@
  * <p>A {@code Phaser} may be used instead of a {@code CountDownLatch}
  * to control a one-shot action serving a variable number of parties.
  * The typical idiom is for the method setting this up to first
- * register, then start the actions, then deregister, as in:
+ * register, then start all the actions, then deregister, as in:
  *
  * <pre> {@code
  * void runTasks(List<Runnable> tasks) {
- *   final Phaser phaser = new Phaser(1); // "1" to register self
+ *   Phaser startingGate = new Phaser(1); // "1" to register self
  *   // create and start threads
- *   for (final Runnable task : tasks) {
- *     phaser.register();
- *     new Thread() {
- *       public void run() {
- *         phaser.arriveAndAwaitAdvance(); // await all creation
- *         task.run();
- *       }
- *     }.start();
+ *   for (Runnable task : tasks) {
+ *     startingGate.register();
+ *     new Thread(() -> {
+ *       startingGate.arriveAndAwaitAdvance();
+ *       task.run();
+ *     }).start();
  *   }
  *
- *   // allow threads to start and deregister self
- *   phaser.arriveAndDeregister();
+ *   // deregister self to allow threads to proceed
+ *   startingGate.arriveAndDeregister();
  * }}</pre>
  *
  * <p>One way to cause a set of threads to repeatedly perform actions
  * for a given number of iterations is to override {@code onAdvance}:
  *
  * <pre> {@code
- * void startTasks(List<Runnable> tasks, final int iterations) {
- *   final Phaser phaser = new Phaser() {
+ * void startTasks(List<Runnable> tasks, int iterations) {
+ *   Phaser phaser = new Phaser() {
  *     protected boolean onAdvance(int phase, int registeredParties) {
- *       return phase >= iterations || registeredParties == 0;
+ *       return phase >= iterations - 1 || registeredParties == 0;
  *     }
  *   };
  *   phaser.register();
- *   for (final Runnable task : tasks) {
+ *   for (Runnable task : tasks) {
  *     phaser.register();
- *     new Thread() {
- *       public void run() {
- *         do {
- *           task.run();
- *           phaser.arriveAndAwaitAdvance();
- *         } while (!phaser.isTerminated());
- *       }
- *     }.start();
+ *     new Thread(() -> {
+ *       do {
+ *         task.run();
+ *         phaser.arriveAndAwaitAdvance();
+ *       } while (!phaser.isTerminated());
+ *     }).start();
  *   }
- *   phaser.arriveAndDeregister(); // deregister self, don't wait
+ *   // allow threads to proceed; don't wait for them
+ *   phaser.arriveAndDeregister();
  * }}</pre>
  *
  * If the main task must later await termination, it
@@ -221,7 +219,6 @@
  *   phaser.arriveAndDeregister();
  * }}</pre>
  *
- *
  * <p>To create a set of {@code n} tasks using a tree of phasers, you
  * could use code of the following form, assuming a Task class with a
  * constructor accepting a {@code Phaser} that it registers with upon
@@ -348,10 +345,6 @@
     private final AtomicReference<QNode> evenQ;
     private final AtomicReference<QNode> oddQ;
 
-    private AtomicReference<QNode> queueFor(int phase) {
-        return ((phase & 1) == 0) ? evenQ : oddQ;
-    }
-
     /**
      * Returns message string for bounds exceptions on arrival.
      */
@@ -388,7 +381,7 @@
             int unarrived = (counts == EMPTY) ? 0 : (counts & UNARRIVED_MASK);
             if (unarrived <= 0)
                 throw new IllegalStateException(badArrive(s));
-            if (U.compareAndSwapLong(this, STATE, s, s-=adjust)) {
+            if (STATE.compareAndSet(this, s, s-=adjust)) {
                 if (unarrived == 1) {
                     long n = s & PARTIES_MASK;  // base of next state
                     int nextUnarrived = (int)n >>> PARTIES_SHIFT;
@@ -401,12 +394,12 @@
                             n |= nextUnarrived;
                         int nextPhase = (phase + 1) & MAX_PHASE;
                         n |= (long)nextPhase << PHASE_SHIFT;
-                        U.compareAndSwapLong(this, STATE, s, n);
+                        STATE.compareAndSet(this, s, n);
                         releaseWaiters(phase);
                     }
                     else if (nextUnarrived == 0) { // propagate deregistration
                         phase = parent.doArrive(ONE_DEREGISTER);
-                        U.compareAndSwapLong(this, STATE, s, s | EMPTY);
+                        STATE.compareAndSet(this, s, s | EMPTY);
                     }
                     else
                         phase = parent.doArrive(ONE_ARRIVAL);
@@ -441,13 +434,13 @@
                 if (parent == null || reconcileState() == s) {
                     if (unarrived == 0)             // wait out advance
                         root.internalAwaitAdvance(phase, null);
-                    else if (U.compareAndSwapLong(this, STATE, s, s + adjust))
+                    else if (STATE.compareAndSet(this, s, s + adjust))
                         break;
                 }
             }
             else if (parent == null) {              // 1st root registration
                 long next = ((long)phase << PHASE_SHIFT) | adjust;
-                if (U.compareAndSwapLong(this, STATE, s, next))
+                if (STATE.compareAndSet(this, s, next))
                     break;
             }
             else {
@@ -459,8 +452,8 @@
                         // finish registration whenever parent registration
                         // succeeded, even when racing with termination,
                         // since these are part of the same "transaction".
-                        while (!U.compareAndSwapLong
-                               (this, STATE, s,
+                        while (!STATE.weakCompareAndSet
+                               (this, s,
                                 ((long)phase << PHASE_SHIFT) | adjust)) {
                             s = state;
                             phase = (int)(root.state >>> PHASE_SHIFT);
@@ -491,8 +484,8 @@
             // CAS to root phase with current parties, tripping unarrived
             while ((phase = (int)(root.state >>> PHASE_SHIFT)) !=
                    (int)(s >>> PHASE_SHIFT) &&
-                   !U.compareAndSwapLong
-                   (this, STATE, s,
+                   !STATE.weakCompareAndSet
+                   (this, s,
                     s = (((long)phase << PHASE_SHIFT) |
                          ((phase < 0) ? (s & COUNTS_MASK) :
                           (((p = (int)s >>> PARTIES_SHIFT) == 0) ? EMPTY :
@@ -681,7 +674,7 @@
             int unarrived = (counts == EMPTY) ? 0 : (counts & UNARRIVED_MASK);
             if (unarrived <= 0)
                 throw new IllegalStateException(badArrive(s));
-            if (U.compareAndSwapLong(this, STATE, s, s -= ONE_ARRIVAL)) {
+            if (STATE.compareAndSet(this, s, s -= ONE_ARRIVAL)) {
                 if (unarrived > 1)
                     return root.internalAwaitAdvance(phase, null);
                 if (root != this)
@@ -696,7 +689,7 @@
                     n |= nextUnarrived;
                 int nextPhase = (phase + 1) & MAX_PHASE;
                 n |= (long)nextPhase << PHASE_SHIFT;
-                if (!U.compareAndSwapLong(this, STATE, s, n))
+                if (!STATE.compareAndSet(this, s, n))
                     return (int)(state >>> PHASE_SHIFT); // terminated
                 releaseWaiters(phase);
                 return nextPhase;
@@ -812,7 +805,7 @@
         final Phaser root = this.root;
         long s;
         while ((s = root.state) >= 0) {
-            if (U.compareAndSwapLong(root, STATE, s, s | TERMINATION_BIT)) {
+            if (STATE.compareAndSet(root, s, s | TERMINATION_BIT)) {
                 // signal all threads
                 releaseWaiters(0); // Waiters on evenQ
                 releaseWaiters(1); // Waiters on oddQ
@@ -1047,6 +1040,8 @@
                     node = new QNode(this, phase, false, false, 0L);
                     node.wasInterrupted = interrupted;
                 }
+                else
+                    Thread.onSpinWait();
             }
             else if (node.isReleasable()) // done or aborted
                 break;
@@ -1135,16 +1130,14 @@
         }
     }
 
-    // Unsafe mechanics
-
-    private static final sun.misc.Unsafe U = sun.misc.Unsafe.getUnsafe();
-    private static final long STATE;
+    // VarHandle mechanics
+    private static final VarHandle STATE;
     static {
         try {
-            STATE = U.objectFieldOffset
-                (Phaser.class.getDeclaredField("state"));
+            MethodHandles.Lookup l = MethodHandles.lookup();
+            STATE = l.findVarHandle(Phaser.class, "state", long.class);
         } catch (ReflectiveOperationException e) {
-            throw new Error(e);
+            throw new ExceptionInInitializerError(e);
         }
 
         // Reduce the risk of rare disastrous classloading in first call to
diff --git a/ojluni/src/main/java/java/util/concurrent/PriorityBlockingQueue.java b/ojluni/src/main/java/java/util/concurrent/PriorityBlockingQueue.java
index 644de86..03a17e5 100644
--- a/ojluni/src/main/java/java/util/concurrent/PriorityBlockingQueue.java
+++ b/ojluni/src/main/java/java/util/concurrent/PriorityBlockingQueue.java
@@ -35,12 +35,15 @@
 
 package java.util.concurrent;
 
+import java.lang.invoke.MethodHandles;
+import java.lang.invoke.VarHandle;
 import java.util.AbstractQueue;
 import java.util.Arrays;
 import java.util.Collection;
 import java.util.Comparator;
 import java.util.Iterator;
 import java.util.NoSuchElementException;
+import java.util.Objects;
 import java.util.PriorityQueue;
 import java.util.Queue;
 import java.util.SortedSet;
@@ -48,10 +51,8 @@
 import java.util.concurrent.locks.Condition;
 import java.util.concurrent.locks.ReentrantLock;
 import java.util.function.Consumer;
-
-// BEGIN android-note
-// removed link to collections framework docs
-// END android-note
+import java.util.function.Predicate;
+import jdk.internal.misc.SharedSecrets;
 
 /**
  * An unbounded {@linkplain BlockingQueue blocking queue} that uses
@@ -64,15 +65,15 @@
  * non-comparable objects (doing so results in
  * {@code ClassCastException}).
  *
- * <p>This class and its iterator implement all of the
- * <em>optional</em> methods of the {@link Collection} and {@link
- * Iterator} interfaces.  The Iterator provided in method {@link
- * #iterator()} is <em>not</em> guaranteed to traverse the elements of
- * the PriorityBlockingQueue in any particular order. If you need
- * ordered traversal, consider using
- * {@code Arrays.sort(pq.toArray())}.  Also, method {@code drainTo}
- * can be used to <em>remove</em> some or all elements in priority
- * order and place them in another collection.
+ * <p>This class and its iterator implement all of the <em>optional</em>
+ * methods of the {@link Collection} and {@link Iterator} interfaces.
+ * The Iterator provided in method {@link #iterator()} and the
+ * Spliterator provided in method {@link #spliterator()} are <em>not</em>
+ * guaranteed to traverse the elements of the PriorityBlockingQueue in
+ * any particular order. If you need ordered traversal, consider using
+ * {@code Arrays.sort(pq.toArray())}.  Also, method {@code drainTo} can
+ * be used to <em>remove</em> some or all elements in priority order and
+ * place them in another collection.
  *
  * <p>Operations on this class make no guarantees about the ordering
  * of elements with equal priority. If you need to enforce an
@@ -101,6 +102,10 @@
  *   }
  * }}</pre>
  *
+ * <p>This class is a member of the
+ * <a href="{@docRoot}/java.base/java/util/package-summary.html#CollectionsFramework">
+ * Java Collections Framework</a>.
+ *
  * @since 1.5
  * @author Doug Lea
  * @param <E> the type of elements held in this queue
@@ -163,12 +168,12 @@
     /**
      * Lock used for all public operations.
      */
-    private final ReentrantLock lock;
+    private final ReentrantLock lock = new ReentrantLock();
 
     /**
      * Condition for blocking when empty.
      */
-    private final Condition notEmpty;
+    private final Condition notEmpty = lock.newCondition();
 
     /**
      * Spinlock for allocation, acquired via CAS.
@@ -220,10 +225,8 @@
                                  Comparator<? super E> comparator) {
         if (initialCapacity < 1)
             throw new IllegalArgumentException();
-        this.lock = new ReentrantLock();
-        this.notEmpty = lock.newCondition();
         this.comparator = comparator;
-        this.queue = new Object[initialCapacity];
+        this.queue = new Object[Math.max(1, initialCapacity)];
     }
 
     /**
@@ -243,8 +246,6 @@
      *         of its elements are null
      */
     public PriorityBlockingQueue(Collection<? extends E> c) {
-        this.lock = new ReentrantLock();
-        this.notEmpty = lock.newCondition();
         boolean heapify = true; // true if not known to be in heap order
         boolean screen = true;  // true if must screen for nulls
         if (c instanceof SortedSet<?>) {
@@ -260,22 +261,27 @@
             if (pq.getClass() == PriorityBlockingQueue.class) // exact match
                 heapify = false;
         }
-        Object[] a = c.toArray();
-        int n = a.length;
+        Object[] es = c.toArray();
+        int n = es.length;
         // If c.toArray incorrectly doesn't return Object[], copy it.
-        if (a.getClass() != Object[].class)
-            a = Arrays.copyOf(a, n, Object[].class);
+        if (es.getClass() != Object[].class)
+            es = Arrays.copyOf(es, n, Object[].class);
         if (screen && (n == 1 || this.comparator != null)) {
-            for (int i = 0; i < n; ++i)
-                if (a[i] == null)
+            for (Object e : es)
+                if (e == null)
                     throw new NullPointerException();
         }
-        this.queue = a;
+        this.queue = ensureNonEmpty(es);
         this.size = n;
         if (heapify)
             heapify();
     }
 
+    /** Ensures that queue[0] exists, helping peek() and poll(). */
+    private static Object[] ensureNonEmpty(Object[] es) {
+        return (es.length > 0) ? es : new Object[1];
+    }
+
     /**
      * Tries to grow array to accommodate at least one more element
      * (but normally expand by about 50%), giving up (allowing retry)
@@ -289,7 +295,7 @@
         lock.unlock(); // must release and then re-acquire main lock
         Object[] newArray = null;
         if (allocationSpinLock == 0 &&
-            U.compareAndSwapInt(this, ALLOCATIONSPINLOCK, 0, 1)) {
+            ALLOCATIONSPINLOCK.compareAndSet(this, 0, 1)) {
             try {
                 int newCap = oldCap + ((oldCap < 64) ?
                                        (oldCap + 2) : // grow faster if small
@@ -319,22 +325,23 @@
      * Mechanics for poll().  Call only while holding lock.
      */
     private E dequeue() {
-        int n = size - 1;
-        if (n < 0)
-            return null;
-        else {
-            Object[] array = queue;
-            E result = (E) array[0];
-            E x = (E) array[n];
-            array[n] = null;
-            Comparator<? super E> cmp = comparator;
-            if (cmp == null)
-                siftDownComparable(0, x, array, n);
-            else
-                siftDownUsingComparator(0, x, array, n, cmp);
-            size = n;
-            return result;
+        // assert lock.isHeldByCurrentThread();
+        final Object[] es;
+        final E result;
+
+        if ((result = (E) ((es = queue)[0])) != null) {
+            final int n;
+            final E x = (E) es[(n = --size)];
+            es[n] = null;
+            if (n > 0) {
+                final Comparator<? super E> cmp;
+                if ((cmp = comparator) == null)
+                    siftDownComparable(0, x, es, n);
+                else
+                    siftDownUsingComparator(0, x, es, n, cmp);
+            }
         }
+        return result;
     }
 
     /**
@@ -342,40 +349,38 @@
      * promoting x up the tree until it is greater than or equal to
      * its parent, or is the root.
      *
-     * To simplify and speed up coercions and comparisons. the
+     * To simplify and speed up coercions and comparisons, the
      * Comparable and Comparator versions are separated into different
      * methods that are otherwise identical. (Similarly for siftDown.)
-     * These methods are static, with heap state as arguments, to
-     * simplify use in light of possible comparator exceptions.
      *
      * @param k the position to fill
      * @param x the item to insert
-     * @param array the heap array
+     * @param es the heap array
      */
-    private static <T> void siftUpComparable(int k, T x, Object[] array) {
+    private static <T> void siftUpComparable(int k, T x, Object[] es) {
         Comparable<? super T> key = (Comparable<? super T>) x;
         while (k > 0) {
             int parent = (k - 1) >>> 1;
-            Object e = array[parent];
+            Object e = es[parent];
             if (key.compareTo((T) e) >= 0)
                 break;
-            array[k] = e;
+            es[k] = e;
             k = parent;
         }
-        array[k] = key;
+        es[k] = key;
     }
 
-    private static <T> void siftUpUsingComparator(int k, T x, Object[] array,
-                                       Comparator<? super T> cmp) {
+    private static <T> void siftUpUsingComparator(
+        int k, T x, Object[] es, Comparator<? super T> cmp) {
         while (k > 0) {
             int parent = (k - 1) >>> 1;
-            Object e = array[parent];
+            Object e = es[parent];
             if (cmp.compare(x, (T) e) >= 0)
                 break;
-            array[k] = e;
+            es[k] = e;
             k = parent;
         }
-        array[k] = x;
+        es[k] = x;
     }
 
     /**
@@ -385,67 +390,61 @@
      *
      * @param k the position to fill
      * @param x the item to insert
-     * @param array the heap array
+     * @param es the heap array
      * @param n heap size
      */
-    private static <T> void siftDownComparable(int k, T x, Object[] array,
-                                               int n) {
-        if (n > 0) {
-            Comparable<? super T> key = (Comparable<? super T>)x;
-            int half = n >>> 1;           // loop while a non-leaf
-            while (k < half) {
-                int child = (k << 1) + 1; // assume left child is least
-                Object c = array[child];
-                int right = child + 1;
-                if (right < n &&
-                    ((Comparable<? super T>) c).compareTo((T) array[right]) > 0)
-                    c = array[child = right];
-                if (key.compareTo((T) c) <= 0)
-                    break;
-                array[k] = c;
-                k = child;
-            }
-            array[k] = key;
+    private static <T> void siftDownComparable(int k, T x, Object[] es, int n) {
+        // assert n > 0;
+        Comparable<? super T> key = (Comparable<? super T>)x;
+        int half = n >>> 1;           // loop while a non-leaf
+        while (k < half) {
+            int child = (k << 1) + 1; // assume left child is least
+            Object c = es[child];
+            int right = child + 1;
+            if (right < n &&
+                ((Comparable<? super T>) c).compareTo((T) es[right]) > 0)
+                c = es[child = right];
+            if (key.compareTo((T) c) <= 0)
+                break;
+            es[k] = c;
+            k = child;
         }
+        es[k] = key;
     }
 
-    private static <T> void siftDownUsingComparator(int k, T x, Object[] array,
-                                                    int n,
-                                                    Comparator<? super T> cmp) {
-        if (n > 0) {
-            int half = n >>> 1;
-            while (k < half) {
-                int child = (k << 1) + 1;
-                Object c = array[child];
-                int right = child + 1;
-                if (right < n && cmp.compare((T) c, (T) array[right]) > 0)
-                    c = array[child = right];
-                if (cmp.compare(x, (T) c) <= 0)
-                    break;
-                array[k] = c;
-                k = child;
-            }
-            array[k] = x;
+    private static <T> void siftDownUsingComparator(
+        int k, T x, Object[] es, int n, Comparator<? super T> cmp) {
+        // assert n > 0;
+        int half = n >>> 1;
+        while (k < half) {
+            int child = (k << 1) + 1;
+            Object c = es[child];
+            int right = child + 1;
+            if (right < n && cmp.compare((T) c, (T) es[right]) > 0)
+                c = es[child = right];
+            if (cmp.compare(x, (T) c) <= 0)
+                break;
+            es[k] = c;
+            k = child;
         }
+        es[k] = x;
     }
 
     /**
      * Establishes the heap invariant (described above) in the entire tree,
      * assuming nothing about the order of the elements prior to the call.
+     * This classic algorithm due to Floyd (1964) is known to be O(size).
      */
     private void heapify() {
-        Object[] array = queue;
-        int n = size;
-        int half = (n >>> 1) - 1;
-        Comparator<? super E> cmp = comparator;
-        if (cmp == null) {
-            for (int i = half; i >= 0; i--)
-                siftDownComparable(i, (E) array[i], array, n);
-        }
-        else {
-            for (int i = half; i >= 0; i--)
-                siftDownUsingComparator(i, (E) array[i], array, n, cmp);
-        }
+        final Object[] es = queue;
+        int n = size, i = (n >>> 1) - 1;
+        final Comparator<? super E> cmp;
+        if ((cmp = comparator) == null)
+            for (; i >= 0; i--)
+                siftDownComparable(i, (E) es[i], es, n);
+        else
+            for (; i >= 0; i--)
+                siftDownUsingComparator(i, (E) es[i], es, n, cmp);
     }
 
     /**
@@ -479,15 +478,15 @@
         final ReentrantLock lock = this.lock;
         lock.lock();
         int n, cap;
-        Object[] array;
-        while ((n = size) >= (cap = (array = queue).length))
-            tryGrow(array, cap);
+        Object[] es;
+        while ((n = size) >= (cap = (es = queue).length))
+            tryGrow(es, cap);
         try {
-            Comparator<? super E> cmp = comparator;
-            if (cmp == null)
-                siftUpComparable(n, e, array);
+            final Comparator<? super E> cmp;
+            if ((cmp = comparator) == null)
+                siftUpComparable(n, e, es);
             else
-                siftUpUsingComparator(n, e, array, cmp);
+                siftUpUsingComparator(n, e, es, cmp);
             size = n + 1;
             notEmpty.signal();
         } finally {
@@ -570,7 +569,7 @@
         final ReentrantLock lock = this.lock;
         lock.lock();
         try {
-            return (size == 0) ? null : (E) queue[0];
+            return (E) queue[0];
         } finally {
             lock.unlock();
         }
@@ -610,10 +609,9 @@
 
     private int indexOf(Object o) {
         if (o != null) {
-            Object[] array = queue;
-            int n = size;
-            for (int i = 0; i < n; i++)
-                if (o.equals(array[i]))
+            final Object[] es = queue;
+            for (int i = 0, n = size; i < n; i++)
+                if (o.equals(es[i]))
                     return i;
         }
         return -1;
@@ -623,23 +621,23 @@
      * Removes the ith element from queue.
      */
     private void removeAt(int i) {
-        Object[] array = queue;
-        int n = size - 1;
+        final Object[] es = queue;
+        final int n = size - 1;
         if (n == i) // removed last element
-            array[i] = null;
+            es[i] = null;
         else {
-            E moved = (E) array[n];
-            array[n] = null;
-            Comparator<? super E> cmp = comparator;
-            if (cmp == null)
-                siftDownComparable(i, moved, array, n);
+            E moved = (E) es[n];
+            es[n] = null;
+            final Comparator<? super E> cmp;
+            if ((cmp = comparator) == null)
+                siftDownComparable(i, moved, es, n);
             else
-                siftDownUsingComparator(i, moved, array, n, cmp);
-            if (array[i] == moved) {
+                siftDownUsingComparator(i, moved, es, n, cmp);
+            if (es[i] == moved) {
                 if (cmp == null)
-                    siftUpComparable(i, moved, array);
+                    siftUpComparable(i, moved, es);
                 else
-                    siftUpUsingComparator(i, moved, array, cmp);
+                    siftUpUsingComparator(i, moved, es, cmp);
             }
         }
         size = n;
@@ -672,14 +670,16 @@
 
     /**
      * Identity-based version for use in Itr.remove.
+     *
+     * @param o element to be removed from this queue, if present
      */
-    void removeEQ(Object o) {
+    void removeEq(Object o) {
         final ReentrantLock lock = this.lock;
         lock.lock();
         try {
-            Object[] array = queue;
+            final Object[] es = queue;
             for (int i = 0, n = size; i < n; i++) {
-                if (o == array[i]) {
+                if (o == es[i]) {
                     removeAt(i);
                     break;
                 }
@@ -728,8 +728,7 @@
      * @throws IllegalArgumentException      {@inheritDoc}
      */
     public int drainTo(Collection<? super E> c, int maxElements) {
-        if (c == null)
-            throw new NullPointerException();
+        Objects.requireNonNull(c);
         if (c == this)
             throw new IllegalArgumentException();
         if (maxElements <= 0)
@@ -756,11 +755,10 @@
         final ReentrantLock lock = this.lock;
         lock.lock();
         try {
-            Object[] array = queue;
-            int n = size;
+            final Object[] es = queue;
+            for (int i = 0, n = size; i < n; i++)
+                es[i] = null;
             size = 0;
-            for (int i = 0; i < n; i++)
-                array[i] = null;
         } finally {
             lock.unlock();
         }
@@ -861,10 +859,9 @@
     final class Itr implements Iterator<E> {
         final Object[] array; // Array of all elements
         int cursor;           // index of next element to return
-        int lastRet;          // index of last element, or -1 if no such
+        int lastRet = -1;     // index of last element, or -1 if no such
 
         Itr(Object[] array) {
-            lastRet = -1;
             this.array = array;
         }
 
@@ -875,16 +872,28 @@
         public E next() {
             if (cursor >= array.length)
                 throw new NoSuchElementException();
-            lastRet = cursor;
-            return (E)array[cursor++];
+            return (E)array[lastRet = cursor++];
         }
 
         public void remove() {
             if (lastRet < 0)
                 throw new IllegalStateException();
-            removeEQ(array[lastRet]);
+            removeEq(array[lastRet]);
             lastRet = -1;
         }
+
+        public void forEachRemaining(Consumer<? super E> action) {
+            Objects.requireNonNull(action);
+            final Object[] es = array;
+            int i;
+            if ((i = cursor) < es.length) {
+                lastRet = -1;
+                cursor = es.length;
+                for (; i < es.length; i++)
+                    action.accept((E) es[i]);
+                lastRet = es.length - 1;
+            }
+        }
     }
 
     /**
@@ -922,7 +931,9 @@
         throws java.io.IOException, ClassNotFoundException {
         try {
             s.defaultReadObject();
-            this.queue = new Object[q.size()];
+            int sz = q.size();
+            SharedSecrets.getJavaObjectInputStreamAccess().checkArray(s, Object[].class, sz);
+            this.queue = new Object[Math.max(1, sz)];
             comparator = q.comparator();
             addAll(q);
         } finally {
@@ -930,68 +941,65 @@
         }
     }
 
-    // Similar to Collections.ArraySnapshotSpliterator but avoids
-    // commitment to toArray until needed
-    static final class PBQSpliterator<E> implements Spliterator<E> {
-        final PriorityBlockingQueue<E> queue;
-        Object[] array;
+    /**
+     * Immutable snapshot spliterator that binds to elements "late".
+     */
+    final class PBQSpliterator implements Spliterator<E> {
+        Object[] array;        // null until late-bound-initialized
         int index;
         int fence;
 
-        PBQSpliterator(PriorityBlockingQueue<E> queue, Object[] array,
-                       int index, int fence) {
-            this.queue = queue;
+        PBQSpliterator() {}
+
+        PBQSpliterator(Object[] array, int index, int fence) {
             this.array = array;
             this.index = index;
             this.fence = fence;
         }
 
-        final int getFence() {
-            int hi;
-            if ((hi = fence) < 0)
-                hi = fence = (array = queue.toArray()).length;
-            return hi;
+        private int getFence() {
+            if (array == null)
+                fence = (array = toArray()).length;
+            return fence;
         }
 
-        public PBQSpliterator<E> trySplit() {
+        public PBQSpliterator trySplit() {
             int hi = getFence(), lo = index, mid = (lo + hi) >>> 1;
             return (lo >= mid) ? null :
-                new PBQSpliterator<E>(queue, array, lo, index = mid);
+                new PBQSpliterator(array, lo, index = mid);
         }
 
-        @SuppressWarnings("unchecked")
         public void forEachRemaining(Consumer<? super E> action) {
-            Object[] a; int i, hi; // hoist accesses and checks from loop
-            if (action == null)
-                throw new NullPointerException();
-            if ((a = array) == null)
-                fence = (a = queue.toArray()).length;
-            if ((hi = fence) <= a.length &&
-                (i = index) >= 0 && i < (index = hi)) {
-                do { action.accept((E)a[i]); } while (++i < hi);
-            }
+            Objects.requireNonNull(action);
+            final int hi = getFence(), lo = index;
+            final Object[] es = array;
+            index = hi;                 // ensure exhaustion
+            for (int i = lo; i < hi; i++)
+                action.accept((E) es[i]);
         }
 
         public boolean tryAdvance(Consumer<? super E> action) {
-            if (action == null)
-                throw new NullPointerException();
+            Objects.requireNonNull(action);
             if (getFence() > index && index >= 0) {
-                @SuppressWarnings("unchecked") E e = (E) array[index++];
-                action.accept(e);
+                action.accept((E) array[index++]);
                 return true;
             }
             return false;
         }
 
-        public long estimateSize() { return (long)(getFence() - index); }
+        public long estimateSize() { return getFence() - index; }
 
         public int characteristics() {
-            return Spliterator.NONNULL | Spliterator.SIZED | Spliterator.SUBSIZED;
+            return (Spliterator.NONNULL |
+                    Spliterator.SIZED |
+                    Spliterator.SUBSIZED);
         }
     }
 
     /**
      * Returns a {@link Spliterator} over the elements in this queue.
+     * The spliterator does not traverse elements in any particular order
+     * (the {@link Spliterator#ORDERED ORDERED} characteristic is not reported).
      *
      * <p>The returned spliterator is
      * <a href="package-summary.html#Weakly"><i>weakly consistent</i></a>.
@@ -1006,18 +1014,106 @@
      * @since 1.8
      */
     public Spliterator<E> spliterator() {
-        return new PBQSpliterator<E>(this, null, 0, -1);
+        return new PBQSpliterator();
     }
 
-    // Unsafe mechanics
-    private static final sun.misc.Unsafe U = sun.misc.Unsafe.getUnsafe();
-    private static final long ALLOCATIONSPINLOCK;
+    /**
+     * @throws NullPointerException {@inheritDoc}
+     */
+    public boolean removeIf(Predicate<? super E> filter) {
+        Objects.requireNonNull(filter);
+        return bulkRemove(filter);
+    }
+
+    /**
+     * @throws NullPointerException {@inheritDoc}
+     */
+    public boolean removeAll(Collection<?> c) {
+        Objects.requireNonNull(c);
+        return bulkRemove(e -> c.contains(e));
+    }
+
+    /**
+     * @throws NullPointerException {@inheritDoc}
+     */
+    public boolean retainAll(Collection<?> c) {
+        Objects.requireNonNull(c);
+        return bulkRemove(e -> !c.contains(e));
+    }
+
+    // A tiny bit set implementation
+
+    private static long[] nBits(int n) {
+        return new long[((n - 1) >> 6) + 1];
+    }
+    private static void setBit(long[] bits, int i) {
+        bits[i >> 6] |= 1L << i;
+    }
+    private static boolean isClear(long[] bits, int i) {
+        return (bits[i >> 6] & (1L << i)) == 0;
+    }
+
+    /** Implementation of bulk remove methods. */
+    private boolean bulkRemove(Predicate<? super E> filter) {
+        final ReentrantLock lock = this.lock;
+        lock.lock();
+        try {
+            final Object[] es = queue;
+            final int end = size;
+            int i;
+            // Optimize for initial run of survivors
+            for (i = 0; i < end && !filter.test((E) es[i]); i++)
+                ;
+            if (i >= end)
+                return false;
+            // Tolerate predicates that reentrantly access the
+            // collection for read, so traverse once to find elements
+            // to delete, a second pass to physically expunge.
+            final int beg = i;
+            final long[] deathRow = nBits(end - beg);
+            deathRow[0] = 1L;   // set bit 0
+            for (i = beg + 1; i < end; i++)
+                if (filter.test((E) es[i]))
+                    setBit(deathRow, i - beg);
+            int w = beg;
+            for (i = beg; i < end; i++)
+                if (isClear(deathRow, i - beg))
+                    es[w++] = es[i];
+            for (i = size = w; i < end; i++)
+                es[i] = null;
+            heapify();
+            return true;
+        } finally {
+            lock.unlock();
+        }
+    }
+
+    /**
+     * @throws NullPointerException {@inheritDoc}
+     */
+    public void forEach(Consumer<? super E> action) {
+        Objects.requireNonNull(action);
+        final ReentrantLock lock = this.lock;
+        lock.lock();
+        try {
+            final Object[] es = queue;
+            for (int i = 0, n = size; i < n; i++)
+                action.accept((E) es[i]);
+        } finally {
+            lock.unlock();
+        }
+    }
+
+    // VarHandle mechanics
+    private static final VarHandle ALLOCATIONSPINLOCK;
     static {
         try {
-            ALLOCATIONSPINLOCK = U.objectFieldOffset
-                (PriorityBlockingQueue.class.getDeclaredField("allocationSpinLock"));
+            MethodHandles.Lookup l = MethodHandles.lookup();
+            ALLOCATIONSPINLOCK = l.findVarHandle(PriorityBlockingQueue.class,
+                                                 "allocationSpinLock",
+                                                 int.class);
         } catch (ReflectiveOperationException e) {
-            throw new Error(e);
+            throw new ExceptionInInitializerError(e);
         }
     }
 }
diff --git a/ojluni/src/main/java/java/util/concurrent/ScheduledExecutorService.java b/ojluni/src/main/java/java/util/concurrent/ScheduledExecutorService.java
index 6b9fcc3..be75ff4 100644
--- a/ojluni/src/main/java/java/util/concurrent/ScheduledExecutorService.java
+++ b/ojluni/src/main/java/java/util/concurrent/ScheduledExecutorService.java
@@ -77,14 +77,11 @@
  *     Executors.newScheduledThreadPool(1);
  *
  *   public void beepForAnHour() {
- *     final Runnable beeper = new Runnable() {
- *       public void run() { System.out.println("beep"); }
- *     };
- *     final ScheduledFuture&lt;?&lt; beeperHandle =
+ *     Runnable beeper = () -> System.out.println("beep");
+ *     ScheduledFuture<?> beeperHandle =
  *       scheduler.scheduleAtFixedRate(beeper, 10, 10, SECONDS);
- *     scheduler.schedule(new Runnable() {
- *       public void run() { beeperHandle.cancel(true); }
- *     }, 60 * 60, SECONDS);
+ *     Runnable canceller = () -> beeperHandle.cancel(false);
+ *     scheduler.schedule(canceller, 1, HOURS);
  *   }
  * }}</pre>
  *
@@ -94,8 +91,7 @@
 public interface ScheduledExecutorService extends ExecutorService {
 
     /**
-     * Creates and executes a one-shot action that becomes enabled
-     * after the given delay.
+     * Submits a one-shot task that becomes enabled after the given delay.
      *
      * @param command the task to execute
      * @param delay the time from now to delay execution
@@ -105,14 +101,14 @@
      *         {@code null} upon completion
      * @throws RejectedExecutionException if the task cannot be
      *         scheduled for execution
-     * @throws NullPointerException if command is null
+     * @throws NullPointerException if command or unit is null
      */
     public ScheduledFuture<?> schedule(Runnable command,
                                        long delay, TimeUnit unit);
 
     /**
-     * Creates and executes a ScheduledFuture that becomes enabled after the
-     * given delay.
+     * Submits a value-returning one-shot task that becomes enabled
+     * after the given delay.
      *
      * @param callable the function to execute
      * @param delay the time from now to delay execution
@@ -121,15 +117,15 @@
      * @return a ScheduledFuture that can be used to extract result or cancel
      * @throws RejectedExecutionException if the task cannot be
      *         scheduled for execution
-     * @throws NullPointerException if callable is null
+     * @throws NullPointerException if callable or unit is null
      */
     public <V> ScheduledFuture<V> schedule(Callable<V> callable,
                                            long delay, TimeUnit unit);
 
     /**
-     * Creates and executes a periodic action that becomes enabled first
-     * after the given initial delay, and subsequently with the given
-     * period; that is, executions will commence after
+     * Submits a periodic action that becomes enabled first after the
+     * given initial delay, and subsequently with the given period;
+     * that is, executions will commence after
      * {@code initialDelay}, then {@code initialDelay + period}, then
      * {@code initialDelay + 2 * period}, and so on.
      *
@@ -140,8 +136,8 @@
      * via the returned future.
      * <li>The executor terminates, also resulting in task cancellation.
      * <li>An execution of the task throws an exception.  In this case
-     * calling {@link Future#get() get} on the returned future will
-     * throw {@link ExecutionException}.
+     * calling {@link Future#get() get} on the returned future will throw
+     * {@link ExecutionException}, holding the exception as its cause.
      * </ul>
      * Subsequent executions are suppressed.  Subsequent calls to
      * {@link Future#isDone isDone()} on the returned future will
@@ -162,7 +158,7 @@
      *         abnormal termination of a task execution.
      * @throws RejectedExecutionException if the task cannot be
      *         scheduled for execution
-     * @throws NullPointerException if command is null
+     * @throws NullPointerException if command or unit is null
      * @throws IllegalArgumentException if period less than or equal to zero
      */
     public ScheduledFuture<?> scheduleAtFixedRate(Runnable command,
@@ -171,10 +167,10 @@
                                                   TimeUnit unit);
 
     /**
-     * Creates and executes a periodic action that becomes enabled first
-     * after the given initial delay, and subsequently with the
-     * given delay between the termination of one execution and the
-     * commencement of the next.
+     * Submits a periodic action that becomes enabled first after the
+     * given initial delay, and subsequently with the given delay
+     * between the termination of one execution and the commencement of
+     * the next.
      *
      * <p>The sequence of task executions continues indefinitely until
      * one of the following exceptional completions occur:
@@ -183,8 +179,8 @@
      * via the returned future.
      * <li>The executor terminates, also resulting in task cancellation.
      * <li>An execution of the task throws an exception.  In this case
-     * calling {@link Future#get() get} on the returned future will
-     * throw {@link ExecutionException}.
+     * calling {@link Future#get() get} on the returned future will throw
+     * {@link ExecutionException}, holding the exception as its cause.
      * </ul>
      * Subsequent executions are suppressed.  Subsequent calls to
      * {@link Future#isDone isDone()} on the returned future will
@@ -202,7 +198,7 @@
      *         abnormal termination of a task execution.
      * @throws RejectedExecutionException if the task cannot be
      *         scheduled for execution
-     * @throws NullPointerException if command is null
+     * @throws NullPointerException if command or unit is null
      * @throws IllegalArgumentException if delay less than or equal to zero
      */
     public ScheduledFuture<?> scheduleWithFixedDelay(Runnable command,
diff --git a/ojluni/src/main/java/java/util/concurrent/ScheduledThreadPoolExecutor.java b/ojluni/src/main/java/java/util/concurrent/ScheduledThreadPoolExecutor.java
index 4249872..6aba89a 100644
--- a/ojluni/src/main/java/java/util/concurrent/ScheduledThreadPoolExecutor.java
+++ b/ojluni/src/main/java/java/util/concurrent/ScheduledThreadPoolExecutor.java
@@ -44,6 +44,7 @@
 import java.util.Iterator;
 import java.util.List;
 import java.util.NoSuchElementException;
+import java.util.Objects;
 import java.util.concurrent.atomic.AtomicLong;
 import java.util.concurrent.locks.Condition;
 import java.util.concurrent.locks.ReentrantLock;
@@ -89,6 +90,11 @@
  * use {@code allowCoreThreadTimeOut} because this may leave the pool
  * without threads to handle tasks once they become eligible to run.
  *
+ * <p>As with {@code ThreadPoolExecutor}, if not otherwise specified,
+ * this class uses {@link Executors#defaultThreadFactory} as the
+ * default thread factory, and {@link ThreadPoolExecutor.AbortPolicy}
+ * as the default rejected execution handler.
+ *
  * <p><b>Extension notes:</b> This class overrides the
  * {@link ThreadPoolExecutor#execute(Runnable) execute} and
  * {@link AbstractExecutorService#submit(Runnable) submit}
@@ -163,7 +169,7 @@
     private volatile boolean continueExistingPeriodicTasksAfterShutdown;
 
     /**
-     * False if should cancel non-periodic tasks on shutdown.
+     * False if should cancel non-periodic not-yet-expired tasks on shutdown.
      */
     private volatile boolean executeExistingDelayedTasksAfterShutdown = true;
 
@@ -294,10 +300,9 @@
          * Overrides FutureTask version so as to reset/requeue if periodic.
          */
         public void run() {
-            boolean periodic = isPeriodic();
-            if (!canRunInCurrentRunState(periodic))
+            if (!canRunInCurrentRunState(this))
                 cancel(false);
-            else if (!periodic)
+            else if (!isPeriodic())
                 super.run();
             else if (super.runAndReset()) {
                 setNextRunTime();
@@ -307,15 +312,18 @@
     }
 
     /**
-     * Returns true if can run a task given current run state
-     * and run-after-shutdown parameters.
-     *
-     * @param periodic true if this task periodic, false if delayed
+     * Returns true if can run a task given current run state and
+     * run-after-shutdown parameters.
      */
-    boolean canRunInCurrentRunState(boolean periodic) {
-        return isRunningOrShutdown(periodic ?
-                                   continueExistingPeriodicTasksAfterShutdown :
-                                   executeExistingDelayedTasksAfterShutdown);
+    boolean canRunInCurrentRunState(RunnableScheduledFuture<?> task) {
+        if (!isShutdown())
+            return true;
+        if (isStopped())
+            return false;
+        return task.isPeriodic()
+            ? continueExistingPeriodicTasksAfterShutdown
+            : (executeExistingDelayedTasksAfterShutdown
+               || task.getDelay(NANOSECONDS) <= 0);
     }
 
     /**
@@ -334,9 +342,7 @@
             reject(task);
         else {
             super.getQueue().add(task);
-            if (isShutdown() &&
-                !canRunInCurrentRunState(task.isPeriodic()) &&
-                remove(task))
+            if (!canRunInCurrentRunState(task) && remove(task))
                 task.cancel(false);
             else
                 ensurePrestart();
@@ -350,13 +356,14 @@
      * @param task the task
      */
     void reExecutePeriodic(RunnableScheduledFuture<?> task) {
-        if (canRunInCurrentRunState(true)) {
+        if (canRunInCurrentRunState(task)) {
             super.getQueue().add(task);
-            if (!canRunInCurrentRunState(true) && remove(task))
-                task.cancel(false);
-            else
+            if (canRunInCurrentRunState(task) || !remove(task)) {
                 ensurePrestart();
+                return;
+            }
         }
+        task.cancel(false);
     }
 
     /**
@@ -369,23 +376,18 @@
             getExecuteExistingDelayedTasksAfterShutdownPolicy();
         boolean keepPeriodic =
             getContinueExistingPeriodicTasksAfterShutdownPolicy();
-        if (!keepDelayed && !keepPeriodic) {
-            for (Object e : q.toArray())
-                if (e instanceof RunnableScheduledFuture<?>)
-                    ((RunnableScheduledFuture<?>) e).cancel(false);
-            q.clear();
-        }
-        else {
-            // Traverse snapshot to avoid iterator exceptions
-            for (Object e : q.toArray()) {
-                if (e instanceof RunnableScheduledFuture) {
-                    RunnableScheduledFuture<?> t =
-                        (RunnableScheduledFuture<?>)e;
-                    if ((t.isPeriodic() ? !keepPeriodic : !keepDelayed) ||
-                        t.isCancelled()) { // also remove if already cancelled
-                        if (q.remove(t))
-                            t.cancel(false);
-                    }
+        // Traverse snapshot to avoid iterator exceptions
+        // TODO: implement and use efficient removeIf
+        // super.getQueue().removeIf(...);
+        for (Object e : q.toArray()) {
+            if (e instanceof RunnableScheduledFuture) {
+                RunnableScheduledFuture<?> t = (RunnableScheduledFuture<?>)e;
+                if ((t.isPeriodic()
+                     ? !keepPeriodic
+                     : (!keepDelayed && t.getDelay(NANOSECONDS) > 0))
+                    || t.isCancelled()) { // also remove if already cancelled
+                    if (q.remove(t))
+                        t.cancel(false);
                 }
             }
         }
@@ -581,6 +583,34 @@
     }
 
     /**
+     * Submits a periodic action that becomes enabled first after the
+     * given initial delay, and subsequently with the given period;
+     * that is, executions will commence after
+     * {@code initialDelay}, then {@code initialDelay + period}, then
+     * {@code initialDelay + 2 * period}, and so on.
+     *
+     * <p>The sequence of task executions continues indefinitely until
+     * one of the following exceptional completions occur:
+     * <ul>
+     * <li>The task is {@linkplain Future#cancel explicitly cancelled}
+     * via the returned future.
+     * <li>Method {@link #shutdown} is called and the {@linkplain
+     * #getContinueExistingPeriodicTasksAfterShutdownPolicy policy on
+     * whether to continue after shutdown} is not set true, or method
+     * {@link #shutdownNow} is called; also resulting in task
+     * cancellation.
+     * <li>An execution of the task throws an exception.  In this case
+     * calling {@link Future#get() get} on the returned future will throw
+     * {@link ExecutionException}, holding the exception as its cause.
+     * </ul>
+     * Subsequent executions are suppressed.  Subsequent calls to
+     * {@link Future#isDone isDone()} on the returned future will
+     * return {@code true}.
+     *
+     * <p>If any execution of this task takes longer than its period, then
+     * subsequent executions may start late, but will not concurrently
+     * execute.
+     *
      * @throws RejectedExecutionException {@inheritDoc}
      * @throws NullPointerException       {@inheritDoc}
      * @throws IllegalArgumentException   {@inheritDoc}
@@ -606,6 +636,29 @@
     }
 
     /**
+     * Submits a periodic action that becomes enabled first after the
+     * given initial delay, and subsequently with the given delay
+     * between the termination of one execution and the commencement of
+     * the next.
+     *
+     * <p>The sequence of task executions continues indefinitely until
+     * one of the following exceptional completions occur:
+     * <ul>
+     * <li>The task is {@linkplain Future#cancel explicitly cancelled}
+     * via the returned future.
+     * <li>Method {@link #shutdown} is called and the {@linkplain
+     * #getContinueExistingPeriodicTasksAfterShutdownPolicy policy on
+     * whether to continue after shutdown} is not set true, or method
+     * {@link #shutdownNow} is called; also resulting in task
+     * cancellation.
+     * <li>An execution of the task throws an exception.  In this case
+     * calling {@link Future#get() get} on the returned future will throw
+     * {@link ExecutionException}, holding the exception as its cause.
+     * </ul>
+     * Subsequent executions are suppressed.  Subsequent calls to
+     * {@link Future#isDone isDone()} on the returned future will
+     * return {@code true}.
+     *
      * @throws RejectedExecutionException {@inheritDoc}
      * @throws NullPointerException       {@inheritDoc}
      * @throws IllegalArgumentException   {@inheritDoc}
@@ -683,9 +736,8 @@
     /**
      * Sets the policy on whether to continue executing existing
      * periodic tasks even when this executor has been {@code shutdown}.
-     * In this case, these tasks will only terminate upon
-     * {@code shutdownNow} or after setting the policy to
-     * {@code false} when already shutdown.
+     * In this case, executions will continue until {@code shutdownNow}
+     * or the policy is set to {@code false} when already shutdown.
      * This value is by default {@code false}.
      *
      * @param value if {@code true}, continue after shutdown, else don't
@@ -700,9 +752,8 @@
     /**
      * Gets the policy on whether to continue executing existing
      * periodic tasks even when this executor has been {@code shutdown}.
-     * In this case, these tasks will only terminate upon
-     * {@code shutdownNow} or after setting the policy to
-     * {@code false} when already shutdown.
+     * In this case, executions will continue until {@code shutdownNow}
+     * or the policy is set to {@code false} when already shutdown.
      * This value is by default {@code false}.
      *
      * @return {@code true} if will continue after shutdown
@@ -905,7 +956,7 @@
         /**
          * Sets f's heapIndex if it is a ScheduledFutureTask.
          */
-        private void setIndex(RunnableScheduledFuture<?> f, int idx) {
+        private static void setIndex(RunnableScheduledFuture<?> f, int idx) {
             if (f instanceof ScheduledFutureTask)
                 ((ScheduledFutureTask)f).heapIndex = idx;
         }
@@ -1203,41 +1254,12 @@
             }
         }
 
-        /**
-         * Returns first element only if it is expired.
-         * Used only by drainTo.  Call only when holding lock.
-         */
-        private RunnableScheduledFuture<?> peekExpired() {
-            // assert lock.isHeldByCurrentThread();
-            RunnableScheduledFuture<?> first = queue[0];
-            return (first == null || first.getDelay(NANOSECONDS) > 0) ?
-                null : first;
-        }
-
         public int drainTo(Collection<? super Runnable> c) {
-            if (c == null)
-                throw new NullPointerException();
-            if (c == this)
-                throw new IllegalArgumentException();
-            final ReentrantLock lock = this.lock;
-            lock.lock();
-            try {
-                RunnableScheduledFuture<?> first;
-                int n = 0;
-                while ((first = peekExpired()) != null) {
-                    c.add(first);   // In this order, in case add() throws.
-                    finishPoll(first);
-                    ++n;
-                }
-                return n;
-            } finally {
-                lock.unlock();
-            }
+            return drainTo(c, Integer.MAX_VALUE);
         }
 
         public int drainTo(Collection<? super Runnable> c, int maxElements) {
-            if (c == null)
-                throw new NullPointerException();
+            Objects.requireNonNull(c);
             if (c == this)
                 throw new IllegalArgumentException();
             if (maxElements <= 0)
@@ -1245,9 +1267,11 @@
             final ReentrantLock lock = this.lock;
             lock.lock();
             try {
-                RunnableScheduledFuture<?> first;
                 int n = 0;
-                while (n < maxElements && (first = peekExpired()) != null) {
+                for (RunnableScheduledFuture<?> first;
+                     n < maxElements
+                         && (first = queue[0]) != null
+                         && first.getDelay(NANOSECONDS) <= 0;) {
                     c.add(first);   // In this order, in case add() throws.
                     finishPoll(first);
                     ++n;
@@ -1285,7 +1309,13 @@
         }
 
         public Iterator<Runnable> iterator() {
-            return new Itr(Arrays.copyOf(queue, size));
+            final ReentrantLock lock = this.lock;
+            lock.lock();
+            try {
+                return new Itr(Arrays.copyOf(queue, size));
+            } finally {
+                lock.unlock();
+            }
         }
 
         /**
@@ -1307,8 +1337,7 @@
             public Runnable next() {
                 if (cursor >= array.length)
                     throw new NoSuchElementException();
-                lastRet = cursor;
-                return array[cursor++];
+                return array[lastRet = cursor++];
             }
 
             public void remove() {
diff --git a/ojluni/src/main/java/java/util/concurrent/Semaphore.java b/ojluni/src/main/java/java/util/concurrent/Semaphore.java
index 1298a6e..86ee638 100644
--- a/ojluni/src/main/java/java/util/concurrent/Semaphore.java
+++ b/ojluni/src/main/java/java/util/concurrent/Semaphore.java
@@ -72,8 +72,8 @@
  *   protected synchronized Object getNextAvailableItem() {
  *     for (int i = 0; i < MAX_AVAILABLE; ++i) {
  *       if (!used[i]) {
- *          used[i] = true;
- *          return items[i];
+ *         used[i] = true;
+ *         return items[i];
  *       }
  *     }
  *     return null; // not reached
@@ -82,11 +82,11 @@
  *   protected synchronized boolean markAsUnused(Object item) {
  *     for (int i = 0; i < MAX_AVAILABLE; ++i) {
  *       if (item == items[i]) {
- *          if (used[i]) {
- *            used[i] = false;
- *            return true;
- *          } else
- *            return false;
+ *         if (used[i]) {
+ *           used[i] = false;
+ *           return true;
+ *         } else
+ *           return false;
  *       }
  *     }
  *     return false;
@@ -359,7 +359,7 @@
      * This &quot;barging&quot; behavior can be useful in certain
      * circumstances, even though it breaks fairness. If you want to honor
      * the fairness setting, then use
-     * {@link #tryAcquire(long, TimeUnit) tryAcquire(0, TimeUnit.SECONDS) }
+     * {@link #tryAcquire(long, TimeUnit) tryAcquire(0, TimeUnit.SECONDS)}
      * which is almost equivalent (it also detects interruption).
      *
      * @return {@code true} if a permit was acquired and {@code false}
@@ -523,7 +523,7 @@
      * &quot;barging&quot; behavior can be useful in certain
      * circumstances, even though it breaks fairness. If you want to
      * honor the fairness setting, then use {@link #tryAcquire(int,
-     * long, TimeUnit) tryAcquire(permits, 0, TimeUnit.SECONDS) }
+     * long, TimeUnit) tryAcquire(permits, 0, TimeUnit.SECONDS)}
      * which is almost equivalent (it also detects interruption).
      *
      * @param permits the number of permits to acquire
@@ -631,9 +631,12 @@
     }
 
     /**
-     * Acquires and returns all permits that are immediately available.
+     * Acquires and returns all permits that are immediately
+     * available, or if negative permits are available, releases them.
+     * Upon return, zero permits are available.
      *
-     * @return the number of permits acquired
+     * @return the number of permits acquired or, if negative, the
+     * number released
      */
     public int drainPermits() {
         return sync.drainPermits();
diff --git a/ojluni/src/main/java/java/util/concurrent/SubmissionPublisher.java b/ojluni/src/main/java/java/util/concurrent/SubmissionPublisher.java
new file mode 100644
index 0000000..9efb01d
--- /dev/null
+++ b/ojluni/src/main/java/java/util/concurrent/SubmissionPublisher.java
@@ -0,0 +1,1477 @@
+/*
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * This file is available under and governed by the GNU General Public
+ * License version 2 only, as published by the Free Software Foundation.
+ * However, the following notice accompanied the original version of this
+ * file:
+ *
+ * Written by Doug Lea with assistance from members of JCP JSR-166
+ * Expert Group and released to the public domain, as explained at
+ * http://creativecommons.org/publicdomain/zero/1.0/
+ */
+
+package java.util.concurrent;
+
+import java.lang.invoke.MethodHandles;
+import java.lang.invoke.VarHandle;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.concurrent.locks.LockSupport;
+import java.util.function.BiConsumer;
+import java.util.function.BiPredicate;
+import java.util.function.Consumer;
+import static java.util.concurrent.Flow.Publisher;
+import static java.util.concurrent.Flow.Subscriber;
+import static java.util.concurrent.Flow.Subscription;
+
+/**
+ * A {@link Flow.Publisher} that asynchronously issues submitted
+ * (non-null) items to current subscribers until it is closed.  Each
+ * current subscriber receives newly submitted items in the same order
+ * unless drops or exceptions are encountered.  Using a
+ * SubmissionPublisher allows item generators to act as compliant <a
+ * href="http://www.reactive-streams.org/"> reactive-streams</a>
+ * Publishers relying on drop handling and/or blocking for flow
+ * control.
+ *
+ * <p>A SubmissionPublisher uses the {@link Executor} supplied in its
+ * constructor for delivery to subscribers. The best choice of
+ * Executor depends on expected usage. If the generator(s) of
+ * submitted items run in separate threads, and the number of
+ * subscribers can be estimated, consider using a {@link
+ * Executors#newFixedThreadPool}. Otherwise consider using the
+ * default, normally the {@link ForkJoinPool#commonPool}.
+ *
+ * <p>Buffering allows producers and consumers to transiently operate
+ * at different rates.  Each subscriber uses an independent buffer.
+ * Buffers are created upon first use and expanded as needed up to the
+ * given maximum. (The enforced capacity may be rounded up to the
+ * nearest power of two and/or bounded by the largest value supported
+ * by this implementation.)  Invocations of {@link
+ * Flow.Subscription#request(long) request} do not directly result in
+ * buffer expansion, but risk saturation if unfilled requests exceed
+ * the maximum capacity.  The default value of {@link
+ * Flow#defaultBufferSize()} may provide a useful starting point for
+ * choosing a capacity based on expected rates, resources, and usages.
+ *
+ * <p>A single SubmissionPublisher may be shared among multiple
+ * sources. Actions in a source thread prior to publishing an item or
+ * issuing a signal <a href="package-summary.html#MemoryVisibility">
+ * <i>happen-before</i></a> actions subsequent to the corresponding
+ * access by each subscriber. But reported estimates of lag and demand
+ * are designed for use in monitoring, not for synchronization
+ * control, and may reflect stale or inaccurate views of progress.
+ *
+ * <p>Publication methods support different policies about what to do
+ * when buffers are saturated. Method {@link #submit(Object) submit}
+ * blocks until resources are available. This is simplest, but least
+ * responsive.  The {@code offer} methods may drop items (either
+ * immediately or with bounded timeout), but provide an opportunity to
+ * interpose a handler and then retry.
+ *
+ * <p>If any Subscriber method throws an exception, its subscription
+ * is cancelled.  If a handler is supplied as a constructor argument,
+ * it is invoked before cancellation upon an exception in method
+ * {@link Flow.Subscriber#onNext onNext}, but exceptions in methods
+ * {@link Flow.Subscriber#onSubscribe onSubscribe},
+ * {@link Flow.Subscriber#onError(Throwable) onError} and
+ * {@link Flow.Subscriber#onComplete() onComplete} are not recorded or
+ * handled before cancellation.  If the supplied Executor throws
+ * {@link RejectedExecutionException} (or any other RuntimeException
+ * or Error) when attempting to execute a task, or a drop handler
+ * throws an exception when processing a dropped item, then the
+ * exception is rethrown. In these cases, not all subscribers will
+ * have been issued the published item. It is usually good practice to
+ * {@link #closeExceptionally closeExceptionally} in these cases.
+ *
+ * <p>Method {@link #consume(Consumer)} simplifies support for a
+ * common case in which the only action of a subscriber is to request
+ * and process all items using a supplied function.
+ *
+ * <p>This class may also serve as a convenient base for subclasses
+ * that generate items, and use the methods in this class to publish
+ * them.  For example here is a class that periodically publishes the
+ * items generated from a supplier. (In practice you might add methods
+ * to independently start and stop generation, to share Executors
+ * among publishers, and so on, or use a SubmissionPublisher as a
+ * component rather than a superclass.)
+ *
+ * <pre> {@code
+ * class PeriodicPublisher<T> extends SubmissionPublisher<T> {
+ *   final ScheduledFuture<?> periodicTask;
+ *   final ScheduledExecutorService scheduler;
+ *   PeriodicPublisher(Executor executor, int maxBufferCapacity,
+ *                     Supplier<? extends T> supplier,
+ *                     long period, TimeUnit unit) {
+ *     super(executor, maxBufferCapacity);
+ *     scheduler = new ScheduledThreadPoolExecutor(1);
+ *     periodicTask = scheduler.scheduleAtFixedRate(
+ *       () -> submit(supplier.get()), 0, period, unit);
+ *   }
+ *   public void close() {
+ *     periodicTask.cancel(false);
+ *     scheduler.shutdown();
+ *     super.close();
+ *   }
+ * }}</pre>
+ *
+ * <p>Here is an example of a {@link Flow.Processor} implementation.
+ * It uses single-step requests to its publisher for simplicity of
+ * illustration. A more adaptive version could monitor flow using the
+ * lag estimate returned from {@code submit}, along with other utility
+ * methods.
+ *
+ * <pre> {@code
+ * class TransformProcessor<S,T> extends SubmissionPublisher<T>
+ *   implements Flow.Processor<S,T> {
+ *   final Function<? super S, ? extends T> function;
+ *   Flow.Subscription subscription;
+ *   TransformProcessor(Executor executor, int maxBufferCapacity,
+ *                      Function<? super S, ? extends T> function) {
+ *     super(executor, maxBufferCapacity);
+ *     this.function = function;
+ *   }
+ *   public void onSubscribe(Flow.Subscription subscription) {
+ *     (this.subscription = subscription).request(1);
+ *   }
+ *   public void onNext(S item) {
+ *     subscription.request(1);
+ *     submit(function.apply(item));
+ *   }
+ *   public void onError(Throwable ex) { closeExceptionally(ex); }
+ *   public void onComplete() { close(); }
+ * }}</pre>
+ *
+ * @param <T> the published item type
+ * @author Doug Lea
+ * @since 9
+ */
+public class SubmissionPublisher<T> implements Publisher<T>,
+                                               AutoCloseable {
+    /*
+     * Most mechanics are handled by BufferedSubscription. This class
+     * mainly tracks subscribers and ensures sequentiality, by using
+     * built-in synchronization locks across public methods. Using
+     * built-in locks works well in the most typical case in which
+     * only one thread submits items. We extend this idea in
+     * submission methods by detecting single-ownership to reduce
+     * producer-consumer synchronization strength.
+     */
+
+    /** The largest possible power of two array size. */
+    static final int BUFFER_CAPACITY_LIMIT = 1 << 30;
+
+    /**
+     * Initial buffer capacity used when maxBufferCapacity is
+     * greater. Must be a power of two.
+     */
+    static final int INITIAL_CAPACITY = 32;
+
+    /** Round capacity to power of 2, at most limit. */
+    static final int roundCapacity(int cap) {
+        int n = cap - 1;
+        n |= n >>> 1;
+        n |= n >>> 2;
+        n |= n >>> 4;
+        n |= n >>> 8;
+        n |= n >>> 16;
+        return (n <= 0) ? 1 : // at least 1
+            (n >= BUFFER_CAPACITY_LIMIT) ? BUFFER_CAPACITY_LIMIT : n + 1;
+    }
+
+    // default Executor setup; nearly the same as CompletableFuture
+
+    /**
+     * Default executor -- ForkJoinPool.commonPool() unless it cannot
+     * support parallelism.
+     */
+    private static final Executor ASYNC_POOL =
+        (ForkJoinPool.getCommonPoolParallelism() > 1) ?
+        ForkJoinPool.commonPool() : new ThreadPerTaskExecutor();
+
+    /** Fallback if ForkJoinPool.commonPool() cannot support parallelism */
+    private static final class ThreadPerTaskExecutor implements Executor {
+        ThreadPerTaskExecutor() {}      // prevent access constructor creation
+        public void execute(Runnable r) { new Thread(r).start(); }
+    }
+
+    /**
+     * Clients (BufferedSubscriptions) are maintained in a linked list
+     * (via their "next" fields). This works well for publish loops.
+     * It requires O(n) traversal to check for duplicate subscribers,
+     * but we expect that subscribing is much less common than
+     * publishing. Unsubscribing occurs only during traversal loops,
+     * when BufferedSubscription methods return negative values
+     * signifying that they have been closed.  To reduce
+     * head-of-line blocking, submit and offer methods first call
+     * BufferedSubscription.offer on each subscriber, and place
+     * saturated ones in retries list (using nextRetry field), and
+     * retry, possibly blocking or dropping.
+     */
+    BufferedSubscription<T> clients;
+
+    /** Run status, updated only within locks */
+    volatile boolean closed;
+    /** Set true on first call to subscribe, to initialize possible owner */
+    boolean subscribed;
+    /** The first caller thread to subscribe, or null if thread ever changed */
+    Thread owner;
+    /** If non-null, the exception in closeExceptionally */
+    volatile Throwable closedException;
+
+    // Parameters for constructing BufferedSubscriptions
+    final Executor executor;
+    final BiConsumer<? super Subscriber<? super T>, ? super Throwable> onNextHandler;
+    final int maxBufferCapacity;
+
+    /**
+     * Creates a new SubmissionPublisher using the given Executor for
+     * async delivery to subscribers, with the given maximum buffer size
+     * for each subscriber, and, if non-null, the given handler invoked
+     * when any Subscriber throws an exception in method {@link
+     * Flow.Subscriber#onNext(Object) onNext}.
+     *
+     * @param executor the executor to use for async delivery,
+     * supporting creation of at least one independent thread
+     * @param maxBufferCapacity the maximum capacity for each
+     * subscriber's buffer (the enforced capacity may be rounded up to
+     * the nearest power of two and/or bounded by the largest value
+     * supported by this implementation; method {@link #getMaxBufferCapacity}
+     * returns the actual value)
+     * @param handler if non-null, procedure to invoke upon exception
+     * thrown in method {@code onNext}
+     * @throws NullPointerException if executor is null
+     * @throws IllegalArgumentException if maxBufferCapacity not
+     * positive
+     */
+    public SubmissionPublisher(Executor executor, int maxBufferCapacity,
+                               BiConsumer<? super Subscriber<? super T>, ? super Throwable> handler) {
+        if (executor == null)
+            throw new NullPointerException();
+        if (maxBufferCapacity <= 0)
+            throw new IllegalArgumentException("capacity must be positive");
+        this.executor = executor;
+        this.onNextHandler = handler;
+        this.maxBufferCapacity = roundCapacity(maxBufferCapacity);
+    }
+
+    /**
+     * Creates a new SubmissionPublisher using the given Executor for
+     * async delivery to subscribers, with the given maximum buffer size
+     * for each subscriber, and no handler for Subscriber exceptions in
+     * method {@link Flow.Subscriber#onNext(Object) onNext}.
+     *
+     * @param executor the executor to use for async delivery,
+     * supporting creation of at least one independent thread
+     * @param maxBufferCapacity the maximum capacity for each
+     * subscriber's buffer (the enforced capacity may be rounded up to
+     * the nearest power of two and/or bounded by the largest value
+     * supported by this implementation; method {@link #getMaxBufferCapacity}
+     * returns the actual value)
+     * @throws NullPointerException if executor is null
+     * @throws IllegalArgumentException if maxBufferCapacity not
+     * positive
+     */
+    public SubmissionPublisher(Executor executor, int maxBufferCapacity) {
+        this(executor, maxBufferCapacity, null);
+    }
+
+    /**
+     * Creates a new SubmissionPublisher using the {@link
+     * ForkJoinPool#commonPool()} for async delivery to subscribers
+     * (unless it does not support a parallelism level of at least two,
+     * in which case, a new Thread is created to run each task), with
+     * maximum buffer capacity of {@link Flow#defaultBufferSize}, and no
+     * handler for Subscriber exceptions in method {@link
+     * Flow.Subscriber#onNext(Object) onNext}.
+     */
+    public SubmissionPublisher() {
+        this(ASYNC_POOL, Flow.defaultBufferSize(), null);
+    }
+
+    /**
+     * Adds the given Subscriber unless already subscribed.  If already
+     * subscribed, the Subscriber's {@link
+     * Flow.Subscriber#onError(Throwable) onError} method is invoked on
+     * the existing subscription with an {@link IllegalStateException}.
+     * Otherwise, upon success, the Subscriber's {@link
+     * Flow.Subscriber#onSubscribe onSubscribe} method is invoked
+     * asynchronously with a new {@link Flow.Subscription}.  If {@link
+     * Flow.Subscriber#onSubscribe onSubscribe} throws an exception, the
+     * subscription is cancelled. Otherwise, if this SubmissionPublisher
+     * was closed exceptionally, then the subscriber's {@link
+     * Flow.Subscriber#onError onError} method is invoked with the
+     * corresponding exception, or if closed without exception, the
+     * subscriber's {@link Flow.Subscriber#onComplete() onComplete}
+     * method is invoked.  Subscribers may enable receiving items by
+     * invoking the {@link Flow.Subscription#request(long) request}
+     * method of the new Subscription, and may unsubscribe by invoking
+     * its {@link Flow.Subscription#cancel() cancel} method.
+     *
+     * @param subscriber the subscriber
+     * @throws NullPointerException if subscriber is null
+     */
+    public void subscribe(Subscriber<? super T> subscriber) {
+        if (subscriber == null) throw new NullPointerException();
+        int max = maxBufferCapacity; // allocate initial array
+        Object[] array = new Object[max < INITIAL_CAPACITY ?
+                                    max : INITIAL_CAPACITY];
+        BufferedSubscription<T> subscription =
+            new BufferedSubscription<T>(subscriber, executor, onNextHandler,
+                                        array, max);
+        synchronized (this) {
+            if (!subscribed) {
+                subscribed = true;
+                owner = Thread.currentThread();
+            }
+            for (BufferedSubscription<T> b = clients, pred = null;;) {
+                if (b == null) {
+                    Throwable ex;
+                    subscription.onSubscribe();
+                    if ((ex = closedException) != null)
+                        subscription.onError(ex);
+                    else if (closed)
+                        subscription.onComplete();
+                    else if (pred == null)
+                        clients = subscription;
+                    else
+                        pred.next = subscription;
+                    break;
+                }
+                BufferedSubscription<T> next = b.next;
+                if (b.isClosed()) {   // remove
+                    b.next = null;    // detach
+                    if (pred == null)
+                        clients = next;
+                    else
+                        pred.next = next;
+                }
+                else if (subscriber.equals(b.subscriber)) {
+                    b.onError(new IllegalStateException("Duplicate subscribe"));
+                    break;
+                }
+                else
+                    pred = b;
+                b = next;
+            }
+        }
+    }
+
+    /**
+     * Common implementation for all three forms of submit and offer.
+     * Acts as submit if nanos == Long.MAX_VALUE, else offer.
+     */
+    private int doOffer(T item, long nanos,
+                        BiPredicate<Subscriber<? super T>, ? super T> onDrop) {
+        if (item == null) throw new NullPointerException();
+        int lag = 0;
+        boolean complete, unowned;
+        synchronized (this) {
+            Thread t = Thread.currentThread(), o;
+            BufferedSubscription<T> b = clients;
+            if ((unowned = ((o = owner) != t)) && o != null)
+                owner = null;                     // disable bias
+            if (b == null)
+                complete = closed;
+            else {
+                complete = false;
+                boolean cleanMe = false;
+                BufferedSubscription<T> retries = null, rtail = null, next;
+                do {
+                    next = b.next;
+                    int stat = b.offer(item, unowned);
+                    if (stat == 0) {              // saturated; add to retry list
+                        b.nextRetry = null;       // avoid garbage on exceptions
+                        if (rtail == null)
+                            retries = b;
+                        else
+                            rtail.nextRetry = b;
+                        rtail = b;
+                    }
+                    else if (stat < 0)            // closed
+                        cleanMe = true;           // remove later
+                    else if (stat > lag)
+                        lag = stat;
+                } while ((b = next) != null);
+
+                if (retries != null || cleanMe)
+                    lag = retryOffer(item, nanos, onDrop, retries, lag, cleanMe);
+            }
+        }
+        if (complete)
+            throw new IllegalStateException("Closed");
+        else
+            return lag;
+    }
+
+    /**
+     * Helps, (timed) waits for, and/or drops buffers on list; returns
+     * lag or negative drops (for use in offer).
+     */
+    private int retryOffer(T item, long nanos,
+                           BiPredicate<Subscriber<? super T>, ? super T> onDrop,
+                           BufferedSubscription<T> retries, int lag,
+                           boolean cleanMe) {
+        for (BufferedSubscription<T> r = retries; r != null;) {
+            BufferedSubscription<T> nextRetry = r.nextRetry;
+            r.nextRetry = null;
+            if (nanos > 0L)
+                r.awaitSpace(nanos);
+            int stat = r.retryOffer(item);
+            if (stat == 0 && onDrop != null && onDrop.test(r.subscriber, item))
+                stat = r.retryOffer(item);
+            if (stat == 0)
+                lag = (lag >= 0) ? -1 : lag - 1;
+            else if (stat < 0)
+                cleanMe = true;
+            else if (lag >= 0 && stat > lag)
+                lag = stat;
+            r = nextRetry;
+        }
+        if (cleanMe)
+            cleanAndCount();
+        return lag;
+    }
+
+    /**
+     * Returns current list count after removing closed subscribers.
+     * Call only while holding lock.  Used mainly by retryOffer for
+     * cleanup.
+     */
+    private int cleanAndCount() {
+        int count = 0;
+        BufferedSubscription<T> pred = null, next;
+        for (BufferedSubscription<T> b = clients; b != null; b = next) {
+            next = b.next;
+            if (b.isClosed()) {
+                b.next = null;
+                if (pred == null)
+                    clients = next;
+                else
+                    pred.next = next;
+            }
+            else {
+                pred = b;
+                ++count;
+            }
+        }
+        return count;
+    }
+
+    /**
+     * Publishes the given item to each current subscriber by
+     * asynchronously invoking its {@link Flow.Subscriber#onNext(Object)
+     * onNext} method, blocking uninterruptibly while resources for any
+     * subscriber are unavailable. This method returns an estimate of
+     * the maximum lag (number of items submitted but not yet consumed)
+     * among all current subscribers. This value is at least one
+     * (accounting for this submitted item) if there are any
+     * subscribers, else zero.
+     *
+     * <p>If the Executor for this publisher throws a
+     * RejectedExecutionException (or any other RuntimeException or
+     * Error) when attempting to asynchronously notify subscribers,
+     * then this exception is rethrown, in which case not all
+     * subscribers will have been issued this item.
+     *
+     * @param item the (non-null) item to publish
+     * @return the estimated maximum lag among subscribers
+     * @throws IllegalStateException if closed
+     * @throws NullPointerException if item is null
+     * @throws RejectedExecutionException if thrown by Executor
+     */
+    public int submit(T item) {
+        return doOffer(item, Long.MAX_VALUE, null);
+    }
+
+    /**
+     * Publishes the given item, if possible, to each current subscriber
+     * by asynchronously invoking its {@link
+     * Flow.Subscriber#onNext(Object) onNext} method. The item may be
+     * dropped by one or more subscribers if resource limits are
+     * exceeded, in which case the given handler (if non-null) is
+     * invoked, and if it returns true, retried once.  Other calls to
+     * methods in this class by other threads are blocked while the
+     * handler is invoked.  Unless recovery is assured, options are
+     * usually limited to logging the error and/or issuing an {@link
+     * Flow.Subscriber#onError(Throwable) onError} signal to the
+     * subscriber.
+     *
+     * <p>This method returns a status indicator: If negative, it
+     * represents the (negative) number of drops (failed attempts to
+     * issue the item to a subscriber). Otherwise it is an estimate of
+     * the maximum lag (number of items submitted but not yet
+     * consumed) among all current subscribers. This value is at least
+     * one (accounting for this submitted item) if there are any
+     * subscribers, else zero.
+     *
+     * <p>If the Executor for this publisher throws a
+     * RejectedExecutionException (or any other RuntimeException or
+     * Error) when attempting to asynchronously notify subscribers, or
+     * the drop handler throws an exception when processing a dropped
+     * item, then this exception is rethrown.
+     *
+     * @param item the (non-null) item to publish
+     * @param onDrop if non-null, the handler invoked upon a drop to a
+     * subscriber, with arguments of the subscriber and item; if it
+     * returns true, an offer is re-attempted (once)
+     * @return if negative, the (negative) number of drops; otherwise
+     * an estimate of maximum lag
+     * @throws IllegalStateException if closed
+     * @throws NullPointerException if item is null
+     * @throws RejectedExecutionException if thrown by Executor
+     */
+    public int offer(T item,
+                     BiPredicate<Subscriber<? super T>, ? super T> onDrop) {
+        return doOffer(item, 0L, onDrop);
+    }
+
+    /**
+     * Publishes the given item, if possible, to each current subscriber
+     * by asynchronously invoking its {@link
+     * Flow.Subscriber#onNext(Object) onNext} method, blocking while
+     * resources for any subscription are unavailable, up to the
+     * specified timeout or until the caller thread is interrupted, at
+     * which point the given handler (if non-null) is invoked, and if it
+     * returns true, retried once. (The drop handler may distinguish
+     * timeouts from interrupts by checking whether the current thread
+     * is interrupted.)  Other calls to methods in this class by other
+     * threads are blocked while the handler is invoked.  Unless
+     * recovery is assured, options are usually limited to logging the
+     * error and/or issuing an {@link Flow.Subscriber#onError(Throwable)
+     * onError} signal to the subscriber.
+     *
+     * <p>This method returns a status indicator: If negative, it
+     * represents the (negative) number of drops (failed attempts to
+     * issue the item to a subscriber). Otherwise it is an estimate of
+     * the maximum lag (number of items submitted but not yet
+     * consumed) among all current subscribers. This value is at least
+     * one (accounting for this submitted item) if there are any
+     * subscribers, else zero.
+     *
+     * <p>If the Executor for this publisher throws a
+     * RejectedExecutionException (or any other RuntimeException or
+     * Error) when attempting to asynchronously notify subscribers, or
+     * the drop handler throws an exception when processing a dropped
+     * item, then this exception is rethrown.
+     *
+     * @param item the (non-null) item to publish
+     * @param timeout how long to wait for resources for any subscriber
+     * before giving up, in units of {@code unit}
+     * @param unit a {@code TimeUnit} determining how to interpret the
+     * {@code timeout} parameter
+     * @param onDrop if non-null, the handler invoked upon a drop to a
+     * subscriber, with arguments of the subscriber and item; if it
+     * returns true, an offer is re-attempted (once)
+     * @return if negative, the (negative) number of drops; otherwise
+     * an estimate of maximum lag
+     * @throws IllegalStateException if closed
+     * @throws NullPointerException if item is null
+     * @throws RejectedExecutionException if thrown by Executor
+     */
+    public int offer(T item, long timeout, TimeUnit unit,
+                     BiPredicate<Subscriber<? super T>, ? super T> onDrop) {
+        long nanos = unit.toNanos(timeout);
+        // distinguishes from untimed (only wrt interrupt policy)
+        if (nanos == Long.MAX_VALUE) --nanos;
+        return doOffer(item, nanos, onDrop);
+    }
+
+    /**
+     * Unless already closed, issues {@link
+     * Flow.Subscriber#onComplete() onComplete} signals to current
+     * subscribers, and disallows subsequent attempts to publish.
+     * Upon return, this method does <em>NOT</em> guarantee that all
+     * subscribers have yet completed.
+     */
+    public void close() {
+        if (!closed) {
+            BufferedSubscription<T> b;
+            synchronized (this) {
+                // no need to re-check closed here
+                b = clients;
+                clients = null;
+                owner = null;
+                closed = true;
+            }
+            while (b != null) {
+                BufferedSubscription<T> next = b.next;
+                b.next = null;
+                b.onComplete();
+                b = next;
+            }
+        }
+    }
+
+    /**
+     * Unless already closed, issues {@link
+     * Flow.Subscriber#onError(Throwable) onError} signals to current
+     * subscribers with the given error, and disallows subsequent
+     * attempts to publish.  Future subscribers also receive the given
+     * error. Upon return, this method does <em>NOT</em> guarantee
+     * that all subscribers have yet completed.
+     *
+     * @param error the {@code onError} argument sent to subscribers
+     * @throws NullPointerException if error is null
+     */
+    public void closeExceptionally(Throwable error) {
+        if (error == null)
+            throw new NullPointerException();
+        if (!closed) {
+            BufferedSubscription<T> b;
+            synchronized (this) {
+                b = clients;
+                if (!closed) {  // don't clobber racing close
+                    closedException = error;
+                    clients = null;
+                    owner = null;
+                    closed = true;
+                }
+            }
+            while (b != null) {
+                BufferedSubscription<T> next = b.next;
+                b.next = null;
+                b.onError(error);
+                b = next;
+            }
+        }
+    }
+
+    /**
+     * Returns true if this publisher is not accepting submissions.
+     *
+     * @return true if closed
+     */
+    public boolean isClosed() {
+        return closed;
+    }
+
+    /**
+     * Returns the exception associated with {@link
+     * #closeExceptionally(Throwable) closeExceptionally}, or null if
+     * not closed or if closed normally.
+     *
+     * @return the exception, or null if none
+     */
+    public Throwable getClosedException() {
+        return closedException;
+    }
+
+    /**
+     * Returns true if this publisher has any subscribers.
+     *
+     * @return true if this publisher has any subscribers
+     */
+    public boolean hasSubscribers() {
+        boolean nonEmpty = false;
+        synchronized (this) {
+            for (BufferedSubscription<T> b = clients; b != null;) {
+                BufferedSubscription<T> next = b.next;
+                if (b.isClosed()) {
+                    b.next = null;
+                    b = clients = next;
+                }
+                else {
+                    nonEmpty = true;
+                    break;
+                }
+            }
+        }
+        return nonEmpty;
+    }
+
+    /**
+     * Returns the number of current subscribers.
+     *
+     * @return the number of current subscribers
+     */
+    public int getNumberOfSubscribers() {
+        synchronized (this) {
+            return cleanAndCount();
+        }
+    }
+
+    /**
+     * Returns the Executor used for asynchronous delivery.
+     *
+     * @return the Executor used for asynchronous delivery
+     */
+    public Executor getExecutor() {
+        return executor;
+    }
+
+    /**
+     * Returns the maximum per-subscriber buffer capacity.
+     *
+     * @return the maximum per-subscriber buffer capacity
+     */
+    public int getMaxBufferCapacity() {
+        return maxBufferCapacity;
+    }
+
+    /**
+     * Returns a list of current subscribers for monitoring and
+     * tracking purposes, not for invoking {@link Flow.Subscriber}
+     * methods on the subscribers.
+     *
+     * @return list of current subscribers
+     */
+    public List<Subscriber<? super T>> getSubscribers() {
+        ArrayList<Subscriber<? super T>> subs = new ArrayList<>();
+        synchronized (this) {
+            BufferedSubscription<T> pred = null, next;
+            for (BufferedSubscription<T> b = clients; b != null; b = next) {
+                next = b.next;
+                if (b.isClosed()) {
+                    b.next = null;
+                    if (pred == null)
+                        clients = next;
+                    else
+                        pred.next = next;
+                }
+                else {
+                    subs.add(b.subscriber);
+                    pred = b;
+                }
+            }
+        }
+        return subs;
+    }
+
+    /**
+     * Returns true if the given Subscriber is currently subscribed.
+     *
+     * @param subscriber the subscriber
+     * @return true if currently subscribed
+     * @throws NullPointerException if subscriber is null
+     */
+    public boolean isSubscribed(Subscriber<? super T> subscriber) {
+        if (subscriber == null) throw new NullPointerException();
+        if (!closed) {
+            synchronized (this) {
+                BufferedSubscription<T> pred = null, next;
+                for (BufferedSubscription<T> b = clients; b != null; b = next) {
+                    next = b.next;
+                    if (b.isClosed()) {
+                        b.next = null;
+                        if (pred == null)
+                            clients = next;
+                        else
+                            pred.next = next;
+                    }
+                    else if (subscriber.equals(b.subscriber))
+                        return true;
+                    else
+                        pred = b;
+                }
+            }
+        }
+        return false;
+    }
+
+    /**
+     * Returns an estimate of the minimum number of items requested
+     * (via {@link Flow.Subscription#request(long) request}) but not
+     * yet produced, among all current subscribers.
+     *
+     * @return the estimate, or zero if no subscribers
+     */
+    public long estimateMinimumDemand() {
+        long min = Long.MAX_VALUE;
+        boolean nonEmpty = false;
+        synchronized (this) {
+            BufferedSubscription<T> pred = null, next;
+            for (BufferedSubscription<T> b = clients; b != null; b = next) {
+                int n; long d;
+                next = b.next;
+                if ((n = b.estimateLag()) < 0) {
+                    b.next = null;
+                    if (pred == null)
+                        clients = next;
+                    else
+                        pred.next = next;
+                }
+                else {
+                    if ((d = b.demand - n) < min)
+                        min = d;
+                    nonEmpty = true;
+                    pred = b;
+                }
+            }
+        }
+        return nonEmpty ? min : 0;
+    }
+
+    /**
+     * Returns an estimate of the maximum number of items produced but
+     * not yet consumed among all current subscribers.
+     *
+     * @return the estimate
+     */
+    public int estimateMaximumLag() {
+        int max = 0;
+        synchronized (this) {
+            BufferedSubscription<T> pred = null, next;
+            for (BufferedSubscription<T> b = clients; b != null; b = next) {
+                int n;
+                next = b.next;
+                if ((n = b.estimateLag()) < 0) {
+                    b.next = null;
+                    if (pred == null)
+                        clients = next;
+                    else
+                        pred.next = next;
+                }
+                else {
+                    if (n > max)
+                        max = n;
+                    pred = b;
+                }
+            }
+        }
+        return max;
+    }
+
+    /**
+     * Processes all published items using the given Consumer function.
+     * Returns a CompletableFuture that is completed normally when this
+     * publisher signals {@link Flow.Subscriber#onComplete()
+     * onComplete}, or completed exceptionally upon any error, or an
+     * exception is thrown by the Consumer, or the returned
+     * CompletableFuture is cancelled, in which case no further items
+     * are processed.
+     *
+     * @param consumer the function applied to each onNext item
+     * @return a CompletableFuture that is completed normally
+     * when the publisher signals onComplete, and exceptionally
+     * upon any error or cancellation
+     * @throws NullPointerException if consumer is null
+     */
+    public CompletableFuture<Void> consume(Consumer<? super T> consumer) {
+        if (consumer == null)
+            throw new NullPointerException();
+        CompletableFuture<Void> status = new CompletableFuture<>();
+        subscribe(new ConsumerSubscriber<T>(status, consumer));
+        return status;
+    }
+
+    /** Subscriber for method consume */
+    static final class ConsumerSubscriber<T> implements Subscriber<T> {
+        final CompletableFuture<Void> status;
+        final Consumer<? super T> consumer;
+        Subscription subscription;
+        ConsumerSubscriber(CompletableFuture<Void> status,
+                           Consumer<? super T> consumer) {
+            this.status = status; this.consumer = consumer;
+        }
+        public final void onSubscribe(Subscription subscription) {
+            this.subscription = subscription;
+            status.whenComplete((v, e) -> subscription.cancel());
+            if (!status.isDone())
+                subscription.request(Long.MAX_VALUE);
+        }
+        public final void onError(Throwable ex) {
+            status.completeExceptionally(ex);
+        }
+        public final void onComplete() {
+            status.complete(null);
+        }
+        public final void onNext(T item) {
+            try {
+                consumer.accept(item);
+            } catch (Throwable ex) {
+                subscription.cancel();
+                status.completeExceptionally(ex);
+            }
+        }
+    }
+
+    /**
+     * A task for consuming buffer items and signals, created and
+     * executed whenever they become available. A task consumes as
+     * many items/signals as possible before terminating, at which
+     * point another task is created when needed. The dual Runnable
+     * and ForkJoinTask declaration saves overhead when executed by
+     * ForkJoinPools, without impacting other kinds of Executors.
+     */
+    @SuppressWarnings("serial")
+    static final class ConsumerTask<T> extends ForkJoinTask<Void>
+        implements Runnable, CompletableFuture.AsynchronousCompletionTask {
+        final BufferedSubscription<T> consumer;
+        ConsumerTask(BufferedSubscription<T> consumer) {
+            this.consumer = consumer;
+        }
+        public final Void getRawResult() { return null; }
+        public final void setRawResult(Void v) {}
+        public final boolean exec() { consumer.consume(); return false; }
+        public final void run() { consumer.consume(); }
+    }
+
+    /**
+     * A resizable array-based ring buffer with integrated control to
+     * start a consumer task whenever items are available.  The buffer
+     * algorithm is specialized for the case of at most one concurrent
+     * producer and consumer, and power of two buffer sizes. It relies
+     * primarily on atomic operations (CAS or getAndSet) at the next
+     * array slot to put or take an element, at the "tail" and "head"
+     * indices written only by the producer and consumer respectively.
+     *
+     * We ensure internally that there is at most one active consumer
+     * task at any given time. The publisher guarantees a single
+     * producer via its lock. Sync among producers and consumers
+     * relies on volatile fields "ctl", "demand", and "waiting" (along
+     * with element access). Other variables are accessed in plain
+     * mode, relying on outer ordering and exclusion, and/or enclosing
+     * them within other volatile accesses. Some atomic operations are
+     * avoided by tracking single threaded ownership by producers (in
+     * the style of biased locking).
+     *
+     * Execution control and protocol state are managed using field
+     * "ctl".  Methods to subscribe, close, request, and cancel set
+     * ctl bits (mostly using atomic boolean method getAndBitwiseOr),
+     * and ensure that a task is running. (The corresponding consumer
+     * side actions are in method consume.)  To avoid starting a new
+     * task on each action, ctl also includes a keep-alive bit
+     * (ACTIVE) that is refreshed if needed on producer actions.
+     * (Maintaining agreement about keep-alives requires most atomic
+     * updates to be full SC/Volatile strength, which is still much
+     * cheaper than using one task per item.)  Error signals
+     * additionally null out items and/or fields to reduce termination
+     * latency.  The cancel() method is supported by treating as ERROR
+     * but suppressing onError signal.
+     *
+     * Support for blocking also exploits the fact that there is only
+     * one possible waiter. ManagedBlocker-compatible control fields
+     * are placed in this class itself rather than in wait-nodes.
+     * Blocking control relies on the "waiting" and "waiter"
+     * fields. Producers set them before trying to block. Signalling
+     * unparks and clears fields. If the producer and/or consumer are
+     * using a ForkJoinPool, the producer attempts to help run
+     * consumer tasks via ForkJoinPool.helpAsyncBlocker before
+     * blocking.
+     *
+     * Usages of this class may encounter any of several forms of
+     * memory contention. We try to ameliorate across them without
+     * unduly impacting footprints in low-contention usages where it
+     * isn't needed. Buffer arrays start out small and grow only as
+     * needed.  The class uses @Contended and heuristic field
+     * declaration ordering to reduce false-sharing memory contention
+     * across instances of BufferedSubscription (as in, multiple
+     * subscribers per publisher).  We additionally segregate some
+     * fields that would otherwise nearly always encounter cache line
+     * contention among producers and consumers. To reduce contention
+     * across time (vs space), consumers only periodically update
+     * other fields (see method takeItems), at the expense of possibly
+     * staler reporting of lags and demand (bounded at 12.5% == 1/8
+     * capacity) and possibly more atomic operations.
+     *
+     * Other forms of imbalance and slowdowns can occur during startup
+     * when producer and consumer methods are compiled and/or memory
+     * is allocated at different rates.  This is ameliorated by
+     * artificially subdividing some consumer methods, including
+     * isolation of all subscriber callbacks.  This code also includes
+     * typical power-of-two array screening idioms to avoid compilers
+     * generating traps, along with the usual SSA-based inline
+     * assignment coding style. Also, all methods and fields have
+     * default visibility to simplify usage by callers.
+     */
+    @SuppressWarnings("serial")
+    @jdk.internal.vm.annotation.Contended
+    static final class BufferedSubscription<T>
+        implements Subscription, ForkJoinPool.ManagedBlocker {
+        long timeout;                      // Long.MAX_VALUE if untimed wait
+        int head;                          // next position to take
+        int tail;                          // next position to put
+        final int maxCapacity;             // max buffer size
+        volatile int ctl;                  // atomic run state flags
+        Object[] array;                    // buffer
+        final Subscriber<? super T> subscriber;
+        final BiConsumer<? super Subscriber<? super T>, ? super Throwable> onNextHandler;
+        Executor executor;                 // null on error
+        Thread waiter;                     // blocked producer thread
+        Throwable pendingError;            // holds until onError issued
+        BufferedSubscription<T> next;      // used only by publisher
+        BufferedSubscription<T> nextRetry; // used only by publisher
+
+        @jdk.internal.vm.annotation.Contended("c") // segregate
+        volatile long demand;              // # unfilled requests
+        @jdk.internal.vm.annotation.Contended("c")
+        volatile int waiting;              // nonzero if producer blocked
+
+        // ctl bit values
+        static final int CLOSED   = 0x01;  // if set, other bits ignored
+        static final int ACTIVE   = 0x02;  // keep-alive for consumer task
+        static final int REQS     = 0x04;  // (possibly) nonzero demand
+        static final int ERROR    = 0x08;  // issues onError when noticed
+        static final int COMPLETE = 0x10;  // issues onComplete when done
+        static final int RUN      = 0x20;  // task is or will be running
+        static final int OPEN     = 0x40;  // true after subscribe
+
+        static final long INTERRUPTED = -1L; // timeout vs interrupt sentinel
+
+        BufferedSubscription(Subscriber<? super T> subscriber,
+                             Executor executor,
+                             BiConsumer<? super Subscriber<? super T>,
+                             ? super Throwable> onNextHandler,
+                             Object[] array,
+                             int maxBufferCapacity) {
+            this.subscriber = subscriber;
+            this.executor = executor;
+            this.onNextHandler = onNextHandler;
+            this.array = array;
+            this.maxCapacity = maxBufferCapacity;
+        }
+
+        // Wrappers for some VarHandle methods
+
+        final boolean weakCasCtl(int cmp, int val) {
+            return CTL.weakCompareAndSet(this, cmp, val);
+        }
+
+        final int getAndBitwiseOrCtl(int bits) {
+            return (int)CTL.getAndBitwiseOr(this, bits);
+        }
+
+        final long subtractDemand(int k) {
+            long n = (long)(-k);
+            return n + (long)DEMAND.getAndAdd(this, n);
+        }
+
+        final boolean casDemand(long cmp, long val) {
+            return DEMAND.compareAndSet(this, cmp, val);
+        }
+
+        // Utilities used by SubmissionPublisher
+
+        /**
+         * Returns true if closed (consumer task may still be running).
+         */
+        final boolean isClosed() {
+            return (ctl & CLOSED) != 0;
+        }
+
+        /**
+         * Returns estimated number of buffered items, or negative if
+         * closed.
+         */
+        final int estimateLag() {
+            int c = ctl, n = tail - head;
+            return ((c & CLOSED) != 0) ? -1 : (n < 0) ? 0 : n;
+        }
+
+        // Methods for submitting items
+
+        /**
+         * Tries to add item and start consumer task if necessary.
+         * @return negative if closed, 0 if saturated, else estimated lag
+         */
+        final int offer(T item, boolean unowned) {
+            Object[] a;
+            int stat = 0, cap = ((a = array) == null) ? 0 : a.length;
+            int t = tail, i = t & (cap - 1), n = t + 1 - head;
+            if (cap > 0) {
+                boolean added;
+                if (n >= cap && cap < maxCapacity) // resize
+                    added = growAndOffer(item, a, t);
+                else if (n >= cap || unowned)      // need volatile CAS
+                    added = QA.compareAndSet(a, i, null, item);
+                else {                             // can use release mode
+                    QA.setRelease(a, i, item);
+                    added = true;
+                }
+                if (added) {
+                    tail = t + 1;
+                    stat = n;
+                }
+            }
+            return startOnOffer(stat);
+        }
+
+        /**
+         * Tries to expand buffer and add item, returning true on
+         * success. Currently fails only if out of memory.
+         */
+        final boolean growAndOffer(T item, Object[] a, int t) {
+            int cap = 0, newCap = 0;
+            Object[] newArray = null;
+            if (a != null && (cap = a.length) > 0 && (newCap = cap << 1) > 0) {
+                try {
+                    newArray = new Object[newCap];
+                } catch (OutOfMemoryError ex) {
+                }
+            }
+            if (newArray == null)
+                return false;
+            else {                                // take and move items
+                int newMask = newCap - 1;
+                newArray[t-- & newMask] = item;
+                for (int mask = cap - 1, k = mask; k >= 0; --k) {
+                    Object x = QA.getAndSet(a, t & mask, null);
+                    if (x == null)
+                        break;                    // already consumed
+                    else
+                        newArray[t-- & newMask] = x;
+                }
+                array = newArray;
+                VarHandle.releaseFence();         // release array and slots
+                return true;
+            }
+        }
+
+        /**
+         * Version of offer for retries (no resize or bias)
+         */
+        final int retryOffer(T item) {
+            Object[] a;
+            int stat = 0, t = tail, h = head, cap;
+            if ((a = array) != null && (cap = a.length) > 0 &&
+                QA.compareAndSet(a, (cap - 1) & t, null, item))
+                stat = (tail = t + 1) - h;
+            return startOnOffer(stat);
+        }
+
+        /**
+         * Tries to start consumer task after offer.
+         * @return negative if now closed, else argument
+         */
+        final int startOnOffer(int stat) {
+            int c; // start or keep alive if requests exist and not active
+            if (((c = ctl) & (REQS | ACTIVE)) == REQS &&
+                ((c = getAndBitwiseOrCtl(RUN | ACTIVE)) & (RUN | CLOSED)) == 0)
+                tryStart();
+            else if ((c & CLOSED) != 0)
+                stat = -1;
+            return stat;
+        }
+
+        /**
+         * Tries to start consumer task. Sets error state on failure.
+         */
+        final void tryStart() {
+            try {
+                Executor e;
+                ConsumerTask<T> task = new ConsumerTask<T>(this);
+                if ((e = executor) != null)   // skip if disabled on error
+                    e.execute(task);
+            } catch (RuntimeException | Error ex) {
+                getAndBitwiseOrCtl(ERROR | CLOSED);
+                throw ex;
+            }
+        }
+
+        // Signals to consumer tasks
+
+        /**
+         * Sets the given control bits, starting task if not running or closed.
+         * @param bits state bits, assumed to include RUN but not CLOSED
+         */
+        final void startOnSignal(int bits) {
+            if ((ctl & bits) != bits &&
+                (getAndBitwiseOrCtl(bits) & (RUN | CLOSED)) == 0)
+                tryStart();
+        }
+
+        final void onSubscribe() {
+            startOnSignal(RUN | ACTIVE);
+        }
+
+        final void onComplete() {
+            startOnSignal(RUN | ACTIVE | COMPLETE);
+        }
+
+        final void onError(Throwable ex) {
+            int c; Object[] a;      // to null out buffer on async error
+            if (ex != null)
+                pendingError = ex;  // races are OK
+            if (((c = getAndBitwiseOrCtl(ERROR | RUN | ACTIVE)) & CLOSED) == 0) {
+                if ((c & RUN) == 0)
+                    tryStart();
+                else if ((a = array) != null)
+                    Arrays.fill(a, null);
+            }
+        }
+
+        public final void cancel() {
+            onError(null);
+        }
+
+        public final void request(long n) {
+            if (n > 0L) {
+                for (;;) {
+                    long p = demand, d = p + n;  // saturate
+                    if (casDemand(p, d < p ? Long.MAX_VALUE : d))
+                        break;
+                }
+                startOnSignal(RUN | ACTIVE | REQS);
+            }
+            else
+                onError(new IllegalArgumentException(
+                            "non-positive subscription request"));
+        }
+
+        // Consumer task actions
+
+        /**
+         * Consumer loop, called from ConsumerTask, or indirectly when
+         * helping during submit.
+         */
+        final void consume() {
+            Subscriber<? super T> s;
+            if ((s = subscriber) != null) {          // hoist checks
+                subscribeOnOpen(s);
+                long d = demand;
+                for (int h = head, t = tail;;) {
+                    int c, taken; boolean empty;
+                    if (((c = ctl) & ERROR) != 0) {
+                        closeOnError(s, null);
+                        break;
+                    }
+                    else if ((taken = takeItems(s, d, h)) > 0) {
+                        head = h += taken;
+                        d = subtractDemand(taken);
+                    }
+                    else if ((d = demand) == 0L && (c & REQS) != 0)
+                        weakCasCtl(c, c & ~REQS);    // exhausted demand
+                    else if (d != 0L && (c & REQS) == 0)
+                        weakCasCtl(c, c | REQS);     // new demand
+                    else if (t == (t = tail)) {      // stability check
+                        if ((empty = (t == h)) && (c & COMPLETE) != 0) {
+                            closeOnComplete(s);      // end of stream
+                            break;
+                        }
+                        else if (empty || d == 0L) {
+                            int bit = ((c & ACTIVE) != 0) ? ACTIVE : RUN;
+                            if (weakCasCtl(c, c & ~bit) && bit == RUN)
+                                break;               // un-keep-alive or exit
+                        }
+                    }
+                }
+            }
+        }
+
+        /**
+         * Consumes some items until unavailable or bound or error.
+         *
+         * @param s subscriber
+         * @param d current demand
+         * @param h current head
+         * @return number taken
+         */
+        final int takeItems(Subscriber<? super T> s, long d, int h) {
+            Object[] a;
+            int k = 0, cap;
+            if ((a = array) != null && (cap = a.length) > 0) {
+                int m = cap - 1, b = (m >>> 3) + 1; // min(1, cap/8)
+                int n = (d < (long)b) ? (int)d : b;
+                for (; k < n; ++h, ++k) {
+                    Object x = QA.getAndSet(a, h & m, null);
+                    if (waiting != 0)
+                        signalWaiter();
+                    if (x == null)
+                        break;
+                    else if (!consumeNext(s, x))
+                        break;
+                }
+            }
+            return k;
+        }
+
+        final boolean consumeNext(Subscriber<? super T> s, Object x) {
+            try {
+                @SuppressWarnings("unchecked") T y = (T) x;
+                if (s != null)
+                    s.onNext(y);
+                return true;
+            } catch (Throwable ex) {
+                handleOnNext(s, ex);
+                return false;
+            }
+        }
+
+        /**
+         * Processes exception in Subscriber.onNext.
+         */
+        final void handleOnNext(Subscriber<? super T> s, Throwable ex) {
+            BiConsumer<? super Subscriber<? super T>, ? super Throwable> h;
+            try {
+                if ((h = onNextHandler) != null)
+                    h.accept(s, ex);
+            } catch (Throwable ignore) {
+            }
+            closeOnError(s, ex);
+        }
+
+        /**
+         * Issues subscriber.onSubscribe if this is first signal.
+         */
+        final void subscribeOnOpen(Subscriber<? super T> s) {
+            if ((ctl & OPEN) == 0 && (getAndBitwiseOrCtl(OPEN) & OPEN) == 0)
+                consumeSubscribe(s);
+        }
+
+        final void consumeSubscribe(Subscriber<? super T> s) {
+            try {
+                if (s != null) // ignore if disabled
+                    s.onSubscribe(this);
+            } catch (Throwable ex) {
+                closeOnError(s, ex);
+            }
+        }
+
+        /**
+         * Issues subscriber.onComplete unless already closed.
+         */
+        final void closeOnComplete(Subscriber<? super T> s) {
+            if ((getAndBitwiseOrCtl(CLOSED) & CLOSED) == 0)
+                consumeComplete(s);
+        }
+
+        final void consumeComplete(Subscriber<? super T> s) {
+            try {
+                if (s != null)
+                    s.onComplete();
+            } catch (Throwable ignore) {
+            }
+        }
+
+        /**
+         * Issues subscriber.onError, and unblocks producer if needed.
+         */
+        final void closeOnError(Subscriber<? super T> s, Throwable ex) {
+            if ((getAndBitwiseOrCtl(ERROR | CLOSED) & CLOSED) == 0) {
+                if (ex == null)
+                    ex = pendingError;
+                pendingError = null;  // detach
+                executor = null;      // suppress racing start calls
+                signalWaiter();
+                consumeError(s, ex);
+            }
+        }
+
+        final void consumeError(Subscriber<? super T> s, Throwable ex) {
+            try {
+                if (ex != null && s != null)
+                    s.onError(ex);
+            } catch (Throwable ignore) {
+            }
+        }
+
+        // Blocking support
+
+        /**
+         * Unblocks waiting producer.
+         */
+        final void signalWaiter() {
+            Thread w;
+            waiting = 0;
+            if ((w = waiter) != null)
+                LockSupport.unpark(w);
+        }
+
+        /**
+         * Returns true if closed or space available.
+         * For ManagedBlocker.
+         */
+        public final boolean isReleasable() {
+            Object[] a; int cap;
+            return ((ctl & CLOSED) != 0 ||
+                    ((a = array) != null && (cap = a.length) > 0 &&
+                     QA.getAcquire(a, (cap - 1) & tail) == null));
+        }
+
+        /**
+         * Helps or blocks until timeout, closed, or space available.
+         */
+        final void awaitSpace(long nanos) {
+            if (!isReleasable()) {
+                ForkJoinPool.helpAsyncBlocker(executor, this);
+                if (!isReleasable()) {
+                    timeout = nanos;
+                    try {
+                        ForkJoinPool.managedBlock(this);
+                    } catch (InterruptedException ie) {
+                        timeout = INTERRUPTED;
+                    }
+                    if (timeout == INTERRUPTED)
+                        Thread.currentThread().interrupt();
+                }
+            }
+        }
+
+        /**
+         * Blocks until closed, space available or timeout.
+         * For ManagedBlocker.
+         */
+        public final boolean block() {
+            long nanos = timeout;
+            boolean timed = (nanos < Long.MAX_VALUE);
+            long deadline = timed ? System.nanoTime() + nanos : 0L;
+            while (!isReleasable()) {
+                if (Thread.interrupted()) {
+                    timeout = INTERRUPTED;
+                    if (timed)
+                        break;
+                }
+                else if (timed && (nanos = deadline - System.nanoTime()) <= 0L)
+                    break;
+                else if (waiter == null)
+                    waiter = Thread.currentThread();
+                else if (waiting == 0)
+                    waiting = 1;
+                else if (timed)
+                    LockSupport.parkNanos(this, nanos);
+                else
+                    LockSupport.park(this);
+            }
+            waiter = null;
+            waiting = 0;
+            return true;
+        }
+
+        // VarHandle mechanics
+        static final VarHandle CTL;
+        static final VarHandle DEMAND;
+        static final VarHandle QA;
+
+        static {
+            try {
+                MethodHandles.Lookup l = MethodHandles.lookup();
+                CTL = l.findVarHandle(BufferedSubscription.class, "ctl",
+                                      int.class);
+                DEMAND = l.findVarHandle(BufferedSubscription.class, "demand",
+                                         long.class);
+                QA = MethodHandles.arrayElementVarHandle(Object[].class);
+            } catch (ReflectiveOperationException e) {
+                throw new ExceptionInInitializerError(e);
+            }
+
+            // Reduce the risk of rare disastrous classloading in first call to
+            // LockSupport.park: https://bugs.openjdk.java.net/browse/JDK-8074773
+            Class<?> ensureLoaded = LockSupport.class;
+        }
+    }
+}
diff --git a/ojluni/src/main/java/java/util/concurrent/SynchronousQueue.java b/ojluni/src/main/java/java/util/concurrent/SynchronousQueue.java
index 9655205..8de28b8 100644
--- a/ojluni/src/main/java/java/util/concurrent/SynchronousQueue.java
+++ b/ojluni/src/main/java/java/util/concurrent/SynchronousQueue.java
@@ -36,19 +36,18 @@
 
 package java.util.concurrent;
 
+import java.lang.invoke.MethodHandles;
+import java.lang.invoke.VarHandle;
 import java.util.AbstractQueue;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.Iterator;
+import java.util.Objects;
 import java.util.Spliterator;
 import java.util.Spliterators;
 import java.util.concurrent.locks.LockSupport;
 import java.util.concurrent.locks.ReentrantLock;
 
-// BEGIN android-note
-// removed link to collections framework docs
-// END android-note
-
 /**
  * A {@linkplain BlockingQueue blocking queue} in which each insert
  * operation must wait for a corresponding remove operation by another
@@ -77,9 +76,12 @@
  * is not guaranteed. However, a queue constructed with fairness set
  * to {@code true} grants threads access in FIFO order.
  *
- * <p>This class and its iterator implement all of the
- * <em>optional</em> methods of the {@link Collection} and {@link
- * Iterator} interfaces.
+ * <p>This class and its iterator implement all of the <em>optional</em>
+ * methods of the {@link Collection} and {@link Iterator} interfaces.
+ *
+ * <p>This class is a member of the
+ * <a href="{@docRoot}/java.base/java/util/package-summary.html#CollectionsFramework">
+ * Java Collections Framework</a>.
  *
  * @since 1.5
  * @author Doug Lea and Bill Scherer and Michael Scott
@@ -247,7 +249,7 @@
 
             boolean casNext(SNode cmp, SNode val) {
                 return cmp == next &&
-                    U.compareAndSwapObject(this, NEXT, cmp, val);
+                    SNEXT.compareAndSet(this, cmp, val);
             }
 
             /**
@@ -260,7 +262,7 @@
              */
             boolean tryMatch(SNode s) {
                 if (match == null &&
-                    U.compareAndSwapObject(this, MATCH, null, s)) {
+                    SMATCH.compareAndSet(this, null, s)) {
                     Thread w = waiter;
                     if (w != null) {    // waiters need at most one unpark
                         waiter = null;
@@ -275,26 +277,23 @@
              * Tries to cancel a wait by matching node to itself.
              */
             void tryCancel() {
-                U.compareAndSwapObject(this, MATCH, null, this);
+                SMATCH.compareAndSet(this, null, this);
             }
 
             boolean isCancelled() {
                 return match == this;
             }
 
-            // Unsafe mechanics
-            private static final sun.misc.Unsafe U = sun.misc.Unsafe.getUnsafe();
-            private static final long MATCH;
-            private static final long NEXT;
-
+            // VarHandle mechanics
+            private static final VarHandle SMATCH;
+            private static final VarHandle SNEXT;
             static {
                 try {
-                    MATCH = U.objectFieldOffset
-                        (SNode.class.getDeclaredField("match"));
-                    NEXT = U.objectFieldOffset
-                        (SNode.class.getDeclaredField("next"));
+                    MethodHandles.Lookup l = MethodHandles.lookup();
+                    SMATCH = l.findVarHandle(SNode.class, "match", SNode.class);
+                    SNEXT = l.findVarHandle(SNode.class, "next", SNode.class);
                 } catch (ReflectiveOperationException e) {
-                    throw new Error(e);
+                    throw new ExceptionInInitializerError(e);
                 }
             }
         }
@@ -304,7 +303,7 @@
 
         boolean casHead(SNode h, SNode nh) {
             return h == head &&
-                U.compareAndSwapObject(this, HEAD, h, nh);
+                SHEAD.compareAndSet(this, h, nh);
         }
 
         /**
@@ -451,8 +450,10 @@
                         continue;
                     }
                 }
-                if (spins > 0)
+                if (spins > 0) {
+                    Thread.onSpinWait();
                     spins = shouldSpin(s) ? (spins - 1) : 0;
+                }
                 else if (s.waiter == null)
                     s.waiter = w; // establish waiter so can park next iter
                 else if (!timed)
@@ -508,15 +509,14 @@
             }
         }
 
-        // Unsafe mechanics
-        private static final sun.misc.Unsafe U = sun.misc.Unsafe.getUnsafe();
-        private static final long HEAD;
+        // VarHandle mechanics
+        private static final VarHandle SHEAD;
         static {
             try {
-                HEAD = U.objectFieldOffset
-                    (TransferStack.class.getDeclaredField("head"));
+                MethodHandles.Lookup l = MethodHandles.lookup();
+                SHEAD = l.findVarHandle(TransferStack.class, "head", SNode.class);
             } catch (ReflectiveOperationException e) {
-                throw new Error(e);
+                throw new ExceptionInInitializerError(e);
             }
         }
     }
@@ -546,19 +546,19 @@
 
             boolean casNext(QNode cmp, QNode val) {
                 return next == cmp &&
-                    U.compareAndSwapObject(this, NEXT, cmp, val);
+                    QNEXT.compareAndSet(this, cmp, val);
             }
 
             boolean casItem(Object cmp, Object val) {
                 return item == cmp &&
-                    U.compareAndSwapObject(this, ITEM, cmp, val);
+                    QITEM.compareAndSet(this, cmp, val);
             }
 
             /**
              * Tries to cancel by CAS'ing ref to this as item.
              */
             void tryCancel(Object cmp) {
-                U.compareAndSwapObject(this, ITEM, cmp, this);
+                QITEM.compareAndSet(this, cmp, this);
             }
 
             boolean isCancelled() {
@@ -574,19 +574,16 @@
                 return next == this;
             }
 
-            // Unsafe mechanics
-            private static final sun.misc.Unsafe U = sun.misc.Unsafe.getUnsafe();
-            private static final long ITEM;
-            private static final long NEXT;
-
+            // VarHandle mechanics
+            private static final VarHandle QITEM;
+            private static final VarHandle QNEXT;
             static {
                 try {
-                    ITEM = U.objectFieldOffset
-                        (QNode.class.getDeclaredField("item"));
-                    NEXT = U.objectFieldOffset
-                        (QNode.class.getDeclaredField("next"));
+                    MethodHandles.Lookup l = MethodHandles.lookup();
+                    QITEM = l.findVarHandle(QNode.class, "item", Object.class);
+                    QNEXT = l.findVarHandle(QNode.class, "next", QNode.class);
                 } catch (ReflectiveOperationException e) {
-                    throw new Error(e);
+                    throw new ExceptionInInitializerError(e);
                 }
             }
         }
@@ -614,7 +611,7 @@
          */
         void advanceHead(QNode h, QNode nh) {
             if (h == head &&
-                U.compareAndSwapObject(this, HEAD, h, nh))
+                QHEAD.compareAndSet(this, h, nh))
                 h.next = h; // forget old next
         }
 
@@ -623,7 +620,7 @@
          */
         void advanceTail(QNode t, QNode nt) {
             if (tail == t)
-                U.compareAndSwapObject(this, TAIL, t, nt);
+                QTAIL.compareAndSet(this, t, nt);
         }
 
         /**
@@ -631,7 +628,7 @@
          */
         boolean casCleanMe(QNode cmp, QNode val) {
             return cleanMe == cmp &&
-                U.compareAndSwapObject(this, CLEANME, cmp, val);
+                QCLEANME.compareAndSet(this, cmp, val);
         }
 
         /**
@@ -752,8 +749,10 @@
                         continue;
                     }
                 }
-                if (spins > 0)
+                if (spins > 0) {
                     --spins;
+                    Thread.onSpinWait();
+                }
                 else if (s.waiter == null)
                     s.waiter = w;
                 else if (!timed)
@@ -817,20 +816,21 @@
             }
         }
 
-        private static final sun.misc.Unsafe U = sun.misc.Unsafe.getUnsafe();
-        private static final long HEAD;
-        private static final long TAIL;
-        private static final long CLEANME;
+        // VarHandle mechanics
+        private static final VarHandle QHEAD;
+        private static final VarHandle QTAIL;
+        private static final VarHandle QCLEANME;
         static {
             try {
-                HEAD = U.objectFieldOffset
-                    (TransferQueue.class.getDeclaredField("head"));
-                TAIL = U.objectFieldOffset
-                    (TransferQueue.class.getDeclaredField("tail"));
-                CLEANME = U.objectFieldOffset
-                    (TransferQueue.class.getDeclaredField("cleanMe"));
+                MethodHandles.Lookup l = MethodHandles.lookup();
+                QHEAD = l.findVarHandle(TransferQueue.class, "head",
+                                        QNode.class);
+                QTAIL = l.findVarHandle(TransferQueue.class, "tail",
+                                        QNode.class);
+                QCLEANME = l.findVarHandle(TransferQueue.class, "cleanMe",
+                                           QNode.class);
             } catch (ReflectiveOperationException e) {
-                throw new Error(e);
+                throw new ExceptionInInitializerError(e);
             }
         }
     }
@@ -1066,7 +1066,7 @@
 
     /**
      * Returns an empty spliterator in which calls to
-     * {@link java.util.Spliterator#trySplit()} always return {@code null}.
+     * {@link Spliterator#trySplit() trySplit} always return {@code null}.
      *
      * @return an empty spliterator
      * @since 1.8
@@ -1112,15 +1112,12 @@
      * @throws IllegalArgumentException      {@inheritDoc}
      */
     public int drainTo(Collection<? super E> c) {
-        if (c == null)
-            throw new NullPointerException();
+        Objects.requireNonNull(c);
         if (c == this)
             throw new IllegalArgumentException();
         int n = 0;
-        for (E e; (e = poll()) != null;) {
+        for (E e; (e = poll()) != null; n++)
             c.add(e);
-            ++n;
-        }
         return n;
     }
 
@@ -1131,15 +1128,12 @@
      * @throws IllegalArgumentException      {@inheritDoc}
      */
     public int drainTo(Collection<? super E> c, int maxElements) {
-        if (c == null)
-            throw new NullPointerException();
+        Objects.requireNonNull(c);
         if (c == this)
             throw new IllegalArgumentException();
         int n = 0;
-        for (E e; n < maxElements && (e = poll()) != null;) {
+        for (E e; n < maxElements && (e = poll()) != null; n++)
             c.add(e);
-            ++n;
-        }
         return n;
     }
 
diff --git a/ojluni/src/main/java/java/util/concurrent/ThreadLocalRandom.java b/ojluni/src/main/java/java/util/concurrent/ThreadLocalRandom.java
index 195f8ac..c20d2b3 100644
--- a/ojluni/src/main/java/java/util/concurrent/ThreadLocalRandom.java
+++ b/ojluni/src/main/java/java/util/concurrent/ThreadLocalRandom.java
@@ -36,6 +36,7 @@
 package java.util.concurrent;
 
 import java.io.ObjectStreamField;
+import java.security.AccessControlContext;
 import java.util.Random;
 import java.util.Spliterator;
 import java.util.concurrent.atomic.AtomicInteger;
@@ -47,6 +48,8 @@
 import java.util.stream.IntStream;
 import java.util.stream.LongStream;
 import java.util.stream.StreamSupport;
+import jdk.internal.misc.Unsafe;
+import jdk.internal.misc.VM;
 
 /**
  * A random number generator isolated to the current thread.  Like the
@@ -64,7 +67,7 @@
  * {@code ThreadLocalRandom.current().nextX(...)} (where
  * {@code X} is {@code Int}, {@code Long}, etc).
  * When all usages are of this form, it is never possible to
- * accidently share a {@code ThreadLocalRandom} across multiple threads.
+ * accidentally share a {@code ThreadLocalRandom} across multiple threads.
  *
  * <p>This class also provides additional commonly used bounded random
  * generation methods.
@@ -95,7 +98,9 @@
      * ThreadLocalRandom sequence.  The dual use is a marriage of
      * convenience, but is a simple and efficient way of reducing
      * application-level overhead and footprint of most concurrent
-     * programs.
+     * programs. Even more opportunistically, we also define here
+     * other package-private utilities that access Thread class
+     * fields.
      *
      * Even though this class subclasses java.util.Random, it uses the
      * same basic algorithm as java.util.SplittableRandom.  (See its
@@ -193,9 +198,17 @@
         return r;
     }
 
-    // We must define this, but never use it.
+    /**
+     * Generates a pseudorandom number with the indicated number of
+     * low-order bits.  Because this class has no subclasses, this
+     * method cannot be invoked or overridden.
+     *
+     * @param  bits random bits
+     * @return the next pseudorandom value from this random number
+     *         generator's sequence
+     */
     protected int next(int bits) {
-        return (int)(mix64(nextSeed()) >>> (64 - bits));
+        return nextInt() >>> (32 - bits);
     }
 
     /**
@@ -455,7 +468,7 @@
             s = v1 * v1 + v2 * v2;
         } while (s >= 1 || s == 0);
         double multiplier = StrictMath.sqrt(-2 * StrictMath.log(s)/s);
-        nextLocalGaussian.set(new Double(v2 * multiplier));
+        nextLocalGaussian.set(Double.valueOf(v2 * multiplier));
         return v1 * multiplier;
     }
 
@@ -687,8 +700,7 @@
      * @return a stream of pseudorandom {@code double} values,
      *         each with the given origin (inclusive) and bound (exclusive)
      * @throws IllegalArgumentException if {@code streamSize} is
-     *         less than zero
-     * @throws IllegalArgumentException if {@code randomNumberOrigin}
+     *         less than zero, or {@code randomNumberOrigin}
      *         is greater than or equal to {@code randomNumberBound}
      * @since 1.8
      */
@@ -958,6 +970,21 @@
         return r;
     }
 
+    // Support for other package-private ThreadLocal access
+
+    /**
+     * Erases ThreadLocals by nulling out Thread maps.
+     */
+    static final void eraseThreadLocals(Thread thread) {
+        U.putObject(thread, THREADLOCALS, null);
+        U.putObject(thread, INHERITABLETHREADLOCALS, null);
+    }
+
+    static final void setInheritedAccessControlContext(Thread thread,
+                                                       AccessControlContext acc) {
+        U.putObjectRelease(thread, INHERITEDACCESSCONTROLCONTEXT, acc);
+    }
+
     // Serialization support
 
     private static final long serialVersionUID = -5851777807851030925L;
@@ -1012,7 +1039,10 @@
      */
     private static final long SEEDER_INCREMENT = 0xbb67ae8584caa73bL;
 
-    // Constants from SplittableRandom
+    /**
+     * The least non-zero value returned by nextDouble(). This value
+     * is scaled by a random value of 53 bits to produce a result.
+     */
     private static final double DOUBLE_UNIT = 0x1.0p-53;  // 1.0  / (1L << 53)
     private static final float  FLOAT_UNIT  = 0x1.0p-24f; // 1.0f / (1 << 24)
 
@@ -1022,22 +1052,19 @@
     static final String BAD_SIZE  = "size must be non-negative";
 
     // Unsafe mechanics
-    private static final sun.misc.Unsafe U = sun.misc.Unsafe.getUnsafe();
-    private static final long SEED;
-    private static final long PROBE;
-    private static final long SECONDARY;
-    static {
-        try {
-            SEED = U.objectFieldOffset
-                (Thread.class.getDeclaredField("threadLocalRandomSeed"));
-            PROBE = U.objectFieldOffset
-                (Thread.class.getDeclaredField("threadLocalRandomProbe"));
-            SECONDARY = U.objectFieldOffset
-                (Thread.class.getDeclaredField("threadLocalRandomSecondarySeed"));
-        } catch (ReflectiveOperationException e) {
-            throw new Error(e);
-        }
-    }
+    private static final Unsafe U = Unsafe.getUnsafe();
+    private static final long SEED = U.objectFieldOffset
+            (Thread.class, "threadLocalRandomSeed");
+    private static final long PROBE = U.objectFieldOffset
+            (Thread.class, "threadLocalRandomProbe");
+    private static final long SECONDARY = U.objectFieldOffset
+            (Thread.class, "threadLocalRandomSecondarySeed");
+    private static final long THREADLOCALS = U.objectFieldOffset
+            (Thread.class, "threadLocals");
+    private static final long INHERITABLETHREADLOCALS = U.objectFieldOffset
+            (Thread.class, "inheritableThreadLocals");
+    private static final long INHERITEDACCESSCONTROLCONTEXT = U.objectFieldOffset
+            (Thread.class, "inheritedAccessControlContext");
 
     /** Rarely-used holder for the second of a pair of Gaussians */
     private static final ThreadLocal<Double> nextLocalGaussian =
@@ -1058,11 +1085,8 @@
 
     // at end of <clinit> to survive static initialization circularity
     static {
-        if (java.security.AccessController.doPrivileged(
-            new java.security.PrivilegedAction<Boolean>() {
-                public Boolean run() {
-                    return Boolean.getBoolean("java.util.secureRandomSeed");
-                }})) {
+        String sec = VM.getSavedProperty("java.util.secureRandomSeed");
+        if (Boolean.parseBoolean(sec)) {
             byte[] seedBytes = java.security.SecureRandom.getSeed(8);
             long s = (long)seedBytes[0] & 0xffL;
             for (int i = 1; i < 8; ++i)
diff --git a/ojluni/src/main/java/java/util/concurrent/ThreadPoolExecutor.java b/ojluni/src/main/java/java/util/concurrent/ThreadPoolExecutor.java
index b0096a4..752c97d 100644
--- a/ojluni/src/main/java/java/util/concurrent/ThreadPoolExecutor.java
+++ b/ojluni/src/main/java/java/util/concurrent/ThreadPoolExecutor.java
@@ -79,31 +79,28 @@
  *
  * <dt>Core and maximum pool sizes</dt>
  *
- * <dd style="font-family:'DejaVu Sans', Arial, Helvetica, sans-serif">
- * A {@code ThreadPoolExecutor} will automatically adjust the
+ * <dd>A {@code ThreadPoolExecutor} will automatically adjust the
  * pool size (see {@link #getPoolSize})
  * according to the bounds set by
  * corePoolSize (see {@link #getCorePoolSize}) and
  * maximumPoolSize (see {@link #getMaximumPoolSize}).
  *
  * When a new task is submitted in method {@link #execute(Runnable)},
- * and fewer than corePoolSize threads are running, a new thread is
+ * if fewer than corePoolSize threads are running, a new thread is
  * created to handle the request, even if other worker threads are
- * idle.  If there are more than corePoolSize but less than
- * maximumPoolSize threads running, a new thread will be created only
- * if the queue is full.  By setting corePoolSize and maximumPoolSize
- * the same, you create a fixed-size thread pool. By setting
- * maximumPoolSize to an essentially unbounded value such as {@code
- * Integer.MAX_VALUE}, you allow the pool to accommodate an arbitrary
- * number of concurrent tasks. Most typically, core and maximum pool
- * sizes are set only upon construction, but they may also be changed
- * dynamically using {@link #setCorePoolSize} and {@link
- * #setMaximumPoolSize}. </dd>
+ * idle.  Else if fewer than maximumPoolSize threads are running, a
+ * new thread will be created to handle the request only if the queue
+ * is full.  By setting corePoolSize and maximumPoolSize the same, you
+ * create a fixed-size thread pool. By setting maximumPoolSize to an
+ * essentially unbounded value such as {@code Integer.MAX_VALUE}, you
+ * allow the pool to accommodate an arbitrary number of concurrent
+ * tasks. Most typically, core and maximum pool sizes are set only
+ * upon construction, but they may also be changed dynamically using
+ * {@link #setCorePoolSize} and {@link #setMaximumPoolSize}. </dd>
  *
  * <dt>On-demand construction</dt>
  *
- * <dd style="font-family:'DejaVu Sans', Arial, Helvetica, sans-serif">
- * By default, even core threads are initially created and
+ * <dd>By default, even core threads are initially created and
  * started only when new tasks arrive, but this can be overridden
  * dynamically using method {@link #prestartCoreThread} or {@link
  * #prestartAllCoreThreads}.  You probably want to prestart threads if
@@ -111,8 +108,7 @@
  *
  * <dt>Creating new threads</dt>
  *
- * <dd style="font-family:'DejaVu Sans', Arial, Helvetica, sans-serif">
- * New threads are created using a {@link ThreadFactory}.  If not
+ * <dd>New threads are created using a {@link ThreadFactory}.  If not
  * otherwise specified, a {@link Executors#defaultThreadFactory} is
  * used, that creates threads to all be in the same {@link
  * ThreadGroup} and with the same {@code NORM_PRIORITY} priority and
@@ -129,8 +125,7 @@
  *
  * <dt>Keep-alive times</dt>
  *
- * <dd style="font-family:'DejaVu Sans', Arial, Helvetica, sans-serif">
- * If the pool currently has more than corePoolSize threads,
+ * <dd>If the pool currently has more than corePoolSize threads,
  * excess threads will be terminated if they have been idle for more
  * than the keepAliveTime (see {@link #getKeepAliveTime(TimeUnit)}).
  * This provides a means of reducing resource consumption when the
@@ -147,8 +142,7 @@
  *
  * <dt>Queuing</dt>
  *
- * <dd style="font-family:'DejaVu Sans', Arial, Helvetica, sans-serif">
- * Any {@link BlockingQueue} may be used to transfer and hold
+ * <dd>Any {@link BlockingQueue} may be used to transfer and hold
  * submitted tasks.  The use of this queue interacts with pool sizing:
  *
  * <ul>
@@ -213,8 +207,7 @@
  *
  * <dt>Rejected tasks</dt>
  *
- * <dd style="font-family:'DejaVu Sans', Arial, Helvetica, sans-serif">
- * New tasks submitted in method {@link #execute(Runnable)} will be
+ * <dd>New tasks submitted in method {@link #execute(Runnable)} will be
  * <em>rejected</em> when the Executor has been shut down, and also when
  * the Executor uses finite bounds for both maximum threads and work queue
  * capacity, and is saturated.  In either case, the {@code execute} method
@@ -225,9 +218,8 @@
  *
  * <ol>
  *
- * <li>In the default {@link ThreadPoolExecutor.AbortPolicy}, the
- * handler throws a runtime {@link RejectedExecutionException} upon
- * rejection.
+ * <li>In the default {@link ThreadPoolExecutor.AbortPolicy}, the handler
+ * throws a runtime {@link RejectedExecutionException} upon rejection.
  *
  * <li>In {@link ThreadPoolExecutor.CallerRunsPolicy}, the thread
  * that invokes {@code execute} itself runs the task. This provides a
@@ -251,8 +243,7 @@
  *
  * <dt>Hook methods</dt>
  *
- * <dd style="font-family:'DejaVu Sans', Arial, Helvetica, sans-serif">
- * This class provides {@code protected} overridable
+ * <dd>This class provides {@code protected} overridable
  * {@link #beforeExecute(Thread, Runnable)} and
  * {@link #afterExecute(Runnable, Throwable)} methods that are called
  * before and after execution of each task.  These can be used to
@@ -268,22 +259,19 @@
  *
  * <dt>Queue maintenance</dt>
  *
- * <dd style="font-family:'DejaVu Sans', Arial, Helvetica, sans-serif">
- * Method {@link #getQueue()} allows access to the work queue
+ * <dd>Method {@link #getQueue()} allows access to the work queue
  * for purposes of monitoring and debugging.  Use of this method for
  * any other purpose is strongly discouraged.  Two supplied methods,
  * {@link #remove(Runnable)} and {@link #purge} are available to
  * assist in storage reclamation when large numbers of queued tasks
  * become cancelled.</dd>
  *
- * <dt>Finalization</dt>
+ * <dt>Reclamation</dt>
  *
- * <dd style="font-family:'DejaVu Sans', Arial, Helvetica, sans-serif">
- * A pool that is no longer referenced in a program <em>AND</em>
- * has no remaining threads will be {@code shutdown} automatically. If
- * you would like to ensure that unreferenced pools are reclaimed even
- * if users forget to call {@link #shutdown}, then you must arrange
- * that unused threads eventually die, by setting appropriate
+ * <dd>A pool that is no longer referenced in a program <em>AND</em>
+ * has no remaining threads may be reclaimed (garbage collected)
+ * without being explicitly shutdown. You can configure a pool to
+ * allow all unused threads to eventually die by setting appropriate
  * keep-alive times, using a lower bound of zero core threads and/or
  * setting {@link #allowCoreThreadTimeOut(boolean)}.  </dd>
  *
@@ -374,7 +362,7 @@
      * time, but need not hit each state. The transitions are:
      *
      * RUNNING -> SHUTDOWN
-     *    On invocation of shutdown(), perhaps implicitly in finalize()
+     *    On invocation of shutdown()
      * (RUNNING or SHUTDOWN) -> STOP
      *    On invocation of shutdownNow()
      * SHUTDOWN -> TIDYING
@@ -398,7 +386,7 @@
     @ReachabilitySensitive
     private final AtomicInteger ctl = new AtomicInteger(ctlOf(RUNNING, 0));
     private static final int COUNT_BITS = Integer.SIZE - 3;
-    private static final int CAPACITY   = (1 << COUNT_BITS) - 1;
+    private static final int COUNT_MASK = (1 << COUNT_BITS) - 1;
 
     // runState is stored in the high-order bits
     private static final int RUNNING    = -1 << COUNT_BITS;
@@ -408,8 +396,8 @@
     private static final int TERMINATED =  3 << COUNT_BITS;
 
     // Packing and unpacking ctl
-    private static int runStateOf(int c)     { return c & ~CAPACITY; }
-    private static int workerCountOf(int c)  { return c & CAPACITY; }
+    private static int runStateOf(int c)     { return c & ~COUNT_MASK; }
+    private static int workerCountOf(int c)  { return c & COUNT_MASK; }
     private static int ctlOf(int rs, int wc) { return rs | wc; }
 
     /*
@@ -449,7 +437,7 @@
      * decrements are performed within getTask.
      */
     private void decrementWorkerCount() {
-        do {} while (! compareAndDecrementWorkerCount(ctl.get()));
+        ctl.addAndGet(-1);
     }
 
     /**
@@ -555,12 +543,17 @@
      * Core pool size is the minimum number of workers to keep alive
      * (and not allow to time out etc) unless allowCoreThreadTimeOut
      * is set, in which case the minimum is zero.
+     *
+     * Since the worker count is actually stored in COUNT_BITS bits,
+     * the effective limit is {@code corePoolSize & COUNT_MASK}.
      */
     private volatile int corePoolSize;
 
     /**
-     * Maximum pool size. Note that the actual maximum is internally
-     * bounded by CAPACITY.
+     * Maximum pool size.
+     *
+     * Since the worker count is actually stored in COUNT_BITS bits,
+     * the effective limit is {@code maximumPoolSize & COUNT_MASK}.
      */
     private volatile int maximumPoolSize;
 
@@ -626,6 +619,9 @@
         /** Per-thread task counter */
         volatile long completedTasks;
 
+        // TODO: switch to AbstractQueuedLongSynchronizer and move
+        // completedTasks into the lock word.
+
         /**
          * Creates with given first task and thread from ThreadFactory.
          * @param firstTask the first task (null if none)
@@ -716,7 +712,7 @@
             int c = ctl.get();
             if (isRunning(c) ||
                 runStateAtLeast(c, TIDYING) ||
-                (runStateOf(c) == SHUTDOWN && ! workQueue.isEmpty()))
+                (runStateLessThan(c, STOP) && ! workQueue.isEmpty()))
                 return;
             if (workerCountOf(c) != 0) { // Eligible to terminate
                 interruptIdleWorkers(ONLY_ONE);
@@ -755,17 +751,12 @@
      * specially.
      */
     private void checkShutdownAccess() {
+        // assert mainLock.isHeldByCurrentThread();
         SecurityManager security = System.getSecurityManager();
         if (security != null) {
             security.checkPermission(shutdownPerm);
-            final ReentrantLock mainLock = this.mainLock;
-            mainLock.lock();
-            try {
-                for (Worker w : workers)
-                    security.checkAccess(w.thread);
-            } finally {
-                mainLock.unlock();
-            }
+            for (Worker w : workers)
+                security.checkAccess(w.thread);
         }
     }
 
@@ -774,14 +765,9 @@
      * (in which case some threads may remain uninterrupted).
      */
     private void interruptWorkers() {
-        final ReentrantLock mainLock = this.mainLock;
-        mainLock.lock();
-        try {
-            for (Worker w : workers)
-                w.interruptIfStarted();
-        } finally {
-            mainLock.unlock();
-        }
+        // assert mainLock.isHeldByCurrentThread();
+        for (Worker w : workers)
+            w.interruptIfStarted();
     }
 
     /**
@@ -857,17 +843,6 @@
     }
 
     /**
-     * State check needed by ScheduledThreadPoolExecutor to
-     * enable running tasks during shutdown.
-     *
-     * @param shutdownOK true if should return true if SHUTDOWN
-     */
-    final boolean isRunningOrShutdown(boolean shutdownOK) {
-        int rs = runStateOf(ctl.get());
-        return rs == RUNNING || (rs == SHUTDOWN && shutdownOK);
-    }
-
-    /**
      * Drains the task queue into a new list, normally using
      * drainTo. But if the queue is a DelayQueue or any other kind of
      * queue for which poll or drainTo may fail to remove some
@@ -918,26 +893,22 @@
      */
     private boolean addWorker(Runnable firstTask, boolean core) {
         retry:
-        for (;;) {
-            int c = ctl.get();
-            int rs = runStateOf(c);
-
+        for (int c = ctl.get();;) {
             // Check if queue empty only if necessary.
-            if (rs >= SHUTDOWN &&
-                ! (rs == SHUTDOWN &&
-                   firstTask == null &&
-                   ! workQueue.isEmpty()))
+            if (runStateAtLeast(c, SHUTDOWN)
+                && (runStateAtLeast(c, STOP)
+                    || firstTask != null
+                    || workQueue.isEmpty()))
                 return false;
 
             for (;;) {
-                int wc = workerCountOf(c);
-                if (wc >= CAPACITY ||
-                    wc >= (core ? corePoolSize : maximumPoolSize))
+                if (workerCountOf(c)
+                    >= ((core ? corePoolSize : maximumPoolSize) & COUNT_MASK))
                     return false;
                 if (compareAndIncrementWorkerCount(c))
                     break retry;
                 c = ctl.get();  // Re-read ctl
-                if (runStateOf(c) != rs)
+                if (runStateAtLeast(c, SHUTDOWN))
                     continue retry;
                 // else CAS failed due to workerCount change; retry inner loop
             }
@@ -956,10 +927,10 @@
                     // Recheck while holding lock.
                     // Back out on ThreadFactory failure or if
                     // shut down before lock acquired.
-                    int rs = runStateOf(ctl.get());
+                    int c = ctl.get();
 
-                    if (rs < SHUTDOWN ||
-                        (rs == SHUTDOWN && firstTask == null)) {
+                    if (isRunning(c) ||
+                        (runStateLessThan(c, STOP) && firstTask == null)) {
                         if (t.isAlive()) // precheck that t is startable
                             throw new IllegalThreadStateException();
                         workers.add(w);
@@ -1066,10 +1037,10 @@
 
         for (;;) {
             int c = ctl.get();
-            int rs = runStateOf(c);
 
             // Check if queue empty only if necessary.
-            if (rs >= SHUTDOWN && (rs >= STOP || workQueue.isEmpty())) {
+            if (runStateAtLeast(c, SHUTDOWN)
+                && (runStateAtLeast(c, STOP) || workQueue.isEmpty())) {
                 decrementWorkerCount();
                 return null;
             }
@@ -1162,17 +1133,12 @@
                     wt.interrupt();
                 try {
                     beforeExecute(wt, task);
-                    Throwable thrown = null;
                     try {
                         task.run();
-                    } catch (RuntimeException x) {
-                        thrown = x; throw x;
-                    } catch (Error x) {
-                        thrown = x; throw x;
-                    } catch (Throwable x) {
-                        thrown = x; throw new Error(x);
-                    } finally {
-                        afterExecute(task, thrown);
+                        afterExecute(task, null);
+                    } catch (Throwable ex) {
+                        afterExecute(task, ex);
+                        throw ex;
                     }
                 } finally {
                     task = null;
@@ -1190,9 +1156,11 @@
 
     /**
      * Creates a new {@code ThreadPoolExecutor} with the given initial
-     * parameters and default thread factory and rejected execution handler.
-     * It may be more convenient to use one of the {@link Executors} factory
-     * methods instead of this general purpose constructor.
+     * parameters, the default thread factory and the default rejected
+     * execution handler.
+     *
+     * <p>It may be more convenient to use one of the {@link Executors}
+     * factory methods instead of this general purpose constructor.
      *
      * @param corePoolSize the number of threads to keep in the pool, even
      *        if they are idle, unless {@code allowCoreThreadTimeOut} is set
@@ -1223,7 +1191,8 @@
 
     /**
      * Creates a new {@code ThreadPoolExecutor} with the given initial
-     * parameters and default rejected execution handler.
+     * parameters and {@linkplain ThreadPoolExecutor.AbortPolicy
+     * default rejected execution handler}.
      *
      * @param corePoolSize the number of threads to keep in the pool, even
      *        if they are idle, unless {@code allowCoreThreadTimeOut} is set
@@ -1258,7 +1227,8 @@
 
     /**
      * Creates a new {@code ThreadPoolExecutor} with the given initial
-     * parameters and default thread factory.
+     * parameters and
+     * {@linkplain Executors#defaultThreadFactory default thread factory}.
      *
      * @param corePoolSize the number of threads to keep in the pool, even
      *        if they are idle, unless {@code allowCoreThreadTimeOut} is set
@@ -1346,7 +1316,7 @@
      *
      * If the task cannot be submitted for execution, either because this
      * executor has been shutdown or because its capacity has been reached,
-     * the task is handled by the current {@code RejectedExecutionHandler}.
+     * the task is handled by the current {@link RejectedExecutionHandler}.
      *
      * @param command the task to execute
      * @throws RejectedExecutionException at discretion of
@@ -1451,7 +1421,12 @@
     }
 
     public boolean isShutdown() {
-        return ! isRunning(ctl.get());
+        return runStateAtLeast(ctl.get(), SHUTDOWN);
+    }
+
+    /** Used by ScheduledThreadPoolExecutor. */
+    boolean isStopped() {
+        return runStateAtLeast(ctl.get(), STOP);
     }
 
     /**
@@ -1467,7 +1442,7 @@
      */
     public boolean isTerminating() {
         int c = ctl.get();
-        return ! isRunning(c) && runStateLessThan(c, TERMINATED);
+        return runStateAtLeast(c, SHUTDOWN) && runStateLessThan(c, TERMINATED);
     }
 
     public boolean isTerminated() {
@@ -1480,7 +1455,7 @@
         final ReentrantLock mainLock = this.mainLock;
         mainLock.lock();
         try {
-            while (!runStateAtLeast(ctl.get(), TERMINATED)) {
+            while (runStateLessThan(ctl.get(), TERMINATED)) {
                 if (nanos <= 0L)
                     return false;
                 nanos = termination.awaitNanos(nanos);
@@ -1491,13 +1466,17 @@
         }
     }
 
+    // Override without "throws Throwable" for compatibility with subclasses
+    // whose finalize method invokes super.finalize() (as is recommended).
+    // Before JDK 11, finalize() had a non-empty method body.
+
     /**
-     * Invokes {@code shutdown} when this executor is no longer
-     * referenced and it has no threads.
+     * @implNote Previous versions of this class had a finalize method
+     * that shut down this executor, but in this version, finalize
+     * does nothing.
      */
-    protected void finalize() {
-        shutdown();
-    }
+    @Deprecated(since="9")
+    protected void finalize() {}
 
     /**
      * Sets the thread factory used to create new threads.
@@ -1946,7 +1925,7 @@
         }
         int c = ctl.get();
         String runState =
-            runStateLessThan(c, SHUTDOWN) ? "Running" :
+            isRunning(c) ? "Running" :
             runStateAtLeast(c, TERMINATED) ? "Terminated" :
             "Shutting down";
         return super.toString() +
@@ -2065,7 +2044,10 @@
 
     /**
      * A handler for rejected tasks that throws a
-     * {@code RejectedExecutionException}.
+     * {@link RejectedExecutionException}.
+     *
+     * This is the default handler for {@link ThreadPoolExecutor} and
+     * {@link ScheduledThreadPoolExecutor}.
      */
     public static class AbortPolicy implements RejectedExecutionHandler {
         /**
diff --git a/ojluni/src/main/java/java/util/concurrent/TimeUnit.java b/ojluni/src/main/java/java/util/concurrent/TimeUnit.java
index 44d7964..f02aa9f 100644
--- a/ojluni/src/main/java/java/util/concurrent/TimeUnit.java
+++ b/ojluni/src/main/java/java/util/concurrent/TimeUnit.java
@@ -35,6 +35,8 @@
 
 package java.util.concurrent;
 
+import java.time.Duration;
+import java.time.temporal.ChronoUnit;
 import java.util.Objects;
 
 /**
@@ -74,136 +76,93 @@
     /**
      * Time unit representing one thousandth of a microsecond.
      */
-    NANOSECONDS {
-        public long toNanos(long d)   { return d; }
-        public long toMicros(long d)  { return d/(C1/C0); }
-        public long toMillis(long d)  { return d/(C2/C0); }
-        public long toSeconds(long d) { return d/(C3/C0); }
-        public long toMinutes(long d) { return d/(C4/C0); }
-        public long toHours(long d)   { return d/(C5/C0); }
-        public long toDays(long d)    { return d/(C6/C0); }
-        public long convert(long d, TimeUnit u) { return u.toNanos(d); }
-        int excessNanos(long d, long m) { return (int)(d - (m*C2)); }
-    },
-
+    NANOSECONDS(TimeUnit.NANO_SCALE),
     /**
      * Time unit representing one thousandth of a millisecond.
      */
-    MICROSECONDS {
-        public long toNanos(long d)   { return x(d, C1/C0, MAX/(C1/C0)); }
-        public long toMicros(long d)  { return d; }
-        public long toMillis(long d)  { return d/(C2/C1); }
-        public long toSeconds(long d) { return d/(C3/C1); }
-        public long toMinutes(long d) { return d/(C4/C1); }
-        public long toHours(long d)   { return d/(C5/C1); }
-        public long toDays(long d)    { return d/(C6/C1); }
-        public long convert(long d, TimeUnit u) { return u.toMicros(d); }
-        int excessNanos(long d, long m) { return (int)((d*C1) - (m*C2)); }
-    },
-
+    MICROSECONDS(TimeUnit.MICRO_SCALE),
     /**
      * Time unit representing one thousandth of a second.
      */
-    MILLISECONDS {
-        public long toNanos(long d)   { return x(d, C2/C0, MAX/(C2/C0)); }
-        public long toMicros(long d)  { return x(d, C2/C1, MAX/(C2/C1)); }
-        public long toMillis(long d)  { return d; }
-        public long toSeconds(long d) { return d/(C3/C2); }
-        public long toMinutes(long d) { return d/(C4/C2); }
-        public long toHours(long d)   { return d/(C5/C2); }
-        public long toDays(long d)    { return d/(C6/C2); }
-        public long convert(long d, TimeUnit u) { return u.toMillis(d); }
-        int excessNanos(long d, long m) { return 0; }
-    },
-
+    MILLISECONDS(TimeUnit.MILLI_SCALE),
     /**
      * Time unit representing one second.
      */
-    SECONDS {
-        public long toNanos(long d)   { return x(d, C3/C0, MAX/(C3/C0)); }
-        public long toMicros(long d)  { return x(d, C3/C1, MAX/(C3/C1)); }
-        public long toMillis(long d)  { return x(d, C3/C2, MAX/(C3/C2)); }
-        public long toSeconds(long d) { return d; }
-        public long toMinutes(long d) { return d/(C4/C3); }
-        public long toHours(long d)   { return d/(C5/C3); }
-        public long toDays(long d)    { return d/(C6/C3); }
-        public long convert(long d, TimeUnit u) { return u.toSeconds(d); }
-        int excessNanos(long d, long m) { return 0; }
-    },
-
+    SECONDS(TimeUnit.SECOND_SCALE),
     /**
      * Time unit representing sixty seconds.
      * @since 1.6
      */
-    MINUTES {
-        public long toNanos(long d)   { return x(d, C4/C0, MAX/(C4/C0)); }
-        public long toMicros(long d)  { return x(d, C4/C1, MAX/(C4/C1)); }
-        public long toMillis(long d)  { return x(d, C4/C2, MAX/(C4/C2)); }
-        public long toSeconds(long d) { return x(d, C4/C3, MAX/(C4/C3)); }
-        public long toMinutes(long d) { return d; }
-        public long toHours(long d)   { return d/(C5/C4); }
-        public long toDays(long d)    { return d/(C6/C4); }
-        public long convert(long d, TimeUnit u) { return u.toMinutes(d); }
-        int excessNanos(long d, long m) { return 0; }
-    },
-
+    MINUTES(TimeUnit.MINUTE_SCALE),
     /**
      * Time unit representing sixty minutes.
      * @since 1.6
      */
-    HOURS {
-        public long toNanos(long d)   { return x(d, C5/C0, MAX/(C5/C0)); }
-        public long toMicros(long d)  { return x(d, C5/C1, MAX/(C5/C1)); }
-        public long toMillis(long d)  { return x(d, C5/C2, MAX/(C5/C2)); }
-        public long toSeconds(long d) { return x(d, C5/C3, MAX/(C5/C3)); }
-        public long toMinutes(long d) { return x(d, C5/C4, MAX/(C5/C4)); }
-        public long toHours(long d)   { return d; }
-        public long toDays(long d)    { return d/(C6/C5); }
-        public long convert(long d, TimeUnit u) { return u.toHours(d); }
-        int excessNanos(long d, long m) { return 0; }
-    },
-
+    HOURS(TimeUnit.HOUR_SCALE),
     /**
      * Time unit representing twenty four hours.
      * @since 1.6
      */
-    DAYS {
-        public long toNanos(long d)   { return x(d, C6/C0, MAX/(C6/C0)); }
-        public long toMicros(long d)  { return x(d, C6/C1, MAX/(C6/C1)); }
-        public long toMillis(long d)  { return x(d, C6/C2, MAX/(C6/C2)); }
-        public long toSeconds(long d) { return x(d, C6/C3, MAX/(C6/C3)); }
-        public long toMinutes(long d) { return x(d, C6/C4, MAX/(C6/C4)); }
-        public long toHours(long d)   { return x(d, C6/C5, MAX/(C6/C5)); }
-        public long toDays(long d)    { return d; }
-        public long convert(long d, TimeUnit u) { return u.toDays(d); }
-        int excessNanos(long d, long m) { return 0; }
-    };
+    DAYS(TimeUnit.DAY_SCALE);
 
-    // Handy constants for conversion methods
-    static final long C0 = 1L;
-    static final long C1 = C0 * 1000L;
-    static final long C2 = C1 * 1000L;
-    static final long C3 = C2 * 1000L;
-    static final long C4 = C3 * 60L;
-    static final long C5 = C4 * 60L;
-    static final long C6 = C5 * 24L;
+    // Scales as constants
+    private static final long NANO_SCALE   = 1L;
+    private static final long MICRO_SCALE  = 1000L * NANO_SCALE;
+    private static final long MILLI_SCALE  = 1000L * MICRO_SCALE;
+    private static final long SECOND_SCALE = 1000L * MILLI_SCALE;
+    private static final long MINUTE_SCALE = 60L * SECOND_SCALE;
+    private static final long HOUR_SCALE   = 60L * MINUTE_SCALE;
+    private static final long DAY_SCALE    = 24L * HOUR_SCALE;
 
-    static final long MAX = Long.MAX_VALUE;
-
-    /**
-     * Scale d by m, checking for overflow.
-     * This has a short name to make above code more readable.
+    /*
+     * Instances cache conversion ratios and saturation cutoffs for
+     * the units up through SECONDS. Other cases compute them, in
+     * method cvt.
      */
-    static long x(long d, long m, long over) {
-        if (d > +over) return Long.MAX_VALUE;
-        if (d < -over) return Long.MIN_VALUE;
-        return d * m;
+
+    private final long scale;
+    private final long maxNanos;
+    private final long maxMicros;
+    private final long maxMillis;
+    private final long maxSecs;
+    private final long microRatio;
+    private final int milliRatio;   // fits in 32 bits
+    private final int secRatio;     // fits in 32 bits
+
+    private TimeUnit(long s) {
+        this.scale = s;
+        this.maxNanos = Long.MAX_VALUE / s;
+        long ur = (s >= MICRO_SCALE) ? (s / MICRO_SCALE) : (MICRO_SCALE / s);
+        this.microRatio = ur;
+        this.maxMicros = Long.MAX_VALUE / ur;
+        long mr = (s >= MILLI_SCALE) ? (s / MILLI_SCALE) : (MILLI_SCALE / s);
+        this.milliRatio = (int)mr;
+        this.maxMillis = Long.MAX_VALUE / mr;
+        long sr = (s >= SECOND_SCALE) ? (s / SECOND_SCALE) : (SECOND_SCALE / s);
+        this.secRatio = (int)sr;
+        this.maxSecs = Long.MAX_VALUE / sr;
     }
 
-    // To maintain full signature compatibility with 1.5, and to improve the
-    // clarity of the generated javadoc (see 6287639: Abstract methods in
-    // enum classes should not be listed as abstract), method convert
-    // etc. are not declared abstract but otherwise act as abstract methods.
+    /**
+     * General conversion utility.
+     *
+     * @param d duration
+     * @param dst result unit scale
+     * @param src source unit scale
+     */
+    private static long cvt(long d, long dst, long src) {
+        long r, m;
+        if (src == dst)
+            return d;
+        else if (src < dst)
+            return d / (dst / src);
+        else if (d > (m = Long.MAX_VALUE / (r = src / dst)))
+            return Long.MAX_VALUE;
+        else if (d < -m)
+            return Long.MIN_VALUE;
+        else
+            return d * r;
+    }
 
     /**
      * Converts the given time duration in the given unit to this unit.
@@ -220,11 +179,65 @@
      * @param sourceDuration the time duration in the given {@code sourceUnit}
      * @param sourceUnit the unit of the {@code sourceDuration} argument
      * @return the converted duration in this unit,
-     * or {@code Long.MIN_VALUE} if conversion would negatively
-     * overflow, or {@code Long.MAX_VALUE} if it would positively overflow.
+     * or {@code Long.MIN_VALUE} if conversion would negatively overflow,
+     * or {@code Long.MAX_VALUE} if it would positively overflow.
      */
     public long convert(long sourceDuration, TimeUnit sourceUnit) {
-        throw new AbstractMethodError();
+        switch (this) {
+        case NANOSECONDS:  return sourceUnit.toNanos(sourceDuration);
+        case MICROSECONDS: return sourceUnit.toMicros(sourceDuration);
+        case MILLISECONDS: return sourceUnit.toMillis(sourceDuration);
+        case SECONDS:      return sourceUnit.toSeconds(sourceDuration);
+        default: return cvt(sourceDuration, scale, sourceUnit.scale);
+        }
+    }
+
+    /**
+     * Converts the given time duration to this unit.
+     *
+     * <p>For any TimeUnit {@code unit},
+     * {@code unit.convert(Duration.ofNanos(n))}
+     * is equivalent to
+     * {@code unit.convert(n, NANOSECONDS)}, and
+     * {@code unit.convert(Duration.of(n, unit.toChronoUnit()))}
+     * is equivalent to {@code n} (in the absence of overflow).
+     *
+     * @apiNote
+     * This method differs from {@link Duration#toNanos()} in that it
+     * does not throw {@link ArithmeticException} on numeric overflow.
+     *
+     * @param duration the time duration
+     * @return the converted duration in this unit,
+     * or {@code Long.MIN_VALUE} if conversion would negatively overflow,
+     * or {@code Long.MAX_VALUE} if it would positively overflow.
+     * @throws NullPointerException if {@code duration} is null
+     * @see Duration#of(long,TemporalUnit)
+     * @since 11
+     */
+    public long convert(Duration duration) {
+        long secs = duration.getSeconds();
+        int nano = duration.getNano();
+        if (secs < 0 && nano > 0) {
+            // use representation compatible with integer division
+            secs++;
+            nano -= (int) SECOND_SCALE;
+        }
+        final long s, nanoVal;
+        // Optimize for the common case - NANOSECONDS without overflow
+        if (this == NANOSECONDS)
+            nanoVal = nano;
+        else if ((s = scale) < SECOND_SCALE)
+            nanoVal = nano / s;
+        else if (this == SECONDS)
+            return secs;
+        else
+            return secs / secRatio;
+        long val = secs * secRatio + nanoVal;
+        return ((secs < maxSecs && secs > -maxSecs) ||
+                (secs == maxSecs && val > 0) ||
+                (secs == -maxSecs && val < 0))
+            ? val
+            : (secs > 0) ? Long.MAX_VALUE : Long.MIN_VALUE;
     }
 
     /**
@@ -232,11 +245,19 @@
      * {@link #convert(long, TimeUnit) NANOSECONDS.convert(duration, this)}.
      * @param duration the duration
      * @return the converted duration,
-     * or {@code Long.MIN_VALUE} if conversion would negatively
-     * overflow, or {@code Long.MAX_VALUE} if it would positively overflow.
+     * or {@code Long.MIN_VALUE} if conversion would negatively overflow,
+     * or {@code Long.MAX_VALUE} if it would positively overflow.
      */
     public long toNanos(long duration) {
-        throw new AbstractMethodError();
+        long s, m;
+        if ((s = scale) == NANO_SCALE)
+            return duration;
+        else if (duration > (m = maxNanos))
+            return Long.MAX_VALUE;
+        else if (duration < -m)
+            return Long.MIN_VALUE;
+        else
+            return duration * s;
     }
 
     /**
@@ -244,11 +265,19 @@
      * {@link #convert(long, TimeUnit) MICROSECONDS.convert(duration, this)}.
      * @param duration the duration
      * @return the converted duration,
-     * or {@code Long.MIN_VALUE} if conversion would negatively
-     * overflow, or {@code Long.MAX_VALUE} if it would positively overflow.
+     * or {@code Long.MIN_VALUE} if conversion would negatively overflow,
+     * or {@code Long.MAX_VALUE} if it would positively overflow.
      */
     public long toMicros(long duration) {
-        throw new AbstractMethodError();
+        long s, m;
+        if ((s = scale) <= MICRO_SCALE)
+            return (s == MICRO_SCALE) ? duration : duration / microRatio;
+        else if (duration > (m = maxMicros))
+            return Long.MAX_VALUE;
+        else if (duration < -m)
+            return Long.MIN_VALUE;
+        else
+            return duration * microRatio;
     }
 
     /**
@@ -256,11 +285,19 @@
      * {@link #convert(long, TimeUnit) MILLISECONDS.convert(duration, this)}.
      * @param duration the duration
      * @return the converted duration,
-     * or {@code Long.MIN_VALUE} if conversion would negatively
-     * overflow, or {@code Long.MAX_VALUE} if it would positively overflow.
+     * or {@code Long.MIN_VALUE} if conversion would negatively overflow,
+     * or {@code Long.MAX_VALUE} if it would positively overflow.
      */
     public long toMillis(long duration) {
-        throw new AbstractMethodError();
+        long s, m;
+        if ((s = scale) <= MILLI_SCALE)
+            return (s == MILLI_SCALE) ? duration : duration / milliRatio;
+        else if (duration > (m = maxMillis))
+            return Long.MAX_VALUE;
+        else if (duration < -m)
+            return Long.MIN_VALUE;
+        else
+            return duration * milliRatio;
     }
 
     /**
@@ -268,11 +305,19 @@
      * {@link #convert(long, TimeUnit) SECONDS.convert(duration, this)}.
      * @param duration the duration
      * @return the converted duration,
-     * or {@code Long.MIN_VALUE} if conversion would negatively
-     * overflow, or {@code Long.MAX_VALUE} if it would positively overflow.
+     * or {@code Long.MIN_VALUE} if conversion would negatively overflow,
+     * or {@code Long.MAX_VALUE} if it would positively overflow.
      */
     public long toSeconds(long duration) {
-        throw new AbstractMethodError();
+        long s, m;
+        if ((s = scale) <= SECOND_SCALE)
+            return (s == SECOND_SCALE) ? duration : duration / secRatio;
+        else if (duration > (m = maxSecs))
+            return Long.MAX_VALUE;
+        else if (duration < -m)
+            return Long.MIN_VALUE;
+        else
+            return duration * secRatio;
     }
 
     /**
@@ -280,12 +325,12 @@
      * {@link #convert(long, TimeUnit) MINUTES.convert(duration, this)}.
      * @param duration the duration
      * @return the converted duration,
-     * or {@code Long.MIN_VALUE} if conversion would negatively
-     * overflow, or {@code Long.MAX_VALUE} if it would positively overflow.
+     * or {@code Long.MIN_VALUE} if conversion would negatively overflow,
+     * or {@code Long.MAX_VALUE} if it would positively overflow.
      * @since 1.6
      */
     public long toMinutes(long duration) {
-        throw new AbstractMethodError();
+        return cvt(duration, MINUTE_SCALE, scale);
     }
 
     /**
@@ -293,12 +338,12 @@
      * {@link #convert(long, TimeUnit) HOURS.convert(duration, this)}.
      * @param duration the duration
      * @return the converted duration,
-     * or {@code Long.MIN_VALUE} if conversion would negatively
-     * overflow, or {@code Long.MAX_VALUE} if it would positively overflow.
+     * or {@code Long.MIN_VALUE} if conversion would negatively overflow,
+     * or {@code Long.MAX_VALUE} if it would positively overflow.
      * @since 1.6
      */
     public long toHours(long duration) {
-        throw new AbstractMethodError();
+        return cvt(duration, HOUR_SCALE, scale);
     }
 
     /**
@@ -309,7 +354,7 @@
      * @since 1.6
      */
     public long toDays(long duration) {
-        throw new AbstractMethodError();
+        return cvt(duration, DAY_SCALE, scale);
     }
 
     /**
@@ -319,7 +364,15 @@
      * @param m the number of milliseconds
      * @return the number of nanoseconds
      */
-    abstract int excessNanos(long d, long m);
+    private int excessNanos(long d, long m) {
+        long s;
+        if ((s = scale) == NANO_SCALE)
+            return (int)(d - (m * MILLI_SCALE));
+        else if (s == MICRO_SCALE)
+            return (int)((d * 1000L) - (m * MILLI_SCALE));
+        else
+            return 0;
+    }
 
     /**
      * Performs a timed {@link Object#wait(long, int) Object.wait}
@@ -327,16 +380,18 @@
      * This is a convenience method that converts timeout arguments
      * into the form required by the {@code Object.wait} method.
      *
-     * <p>For example, you could implement a blocking {@code poll}
-     * method (see {@link BlockingQueue#poll BlockingQueue.poll})
+     * <p>For example, you could implement a blocking {@code poll} method
+     * (see {@link BlockingQueue#poll(long, TimeUnit) BlockingQueue.poll})
      * using:
      *
      * <pre> {@code
-     * public synchronized Object poll(long timeout, TimeUnit unit)
+     * public E poll(long timeout, TimeUnit unit)
      *     throws InterruptedException {
-     *   while (empty) {
-     *     unit.timedWait(this, timeout);
-     *     ...
+     *   synchronized (lock) {
+     *     while (isEmpty()) {
+     *       unit.timedWait(lock, timeout);
+     *       ...
+     *     }
      *   }
      * }}</pre>
      *
@@ -392,14 +447,12 @@
         }
     }
 
-    // BEGIN Android-removed: OpenJDK 9 ChronoUnit related code.
-    /*
     /**
      * Converts this {@code TimeUnit} to the equivalent {@code ChronoUnit}.
      *
      * @return the converted equivalent ChronoUnit
      * @since 9
-     *
+     */
     public ChronoUnit toChronoUnit() {
         switch (this) {
         case NANOSECONDS:  return ChronoUnit.NANOS;
@@ -422,7 +475,7 @@
      *         equivalent TimeUnit
      * @throws NullPointerException if {@code chronoUnit} is null
      * @since 9
-     *
+     */
     public static TimeUnit of(ChronoUnit chronoUnit) {
         switch (Objects.requireNonNull(chronoUnit, "chronoUnit")) {
         case NANOS:   return TimeUnit.NANOSECONDS;
@@ -437,7 +490,5 @@
                 "No TimeUnit equivalent for " + chronoUnit);
         }
     }
-    */
-    // END Android-removed: OpenJDK 9 ChronoUnit related code.
 
 }
diff --git a/ojluni/src/main/java/java/util/concurrent/TransferQueue.java b/ojluni/src/main/java/java/util/concurrent/TransferQueue.java
index 53da597..72a4108 100644
--- a/ojluni/src/main/java/java/util/concurrent/TransferQueue.java
+++ b/ojluni/src/main/java/java/util/concurrent/TransferQueue.java
@@ -35,10 +35,6 @@
 
 package java.util.concurrent;
 
-// BEGIN android-note
-// removed link to collections framework docs
-// END android-note
-
 /**
  * A {@link BlockingQueue} in which producers may wait for consumers
  * to receive elements.  A {@code TransferQueue} may be useful for
@@ -61,6 +57,10 @@
  * with zero capacity, such as {@link SynchronousQueue}, {@code put}
  * and {@code transfer} are effectively synonymous.
  *
+ * <p>This interface is a member of the
+ * <a href="{@docRoot}/java.base/java/util/package-summary.html#CollectionsFramework">
+ * Java Collections Framework</a>.
+ *
  * @since 1.7
  * @author Doug Lea
  * @param <E> the type of elements held in this queue
diff --git a/ojluni/src/main/java/java/util/concurrent/atomic/AtomicLongFieldUpdater.java b/ojluni/src/main/java/java/util/concurrent/atomic/AtomicLongFieldUpdater.java
index 447a642..34a200b 100644
--- a/ojluni/src/main/java/java/util/concurrent/atomic/AtomicLongFieldUpdater.java
+++ b/ojluni/src/main/java/java/util/concurrent/atomic/AtomicLongFieldUpdater.java
@@ -40,10 +40,13 @@
 import java.security.AccessController;
 import java.security.PrivilegedActionException;
 import java.security.PrivilegedExceptionAction;
+import java.util.Objects;
 import java.util.function.LongBinaryOperator;
 import java.util.function.LongUnaryOperator;
-import sun.reflect.CallerSensitive;
-import sun.reflect.Reflection;
+import jdk.internal.misc.Unsafe;
+import jdk.internal.reflect.CallerSensitive;
+import jdk.internal.reflect.Reflection;
+import java.lang.invoke.VarHandle;
 
 /**
  * A reflection-based utility that enables atomic updates to
@@ -59,6 +62,10 @@
  * guarantee atomicity only with respect to other invocations of
  * {@code compareAndSet} and {@code set} on the same updater.
  *
+ * <p>Object arguments for parameters of type {@code T} that are not
+ * instances of the class passed to {@link #newUpdater} will result in
+ * a {@link ClassCastException} being thrown.
+ *
  * @since 1.5
  * @author Doug Lea
  * @param <T> The type of the object holding the updatable field
@@ -107,8 +114,6 @@
      * @param expect the expected value
      * @param update the new value
      * @return {@code true} if successful
-     * @throws ClassCastException if {@code obj} is not an instance
-     * of the class possessing the field established in the constructor
      */
     public abstract boolean compareAndSet(T obj, long expect, long update);
 
@@ -127,8 +132,6 @@
      * @param expect the expected value
      * @param update the new value
      * @return {@code true} if successful
-     * @throws ClassCastException if {@code obj} is not an instance
-     * of the class possessing the field established in the constructor
      */
     public abstract boolean weakCompareAndSet(T obj, long expect, long update);
 
@@ -153,8 +156,8 @@
     public abstract void lazySet(T obj, long newValue);
 
     /**
-     * Gets the current value held in the field of the given object managed
-     * by this updater.
+     * Returns the current value held in the field of the given object
+     * managed by this updater.
      *
      * @param obj An object whose field to get
      * @return the current value
@@ -276,10 +279,12 @@
     }
 
     /**
-     * Atomically updates the field of the given object managed by this updater
-     * with the results of applying the given function, returning the previous
-     * value. The function should be side-effect-free, since it may be
-     * re-applied when attempted updates fail due to contention among threads.
+     * Atomically updates (with memory effects as specified by {@link
+     * VarHandle#compareAndSet}) the field of the given object managed
+     * by this updater with the results of applying the given
+     * function, returning the previous value. The function should be
+     * side-effect-free, since it may be re-applied when attempted
+     * updates fail due to contention among threads.
      *
      * @param obj An object whose field to get and set
      * @param updateFunction a side-effect-free function
@@ -296,10 +301,12 @@
     }
 
     /**
-     * Atomically updates the field of the given object managed by this updater
-     * with the results of applying the given function, returning the updated
-     * value. The function should be side-effect-free, since it may be
-     * re-applied when attempted updates fail due to contention among threads.
+     * Atomically updates (with memory effects as specified by {@link
+     * VarHandle#compareAndSet}) the field of the given object managed
+     * by this updater with the results of applying the given
+     * function, returning the updated value. The function should be
+     * side-effect-free, since it may be re-applied when attempted
+     * updates fail due to contention among threads.
      *
      * @param obj An object whose field to get and set
      * @param updateFunction a side-effect-free function
@@ -316,13 +323,14 @@
     }
 
     /**
-     * Atomically updates the field of the given object managed by this
-     * updater with the results of applying the given function to the
-     * current and given values, returning the previous value. The
-     * function should be side-effect-free, since it may be re-applied
-     * when attempted updates fail due to contention among threads.  The
-     * function is applied with the current value as its first argument,
-     * and the given update as the second argument.
+     * Atomically updates (with memory effects as specified by {@link
+     * VarHandle#compareAndSet}) the field of the given object managed
+     * by this updater with the results of applying the given function
+     * to the current and given values, returning the previous value.
+     * The function should be side-effect-free, since it may be
+     * re-applied when attempted updates fail due to contention among
+     * threads.  The function is applied with the current value as its
+     * first argument, and the given update as the second argument.
      *
      * @param obj An object whose field to get and set
      * @param x the update value
@@ -341,13 +349,14 @@
     }
 
     /**
-     * Atomically updates the field of the given object managed by this
-     * updater with the results of applying the given function to the
-     * current and given values, returning the updated value. The
-     * function should be side-effect-free, since it may be re-applied
-     * when attempted updates fail due to contention among threads.  The
-     * function is applied with the current value as its first argument,
-     * and the given update as the second argument.
+     * Atomically updates (with memory effects as specified by {@link
+     * VarHandle#compareAndSet}) the field of the given object managed
+     * by this updater with the results of applying the given function
+     * to the current and given values, returning the updated value.
+     * The function should be side-effect-free, since it may be
+     * re-applied when attempted updates fail due to contention among
+     * threads.  The function is applied with the current value as its
+     * first argument, and the given update as the second argument.
      *
      * @param obj An object whose field to get and set
      * @param x the update value
@@ -366,7 +375,7 @@
     }
 
     private static final class CASUpdater<T> extends AtomicLongFieldUpdater<T> {
-        private static final sun.misc.Unsafe U = sun.misc.Unsafe.getUnsafe();
+        private static final Unsafe U = Unsafe.getUnsafe();
         private final long offset;
         /**
          * if field is protected, the subclass constructing updater, else
@@ -418,7 +427,17 @@
             if (!Modifier.isVolatile(modifiers))
                 throw new IllegalArgumentException("Must be volatile type");
 
-            this.cclass = (Modifier.isProtected(modifiers)) ? caller : tclass;
+            // Access to protected field members is restricted to receivers only
+            // of the accessing class, or one of its subclasses, and the
+            // accessing class must in turn be a subclass (or package sibling)
+            // of the protected member's defining class.
+            // If the updater refers to a protected field of a declaring class
+            // outside the current package, the receiver argument will be
+            // narrowed to the type of the accessing class.
+            this.cclass = (Modifier.isProtected(modifiers) &&
+                           tclass.isAssignableFrom(caller) &&
+                           !isSamePackage(tclass, caller))
+                          ? caller : tclass;
             this.tclass = tclass;
             this.offset = U.objectFieldOffset(field);
         }
@@ -452,12 +471,12 @@
 
         public final boolean compareAndSet(T obj, long expect, long update) {
             accessCheck(obj);
-            return U.compareAndSwapLong(obj, offset, expect, update);
+            return U.compareAndSetLong(obj, offset, expect, update);
         }
 
         public final boolean weakCompareAndSet(T obj, long expect, long update) {
             accessCheck(obj);
-            return U.compareAndSwapLong(obj, offset, expect, update);
+            return U.compareAndSetLong(obj, offset, expect, update);
         }
 
         public final void set(T obj, long newValue) {
@@ -467,7 +486,7 @@
 
         public final void lazySet(T obj, long newValue) {
             accessCheck(obj);
-            U.putOrderedLong(obj, offset, newValue);
+            U.putLongRelease(obj, offset, newValue);
         }
 
         public final long get(T obj) {
@@ -507,7 +526,7 @@
     }
 
     private static final class LockedUpdater<T> extends AtomicLongFieldUpdater<T> {
-        private static final sun.misc.Unsafe U = sun.misc.Unsafe.getUnsafe();
+        private static final Unsafe U = Unsafe.getUnsafe();
         private final long offset;
         /**
          * if field is protected, the subclass constructing updater, else
@@ -519,8 +538,8 @@
 
         LockedUpdater(final Class<T> tclass, final String fieldName,
                       final Class<?> caller) {
-            Field field = null;
-            int modifiers = 0;
+            final Field field;
+            final int modifiers;
             try {
                 // Android-changed: Skip privilege escalation which is a noop on Android.
                 /*
@@ -559,7 +578,17 @@
             if (!Modifier.isVolatile(modifiers))
                 throw new IllegalArgumentException("Must be volatile type");
 
-            this.cclass = (Modifier.isProtected(modifiers)) ? caller : tclass;
+            // Access to protected field members is restricted to receivers only
+            // of the accessing class, or one of its subclasses, and the
+            // accessing class must in turn be a subclass (or package sibling)
+            // of the protected member's defining class.
+            // If the updater refers to a protected field of a declaring class
+            // outside the current package, the receiver argument will be
+            // narrowed to the type of the accessing class.
+            this.cclass = (Modifier.isProtected(modifiers) &&
+                           tclass.isAssignableFrom(caller) &&
+                           !isSamePackage(tclass, caller))
+                          ? caller : tclass;
             this.tclass = tclass;
             this.offset = U.objectFieldOffset(field);
         }
@@ -643,4 +672,13 @@
         return false;
     }
     */
+
+    /**
+     * Returns true if the two classes have the same class loader and
+     * package qualifier
+     */
+    static boolean isSamePackage(Class<?> class1, Class<?> class2) {
+        return class1.getClassLoader() == class2.getClassLoader()
+            && Objects.equals(class1.getPackageName(), class2.getPackageName());
+    }
 }
diff --git a/ojluni/src/main/java/java/util/concurrent/atomic/AtomicMarkableReference.java b/ojluni/src/main/java/java/util/concurrent/atomic/AtomicMarkableReference.java
index b49118b..51ea84c 100644
--- a/ojluni/src/main/java/java/util/concurrent/atomic/AtomicMarkableReference.java
+++ b/ojluni/src/main/java/java/util/concurrent/atomic/AtomicMarkableReference.java
@@ -35,6 +35,9 @@
 
 package java.util.concurrent.atomic;
 
+import java.lang.invoke.MethodHandles;
+import java.lang.invoke.VarHandle;
+
 /**
  * An {@code AtomicMarkableReference} maintains an object reference
  * along with a mark bit, that can be updated atomically.
@@ -188,20 +191,19 @@
              casPair(current, Pair.of(expectedReference, newMark)));
     }
 
-    // Unsafe mechanics
-
-    private static final sun.misc.Unsafe U = sun.misc.Unsafe.getUnsafe();
-    private static final long PAIR;
+    // VarHandle mechanics
+    private static final VarHandle PAIR;
     static {
         try {
-            PAIR = U.objectFieldOffset
-                (AtomicMarkableReference.class.getDeclaredField("pair"));
+            MethodHandles.Lookup l = MethodHandles.lookup();
+            PAIR = l.findVarHandle(AtomicMarkableReference.class, "pair",
+                                   Pair.class);
         } catch (ReflectiveOperationException e) {
-            throw new Error(e);
+            throw new ExceptionInInitializerError(e);
         }
     }
 
     private boolean casPair(Pair<V> cmp, Pair<V> val) {
-        return U.compareAndSwapObject(this, PAIR, cmp, val);
+        return PAIR.compareAndSet(this, cmp, val);
     }
 }
diff --git a/ojluni/src/main/java/java/util/concurrent/atomic/AtomicReferenceFieldUpdater.java b/ojluni/src/main/java/java/util/concurrent/atomic/AtomicReferenceFieldUpdater.java
index 17423ad..513a243 100644
--- a/ojluni/src/main/java/java/util/concurrent/atomic/AtomicReferenceFieldUpdater.java
+++ b/ojluni/src/main/java/java/util/concurrent/atomic/AtomicReferenceFieldUpdater.java
@@ -40,10 +40,13 @@
 import java.security.AccessController;
 import java.security.PrivilegedActionException;
 import java.security.PrivilegedExceptionAction;
+import java.util.Objects;
 import java.util.function.BinaryOperator;
 import java.util.function.UnaryOperator;
-import sun.reflect.CallerSensitive;
-import sun.reflect.Reflection;
+import jdk.internal.misc.Unsafe;
+import jdk.internal.reflect.CallerSensitive;
+import jdk.internal.reflect.Reflection;
+import java.lang.invoke.VarHandle;
 
 /**
  * A reflection-based utility that enables atomic updates to
@@ -76,6 +79,10 @@
  * guarantee atomicity only with respect to other invocations of
  * {@code compareAndSet} and {@code set} on the same updater.
  *
+ * <p>Object arguments for parameters of type {@code T} that are not
+ * instances of the class passed to {@link #newUpdater} will result in
+ * a {@link ClassCastException} being thrown.
+ *
  * @since 1.5
  * @author Doug Lea
  * @param <T> The type of the object holding the updatable field
@@ -168,8 +175,8 @@
     public abstract void lazySet(T obj, V newValue);
 
     /**
-     * Gets the current value held in the field of the given object managed
-     * by this updater.
+     * Returns the current value held in the field of the given object
+     * managed by this updater.
      *
      * @param obj An object whose field to get
      * @return the current value
@@ -193,10 +200,12 @@
     }
 
     /**
-     * Atomically updates the field of the given object managed by this updater
-     * with the results of applying the given function, returning the previous
-     * value. The function should be side-effect-free, since it may be
-     * re-applied when attempted updates fail due to contention among threads.
+     * Atomically updates (with memory effects as specified by {@link
+     * VarHandle#compareAndSet}) the field of the given object managed
+     * by this updater with the results of applying the given
+     * function, returning the previous value. The function should be
+     * side-effect-free, since it may be re-applied when attempted
+     * updates fail due to contention among threads.
      *
      * @param obj An object whose field to get and set
      * @param updateFunction a side-effect-free function
@@ -213,10 +222,12 @@
     }
 
     /**
-     * Atomically updates the field of the given object managed by this updater
-     * with the results of applying the given function, returning the updated
-     * value. The function should be side-effect-free, since it may be
-     * re-applied when attempted updates fail due to contention among threads.
+     * Atomically updates (with memory effects as specified by {@link
+     * VarHandle#compareAndSet}) the field of the given object managed
+     * by this updater with the results of applying the given
+     * function, returning the updated value. The function should be
+     * side-effect-free, since it may be re-applied when attempted
+     * updates fail due to contention among threads.
      *
      * @param obj An object whose field to get and set
      * @param updateFunction a side-effect-free function
@@ -233,13 +244,14 @@
     }
 
     /**
-     * Atomically updates the field of the given object managed by this
-     * updater with the results of applying the given function to the
-     * current and given values, returning the previous value. The
-     * function should be side-effect-free, since it may be re-applied
-     * when attempted updates fail due to contention among threads.  The
-     * function is applied with the current value as its first argument,
-     * and the given update as the second argument.
+     * Atomically updates (with memory effects as specified by {@link
+     * VarHandle#compareAndSet}) the field of the given object managed
+     * by this updater with the results of applying the given function
+     * to the current and given values, returning the previous value.
+     * The function should be side-effect-free, since it may be
+     * re-applied when attempted updates fail due to contention among
+     * threads.  The function is applied with the current value as its
+     * first argument, and the given update as the second argument.
      *
      * @param obj An object whose field to get and set
      * @param x the update value
@@ -258,13 +270,14 @@
     }
 
     /**
-     * Atomically updates the field of the given object managed by this
-     * updater with the results of applying the given function to the
-     * current and given values, returning the updated value. The
-     * function should be side-effect-free, since it may be re-applied
-     * when attempted updates fail due to contention among threads.  The
-     * function is applied with the current value as its first argument,
-     * and the given update as the second argument.
+     * Atomically updates (with memory effects as specified by {@link
+     * VarHandle#compareAndSet}) the field of the given object managed
+     * by this updater with the results of applying the given function
+     * to the current and given values, returning the updated value.
+     * The function should be side-effect-free, since it may be
+     * re-applied when attempted updates fail due to contention among
+     * threads.  The function is applied with the current value as its
+     * first argument, and the given update as the second argument.
      *
      * @param obj An object whose field to get and set
      * @param x the update value
@@ -284,7 +297,7 @@
 
     private static final class AtomicReferenceFieldUpdaterImpl<T,V>
         extends AtomicReferenceFieldUpdater<T,V> {
-        private static final sun.misc.Unsafe U = sun.misc.Unsafe.getUnsafe();
+        private static final Unsafe U = Unsafe.getUnsafe();
         private final long offset;
         /**
          * if field is protected, the subclass constructing updater, else
@@ -356,7 +369,17 @@
             if (!Modifier.isVolatile(modifiers))
                 throw new IllegalArgumentException("Must be volatile type");
 
-            this.cclass = (Modifier.isProtected(modifiers)) ? caller : tclass;
+            // Access to protected field members is restricted to receivers only
+            // of the accessing class, or one of its subclasses, and the
+            // accessing class must in turn be a subclass (or package sibling)
+            // of the protected member's defining class.
+            // If the updater refers to a protected field of a declaring class
+            // outside the current package, the receiver argument will be
+            // narrowed to the type of the accessing class.
+            this.cclass = (Modifier.isProtected(modifiers) &&
+                           tclass.isAssignableFrom(caller) &&
+                           !isSamePackage(tclass, caller))
+                          ? caller : tclass;
             this.tclass = tclass;
             this.vclass = vclass;
             this.offset = U.objectFieldOffset(field);
@@ -382,6 +405,15 @@
         */
 
         /**
+         * Returns true if the two classes have the same class loader and
+         * package qualifier
+         */
+        private static boolean isSamePackage(Class<?> class1, Class<?> class2) {
+            return class1.getClassLoader() == class2.getClassLoader()
+                   && Objects.equals(class1.getPackageName(), class2.getPackageName());
+        }
+
+        /**
          * Checks that target argument is instance of cclass.  On
          * failure, throws cause.
          */
@@ -420,14 +452,14 @@
         public final boolean compareAndSet(T obj, V expect, V update) {
             accessCheck(obj);
             valueCheck(update);
-            return U.compareAndSwapObject(obj, offset, expect, update);
+            return U.compareAndSetObject(obj, offset, expect, update);
         }
 
         public final boolean weakCompareAndSet(T obj, V expect, V update) {
             // same implementation as strong form for now
             accessCheck(obj);
             valueCheck(update);
-            return U.compareAndSwapObject(obj, offset, expect, update);
+            return U.compareAndSetObject(obj, offset, expect, update);
         }
 
         public final void set(T obj, V newValue) {
@@ -439,7 +471,7 @@
         public final void lazySet(T obj, V newValue) {
             accessCheck(obj);
             valueCheck(newValue);
-            U.putOrderedObject(obj, offset, newValue);
+            U.putObjectRelease(obj, offset, newValue);
         }
 
         @SuppressWarnings("unchecked")
diff --git a/ojluni/src/main/java/java/util/concurrent/atomic/AtomicStampedReference.java b/ojluni/src/main/java/java/util/concurrent/atomic/AtomicStampedReference.java
index 40ceeb2..47b7c7b 100644
--- a/ojluni/src/main/java/java/util/concurrent/atomic/AtomicStampedReference.java
+++ b/ojluni/src/main/java/java/util/concurrent/atomic/AtomicStampedReference.java
@@ -35,6 +35,9 @@
 
 package java.util.concurrent.atomic;
 
+import java.lang.invoke.MethodHandles;
+import java.lang.invoke.VarHandle;
+
 /**
  * An {@code AtomicStampedReference} maintains an object reference
  * along with an integer "stamp", that can be updated atomically.
@@ -188,20 +191,19 @@
              casPair(current, Pair.of(expectedReference, newStamp)));
     }
 
-    // Unsafe mechanics
-
-    private static final sun.misc.Unsafe U = sun.misc.Unsafe.getUnsafe();
-    private static final long PAIR;
+    // VarHandle mechanics
+    private static final VarHandle PAIR;
     static {
         try {
-            PAIR = U.objectFieldOffset
-                (AtomicStampedReference.class.getDeclaredField("pair"));
+            MethodHandles.Lookup l = MethodHandles.lookup();
+            PAIR = l.findVarHandle(AtomicStampedReference.class, "pair",
+                                   Pair.class);
         } catch (ReflectiveOperationException e) {
-            throw new Error(e);
+            throw new ExceptionInInitializerError(e);
         }
     }
 
     private boolean casPair(Pair<V> cmp, Pair<V> val) {
-        return U.compareAndSwapObject(this, PAIR, cmp, val);
+        return PAIR.compareAndSet(this, cmp, val);
     }
 }
diff --git a/ojluni/src/main/java/java/util/concurrent/package-info.java b/ojluni/src/main/java/java/util/concurrent/package-info.java
index 387068d..0e992a2 100644
--- a/ojluni/src/main/java/java/util/concurrent/package-info.java
+++ b/ojluni/src/main/java/java/util/concurrent/package-info.java
@@ -200,7 +200,7 @@
  * concurrent collection is thread-safe, but not governed by a
  * single exclusion lock.  In the particular case of
  * ConcurrentHashMap, it safely permits any number of
- * concurrent reads as well as a tunable number of concurrent
+ * concurrent reads as well as a large number of concurrent
  * writes.  "Synchronized" classes can be useful when you need
  * to prevent all access to a collection via a single lock, at
  * the expense of poorer scalability.  In other cases in which
@@ -262,7 +262,6 @@
  *
  * </ul>
  *
- *
  * The methods of all classes in {@code java.util.concurrent} and its
  * subpackages extend these guarantees to higher-level
  * synchronization.  In particular: