Type cache overhaul

This change overhauls the core type cache

The new approach aims to achieve several things...
1 - cache is strictly bounded -- no variance for number of classes or ClassLoaders
2 - cache is significantly smaller
3 - cache doesn't compromise start-up time
4 - primary eviction policy isn't driven by time
5 - primary eviction policy isn't driven by GC

There are some slight compromises here.
In practice, start-up does increase slightly in a memory rich environment; however, start-up improves considerably in a memory poor environment.

The basic approach is to have a single unified Guava cache for all ClassLoaders -- nominally keyed by a composite of ClassLoader & class name

The ByteBuddy CacheProviders are simply thin wrappers around the Guava cache, associated with a particular ClassLoader

However, rather than having a large number of WeakReferences floating around, the cache assigns an ID to each ClassLoader.

To further avoid consuming memory, the cache only preserves a small map of Loader / ID assignments. This means a ClassLoader may have more than one active ID.

This introduces the possibility of ID exhaustion. That unlikely case is handled by retiring the internal CacheInstance and starting anew.
This commit is contained in:
dougqh 2020-01-23 10:55:16 -05:00
parent 17af9b752c
commit 726236bd64
4 changed files with 457 additions and 199 deletions

View File

@ -16,7 +16,7 @@ public class AgentTooling {
}
private static final DDLocationStrategy LOCATION_STRATEGY = new DDLocationStrategy();
private static final DDCachingPoolStrategy POOL_STRATEGY = new DDCachingPoolStrategy(CLEANER);
private static final DDCachingPoolStrategy POOL_STRATEGY = new DDCachingPoolStrategy();
public static void init() {
// Only need to trigger static initializers for now.

View File

@ -1,146 +1,283 @@
package datadog.trace.agent.tooling;
import static datadog.trace.agent.tooling.ClassLoaderMatcher.BOOTSTRAP_CLASSLOADER;
import static datadog.trace.agent.tooling.ClassLoaderMatcher.skipClassLoader;
import static net.bytebuddy.agent.builder.AgentBuilder.PoolStrategy;
import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;
import datadog.trace.bootstrap.WeakMap;
import java.security.SecureClassLoader;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;
import lombok.extern.slf4j.Slf4j;
import net.bytebuddy.description.type.TypeDescription;
import net.bytebuddy.dynamic.ClassFileLocator;
import net.bytebuddy.pool.TypePool;
/**
* Custom Pool strategy.
* NEW (Jan 2020) Custom Pool strategy.
*
* <p>Here we are using WeakMap.Provider as the backing ClassLoader -> CacheProvider lookup.
* <ul>
* Uses a Guava Cache directly...
* <li>better control over locking than WeakMap.Provider
* <li>provides direct control over concurrency level
* <li>initial and maximum capacity
* </ul>
*
* <p>We also use our bootstrap proxy when matching against the bootstrap loader.
* <ul>
There are two core parts to the cache...
* <li>a cache of ID assignments for ClassLoaders
* <li>a single cache of TypeResolutions for all ClassLoaders - keyed by a custom composite key
* that combines loader ID & name
* </ul>
*
* <p>The CacheProvider is a custom implementation that uses guava's cache to expire and limit size.
* <p>This design was chosen to create a single limited size cache that can be adjusted
* for the entire application -- without having to create a large number of WeakReference objects.
*
* <p>By evicting from the cache we are able to reduce the memory overhead of the agent for apps
* that have many classes.
* <p>The ID assignment mostly assigns a single ID to each ClassLoader, but the maximumSize
* restriction means that an evicted ClassLoader could be assigned another ID later on.
*
* <p>See eviction policy below.
* <p>For the validity of the cache, the important part is that ID assignment guarantees that
* no two ClassLoaders share the same ID.
*
* <p>NOTE: As an additional safe-guard, a new CacheInstance can be created if the original loader ID
* sequence is exhausted.
*/
public class DDCachingPoolStrategy
implements PoolStrategy, WeakMap.ValueSupplier<ClassLoader, TypePool.CacheProvider> {
// Need this because we can't put null into the typePoolCache map.
private static final ClassLoader BOOTSTRAP_CLASSLOADER_PLACEHOLDER =
new SecureClassLoader(null) {};
private final WeakMap<ClassLoader, TypePool.CacheProvider> typePoolCache =
WeakMap.Provider.newWeakMap();
private final Cleaner cleaner;
public DDCachingPoolStrategy(final Cleaner cleaner) {
this.cleaner = cleaner;
}
@Slf4j
public class DDCachingPoolStrategy implements PoolStrategy {
/**
* Most of the logic exists in CacheInstance This volatile + exhaustion checking is defense
* against loader ID exhaustion
*/
volatile CacheInstance cacheInstance = new CacheInstance();
@Override
public TypePool typePool(final ClassFileLocator classFileLocator, final ClassLoader classLoader) {
final ClassLoader key =
BOOTSTRAP_CLASSLOADER == classLoader ? BOOTSTRAP_CLASSLOADER_PLACEHOLDER : classLoader;
final TypePool.CacheProvider cache = typePoolCache.computeIfAbsent(key, this);
CacheInstance cacheInstance = this.cacheInstance;
return new TypePool.Default.WithLazyResolution(
cache, classFileLocator, TypePool.Default.ReaderMode.FAST);
TypePool typePool = cacheInstance.typePool(classFileLocator, classLoader);
if (cacheInstance.exhaustedLoaderIdSeq()) {
// If the loader ID sequence is exhausted, drop the prior cache & start over
// The ID space is so large that this shouldn't occur
log.error("cacheInstance exhausted - rebuilding cache");
this.cacheInstance = new CacheInstance();
}
return typePool;
}
@Override
public TypePool.CacheProvider get(final ClassLoader key) {
if (BOOTSTRAP_CLASSLOADER_PLACEHOLDER != key && skipClassLoader().matches(key)) {
// Don't bother creating a cache for a classloader that won't match.
// (avoiding a lot of DelegatingClassLoader instances)
// This is primarily an optimization.
return TypePool.CacheProvider.NoOp.INSTANCE;
} else {
return EvictingCacheProvider.withObjectType(cleaner, 1, TimeUnit.MINUTES);
/*
* CacheInstance embodies the core of the cache. In general, we only
* expect a single CacheInstance object to ever be created.
*
* However, CacheInstance does provide an extra layer of protection
* against loaderIdSeq exhaustion. If ever the loaderIdSeq of
* CacheInstance is exhausted, then DDCachingPoolStrategy.typePool
* will detect that and discard the CacheInstance.
*
* At that time, a new CacheInstance with a fresh sequence will
* be created in its place.
*/
private static final class CacheInstance {
// Tuning knobs: a single bounded shared cache serves all ClassLoaders, so
// these constants bound total memory regardless of loader/class counts.
static final int CONCURRENCY_LEVEL = 8;
static final int LOADER_CAPACITY = 64;
static final int TYPE_CAPACITY = 64;
// BOOTSTRAP_ID is reserved for the bootstrap loader (null ClassLoader);
// regular loaders are assigned IDs in [START_ID, LIMIT_ID).
static final long BOOTSTRAP_ID = Long.MIN_VALUE;
static final long START_ID = BOOTSTRAP_ID + 1;
// Reaching LIMIT_ID marks this CacheInstance as exhausted
// (see exhaustedLoaderIdSeq / provisionId below).
static final long LIMIT_ID = Long.MAX_VALUE - 10;
static final long EXHAUSTED_ID = LIMIT_ID;
// Many things are package visible for testing purposes --
// others to avoid creation of synthetic accessors
/**
* Cache of recent loaderIds: guarantee is that no two loaders are given the same ID; however, a
* loader may be given more than one ID if it falls out the cache.
*/
final Cache<ClassLoader, Long> loaderIdCache =
CacheBuilder.newBuilder()
.weakKeys()
.concurrencyLevel(CONCURRENCY_LEVEL)
.initialCapacity(LOADER_CAPACITY / 2)
.maximumSize(LOADER_CAPACITY)
.build();
/**
* Single shared Type.Resolution cache -- uses a composite key of loader ID & class name. The
* initial capacity is set to the maximum capacity to avoid expansion overhead.
*/
final Cache<TypeCacheKey, TypePool.Resolution> sharedResolutionCache =
CacheBuilder.newBuilder()
.softValues()
.concurrencyLevel(CONCURRENCY_LEVEL)
.initialCapacity(TYPE_CAPACITY)
.maximumSize(TYPE_CAPACITY)
.build();
/**
* ID sequence for loaders -- BOOTSTRAP_ID is reserved -- starts higher at START_ID. Sequence
* proceeds up until LIMIT_ID at which the sequence and this cacheInstance are considered to be
* exhausted.
*/
final AtomicLong loaderIdSeq = new AtomicLong(START_ID);
/** Fast path for bootstrap */
final SharedResolutionCacheAdapter bootstrapCacheProvider =
new SharedResolutionCacheAdapter(BOOTSTRAP_ID, sharedResolutionCache);
// Reused Callable handed to loaderIdCache.get(...) so an ID is provisioned
// at most once per cache miss (Guava serializes the load per key).
private final Callable<Long> provisionIdCallable =
new Callable<Long>() {
@Override
public final Long call() throws Exception {
return provisionId();
}
};
/**
* Builds a TypePool for the given loader backed by the shared resolution cache.
* A null loader means bootstrap and takes the reserved BOOTSTRAP_ID fast path;
* when no ID can be assigned, a non-caching pool is returned instead.
*/
final TypePool typePool(
final ClassFileLocator classFileLocator, final ClassLoader classLoader) {
if (classLoader == null) {
return createCachingTypePool(bootstrapCacheProvider, classFileLocator);
}
// Fast path: loader already has an assigned ID.
Long existingId = loaderIdCache.getIfPresent(classLoader);
if (existingId != null) {
return createCachingTypePool(existingId, classFileLocator);
}
// ID space exhausted -- fall back to an uncached pool; the caller is
// expected to retire this CacheInstance (see DDCachingPoolStrategy.typePool).
if (exhaustedLoaderIdSeq()) {
return createNonCachingTypePool(classFileLocator);
}
long provisionedId = 0;
try {
provisionedId = loaderIdCache.get(classLoader, this.provisionIdCallable);
} catch (ExecutionException e) {
// provisionId doesn't throw; treat any unexpected failure as uncacheable.
log.error("unexpected exception", e);
return createNonCachingTypePool(classFileLocator);
}
if (provisionedId == EXHAUSTED_ID) {
return createNonCachingTypePool(classFileLocator);
} else {
return createCachingTypePool(provisionedId, classFileLocator);
}
}
// True once the sequence has reached LIMIT_ID; stays true thereafter
// because the sequence only ever increases.
final boolean exhaustedLoaderIdSeq() {
return (loaderIdSeq.get() >= LIMIT_ID);
}
// CAS loop claiming the next loader ID, or EXHAUSTED_ID once the sequence
// hits LIMIT_ID. Also bails out with EXHAUSTED_ID if the thread is interrupted.
final long provisionId() {
do {
long curId = loaderIdSeq.get();
if (curId >= LIMIT_ID) return EXHAUSTED_ID;
long newId = curId + 1;
boolean acquired = loaderIdSeq.compareAndSet(curId, newId);
if (acquired) return newId;
} while (!Thread.currentThread().isInterrupted());
return EXHAUSTED_ID;
}
// Pool with no cache at all -- used when no loader ID could be assigned.
private final TypePool createNonCachingTypePool(final ClassFileLocator classFileLocator) {
return new TypePool.Default.WithLazyResolution(
TypePool.CacheProvider.NoOp.INSTANCE, classFileLocator, TypePool.Default.ReaderMode.FAST);
}
// Thin view of the shared resolution cache scoped to one loader ID.
private final TypePool.CacheProvider createCacheProvider(final long loaderId) {
return new SharedResolutionCacheAdapter(loaderId, sharedResolutionCache);
}
private final TypePool createCachingTypePool(
final long loaderId, final ClassFileLocator classFileLocator) {
return new TypePool.Default.WithLazyResolution(
createCacheProvider(loaderId),
classFileLocator,
TypePool.Default.ReaderMode.FAST);
}
private final TypePool createCachingTypePool(
final TypePool.CacheProvider cacheProvider, final ClassFileLocator classFileLocator) {
return new TypePool.Default.WithLazyResolution(
cacheProvider, classFileLocator, TypePool.Default.ReaderMode.FAST);
}
// "approximate" because Guava Cache.size() is an estimate under concurrency.
final long approximateSize() {
return sharedResolutionCache.size();
}
}
private static class EvictingCacheProvider implements TypePool.CacheProvider {
/**
* TypeCacheKey is key for the sharedResolutionCache. It is a mix of a cacheId/loaderId & a type
* name.
*/
static final class TypeCacheKey {
private final long cacheId;
private final String name;
/** A map containing all cached resolutions by their names. */
private final Cache<String, TypePool.Resolution> cache;
private final int hashCode;
/** Creates a new simple cache. */
private EvictingCacheProvider(
final Cleaner cleaner, final long expireDuration, final TimeUnit unit) {
cache =
CacheBuilder.newBuilder()
.initialCapacity(100) // Per classloader, so we want a small default.
.maximumSize(5000)
.softValues()
.expireAfterAccess(expireDuration, unit)
.build();
TypeCacheKey(final long cacheId, final String name) {
this.cacheId = cacheId;
this.name = name;
/*
* The cache only does cleanup on occasional reads and writes.
* We want to ensure this happens more regularly, so we schedule a thread to do run cleanup manually.
*/
cleaner.scheduleCleaning(cache, CacheCleaner.CLEANER, expireDuration, unit);
hashCode = (int) (31 * cacheId) ^ name.hashCode();
}
private static EvictingCacheProvider withObjectType(
final Cleaner cleaner, final long expireDuration, final TimeUnit unit) {
final EvictingCacheProvider cacheProvider =
new EvictingCacheProvider(cleaner, expireDuration, unit);
cacheProvider.register(
Object.class.getName(), new TypePool.Resolution.Simple(TypeDescription.OBJECT));
return cacheProvider;
@Override
public final int hashCode() {
return hashCode;
}
@Override
public boolean equals(final Object obj) {
if (!(obj instanceof TypeCacheKey)) return false;
TypeCacheKey that = (TypeCacheKey) obj;
return (cacheId == that.cacheId) && name.equals(that.name);
}
}
static final class SharedResolutionCacheAdapter implements TypePool.CacheProvider {
private static final String OBJECT_NAME = "java.lang.Object";
private static final TypePool.Resolution OBJECT_RESOLUTION =
new TypePool.Resolution.Simple(TypeDescription.OBJECT);
private final long cacheId;
private final Cache<TypeCacheKey, TypePool.Resolution> sharedResolutionCache;
SharedResolutionCacheAdapter(
final long cacheId, final Cache<TypeCacheKey, TypePool.Resolution> sharedResolutionCache) {
this.cacheId = cacheId;
this.sharedResolutionCache = sharedResolutionCache;
}
@Override
public TypePool.Resolution find(final String name) {
return cache.getIfPresent(name);
TypePool.Resolution existingResolution = sharedResolutionCache.getIfPresent(new TypeCacheKey(cacheId, name));
if ( existingResolution != null ) return existingResolution;
if ( OBJECT_NAME.equals(name) ) {
return OBJECT_RESOLUTION;
}
return null;
}
@Override
public TypePool.Resolution register(final String name, final TypePool.Resolution resolution) {
try {
return cache.get(name, new ResolutionProvider(resolution));
} catch (final ExecutionException e) {
if ( OBJECT_NAME.equals(name) ) {
return resolution;
}
sharedResolutionCache.put(new TypeCacheKey(cacheId, name), resolution);
return resolution;
}
@Override
public void clear() {
cache.invalidateAll();
}
public long size() {
return cache.size();
}
private static class CacheCleaner implements Cleaner.Adapter<Cache> {
private static final CacheCleaner CLEANER = new CacheCleaner();
@Override
public void clean(final Cache target) {
target.cleanUp();
}
}
private static class ResolutionProvider implements Callable<TypePool.Resolution> {
private final TypePool.Resolution value;
private ResolutionProvider(final TypePool.Resolution value) {
this.value = value;
}
@Override
public TypePool.Resolution call() {
return value;
}
// Allowing the high-level eviction policy make the clearing decisions
}
}
}

View File

@ -0,0 +1,223 @@
package datadog.trace.agent.tooling
import datadog.trace.util.gc.GCUtils
import datadog.trace.util.test.DDSpecification
import net.bytebuddy.description.type.TypeDescription
import net.bytebuddy.dynamic.ClassFileLocator
import net.bytebuddy.pool.TypePool
import spock.lang.Timeout
import java.lang.ref.WeakReference
import java.security.SecureClassLoader
import java.util.concurrent.TimeUnit
import java.util.concurrent.atomic.AtomicReference
import static datadog.trace.agent.tooling.AgentTooling.CLEANER
// Spock spec for the new DDCachingPoolStrategy: TypeCacheKey equality/hashing,
// the shared resolution cache, loader ID assignment, and exhaustion handling.
@Timeout(5)
class CacheProviderTest extends DDSpecification {
def "key equivalence"() {
setup:
def key1 = new DDCachingPoolStrategy.TypeCacheKey(1, "foo")
def key2 = new DDCachingPoolStrategy.TypeCacheKey(1, "foo")
expect:
key1.hashCode() == key2.hashCode()
key1.equals(key2)
}
def "different loader - same name"() {
setup:
def key1 = new DDCachingPoolStrategy.TypeCacheKey(1, "foo")
def key2 = new DDCachingPoolStrategy.TypeCacheKey(2, "foo")
expect:
// not strictly guaranteed, but important for performance
key1.hashCode() != key2.hashCode()
!key1.equals(key2)
}
def "same loader - different name"() {
setup:
def key1 = new DDCachingPoolStrategy.TypeCacheKey(1, "foo")
def key2 = new DDCachingPoolStrategy.TypeCacheKey(1, "foobar")
expect:
// not strictly guaranteed, but important for performance
key1.hashCode() != key2.hashCode()
!key1.equals(key2)
}
def "test basic caching"() {
setup:
def cacheInstance = new DDCachingPoolStrategy.CacheInstance();
def cacheProvider = cacheInstance.createCacheProvider(1);
when:
cacheProvider.register("foo", new TypePool.Resolution.Simple(TypeDescription.VOID))
then:
// not strictly guaranteed, but fine for this test
cacheProvider.find("foo") != null
cacheInstance.approximateSize() == 1
}
// Two providers with the same loader ID must see each other's registrations.
def "test ID equivalence"() {
setup:
def cacheInstance = new DDCachingPoolStrategy.CacheInstance();
def cacheProvider1A = cacheInstance.createCacheProvider(1);
def cacheProvider1B = cacheInstance.createCacheProvider(1);
when:
cacheProvider1A.register("foo", newVoid())
then:
// not strictly guaranteed, but fine for this test
cacheProvider1A.find("foo") != null
cacheProvider1B.find("foo") != null
cacheProvider1A.find("foo").is(cacheProvider1B.find("foo"))
cacheInstance.approximateSize() == 1
}
// Providers with different loader IDs must not share cached resolutions,
// even for the same type name.
def "test ID separation"() {
setup:
def cacheInstance = new DDCachingPoolStrategy.CacheInstance();
def cacheProvider1 = cacheInstance.createCacheProvider(1);
def cacheProvider2 = cacheInstance.createCacheProvider(2);
when:
cacheProvider1.register("foo", newVoid())
cacheProvider2.register("foo", newVoid())
then:
// not strictly guaranteed, but fine for this test
cacheProvider1.find("foo") != null
cacheProvider2.find("foo") != null
!cacheProvider1.find("foo").is(cacheProvider2.find("foo"))
cacheInstance.approximateSize() == 2
}
def "test loader ID assignment"() {
setup:
def cacheInstance = new DDCachingPoolStrategy.CacheInstance()
def locator1 = newLocator()
def loader1 = newClassLoader()
def locator2 = newLocator()
def loader2 = newClassLoader()
when:
cacheInstance.typePool(locator1, loader1)
cacheInstance.typePool(locator2, loader2)
then:
def loaderId1 = cacheInstance.loaderIdCache.getIfPresent(loader1)
def loaderId2 = cacheInstance.loaderIdCache.getIfPresent(loader2)
// both were assigned an ID -- technically these can fall out of the ID cache
loaderId1 != null
loaderId2 != null
// both IDs are not the BOOTSTRAP_ID
loaderId1 != DDCachingPoolStrategy.CacheInstance.BOOTSTRAP_ID
loaderId2 != DDCachingPoolStrategy.CacheInstance.BOOTSTRAP_ID
// class loaders don't share an ID
cacheInstance.loaderIdCache.getIfPresent(loader1) != cacheInstance.loaderIdCache.getIfPresent(loader2)
}
// Forces the sequence near LIMIT_ID and verifies exhaustion is sticky.
def "test loader ID exhaustion"() {
setup:
def cacheInstance = new DDCachingPoolStrategy.CacheInstance()
when:
cacheInstance.loaderIdSeq.set(DDCachingPoolStrategy.CacheInstance.LIMIT_ID - 2)
then:
cacheInstance.provisionId() != DDCachingPoolStrategy.CacheInstance.EXHAUSTED_ID
then:
// once exhausted provisioning -- stays exhausted
cacheInstance.provisionId() == DDCachingPoolStrategy.CacheInstance.EXHAUSTED_ID
cacheInstance.exhaustedLoaderIdSeq()
cacheInstance.provisionId() == DDCachingPoolStrategy.CacheInstance.EXHAUSTED_ID
cacheInstance.exhaustedLoaderIdSeq()
cacheInstance.provisionId() == DDCachingPoolStrategy.CacheInstance.EXHAUSTED_ID
cacheInstance.exhaustedLoaderIdSeq()
}
// An exhausted CacheInstance must be replaced by the outer strategy.
def "test exhaustion cacheInstance switch"() {
setup:
def cachingStrat = new DDCachingPoolStrategy()
def origCacheInstance = cachingStrat.cacheInstance
cachingStrat.cacheInstance.loaderIdSeq.set(DDCachingPoolStrategy.CacheInstance.LIMIT_ID)
when:
cachingStrat.typePool(newLocator(), newClassLoader())
then:
cachingStrat.cacheInstance != origCacheInstance
}
// Fills the shared cache past TYPE_CAPACITY and checks size stays near the cap.
def "test cacheInstance capacity"() {
setup:
def cacheInstance = new DDCachingPoolStrategy.CacheInstance()
def capacity = DDCachingPoolStrategy.CacheInstance.TYPE_CAPACITY
def cacheProvider1 = cacheInstance.createCacheProvider(1);
def cacheProvider2 = cacheInstance.createCacheProvider(2);
def id = 0
when:
(capacity / 2).times {
id += 1
cacheProvider1.register("foo${id}", newVoid())
cacheProvider2.register("foo${id}", newVoid())
}
then:
// cache will start to proactively free slots & size calc is approximate
cacheInstance.approximateSize() > capacity - 4
when:
10.times {
id += 1
cacheProvider1.register("foo${id}", newVoid())
cacheProvider2.register("foo${id}", newVoid())
}
then:
// cache will start to proactively free slots & size calc is approximate
cacheInstance.approximateSize() > capacity - 4
}
static def newVoid() {
return new TypePool.Resolution.Simple(TypeDescription.VOID)
}
// Fresh loader with no parent -- guarantees distinct identity per call.
static def newClassLoader() {
return new SecureClassLoader(null) {};
}
// Locator stub: resolution details don't matter for these tests.
static def newLocator() {
return new ClassFileLocator() {
@Override
ClassFileLocator.Resolution locate(String name) throws IOException {
return null
}
@Override
void close() throws IOException {}
}
}
}

View File

@ -1,102 +0,0 @@
package datadog.trace.agent.tooling
import datadog.trace.util.gc.GCUtils
import datadog.trace.util.test.DDSpecification
import net.bytebuddy.description.type.TypeDescription
import net.bytebuddy.pool.TypePool
import spock.lang.Timeout
import java.lang.ref.WeakReference
import java.util.concurrent.TimeUnit
import java.util.concurrent.atomic.AtomicReference
import static datadog.trace.agent.tooling.AgentTooling.CLEANER
// NOTE(review): this spec covers DDCachingPoolStrategy.EvictingCacheProvider,
// which this change removes -- the commit deletes this file along with it.
@Timeout(5)
class EvictingCacheProviderTest extends DDSpecification {
def "test provider"() {
setup:
def provider = new DDCachingPoolStrategy.EvictingCacheProvider(CLEANER, 2, TimeUnit.MINUTES)
expect:
provider.size() == 0
provider.find(className) == null
when:
provider.register(className, new TypePool.Resolution.Simple(TypeDescription.VOID))
then:
provider.size() == 1
provider.find(className) == new TypePool.Resolution.Simple(TypeDescription.VOID)
when:
provider.clear()
then:
provider.size() == 0
provider.find(className) == null
where:
className = "SomeClass"
}
// Verifies both time-based expiration and soft-value collection under GC.
def "test timeout eviction"() {
setup:
def provider = new DDCachingPoolStrategy.EvictingCacheProvider(CLEANER, timeout, TimeUnit.MILLISECONDS)
def resolutionRef = new AtomicReference<TypePool.Resolution>(new TypePool.Resolution.Simple(TypeDescription.VOID))
def weakRef = new WeakReference(resolutionRef.get())
when:
def lastAccess = System.nanoTime()
provider.register(className, resolutionRef.get())
then:
// Ensure continued access prevents expiration.
for (int i = 0; i < timeout + 10; i++) {
assert TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - lastAccess) < timeout: "test took too long on " + i
assert provider.find(className) != null
assert provider.size() == 1
lastAccess = System.nanoTime()
Thread.sleep(1)
}
when:
Thread.sleep(timeout)
then:
provider.find(className) == null
when:
provider.register(className, resolutionRef.get())
resolutionRef.set(null)
GCUtils.awaitGC(weakRef)
then:
// Verify properly GC'd
provider.find(className) == null
weakRef.get() == null
where:
className = "SomeClass"
timeout = 500 // Takes about 50 ms locally, adding an order of magnitude for CI.
}
// 10000 registrations against the provider's 5000-entry maximumSize cap.
def "test size limit"() {
setup:
def provider = new DDCachingPoolStrategy.EvictingCacheProvider(CLEANER, 2, TimeUnit.MINUTES)
def typeDef = new TypePool.Resolution.Simple(TypeDescription.VOID)
for (int i = 0; i < 10000; i++) {
provider.register("ClassName$i", typeDef)
}
expect:
provider.size() == 5000
when:
provider.clear()
then:
provider.size() == 0
}
}