Add an option to force profiling right in the beginning of premain

Currently this blows up JVMs before 14
This commit is contained in:
Nikolay Martynov 2020-02-19 11:50:19 -05:00
parent a26f08957f
commit 2c33f0ace7
6 changed files with 149 additions and 69 deletions

View File

@ -11,7 +11,7 @@ import org.slf4j.LoggerFactory;
/**
* Agent start up logic.
*
* <p>This class is loaded and called by {@code datadog.trace.agent.AgentBootstrap}
* <p>This class is loaded and called by {@code datadog.trace.bootstrap.AgentBootstrap}
*
* <p>The intention is for this class to be loaded by bootstrap classloader to make sure we have
* unimpeded access to the rest of Datadog's agent parts.
@ -46,6 +46,9 @@ public class Agent {
public static void start(final Instrumentation inst, final URL bootstrapURL) {
createParentClassloader(bootstrapURL);
startProfilingAgent(bootstrapURL, true);
startDatadogAgent(inst, bootstrapURL);
final boolean appUsingCustomLogManager = isAppUsingCustomLogManager();
@ -91,7 +94,7 @@ public class Agent {
log.debug("Custom logger detected. Delaying Profiling Agent startup.");
registerLogManagerCallback(new StartProfilingAgentCallback(inst, bootstrapURL));
} else {
startProfilingAgent(bootstrapURL);
startProfilingAgent(bootstrapURL, false);
}
}
@ -188,7 +191,7 @@ public class Agent {
@Override
public void execute() {
startProfilingAgent(bootstrapURL);
startProfilingAgent(bootstrapURL, false);
}
}
@ -275,29 +278,29 @@ public class Agent {
}
}
private static synchronized void startProfilingAgent(final URL bootstrapURL) {
if (PROFILING_CLASSLOADER == null) {
final ClassLoader contextLoader = Thread.currentThread().getContextClassLoader();
try {
final ClassLoader profilingClassLoader =
private static synchronized void startProfilingAgent(
final URL bootstrapURL, final boolean early) {
final ClassLoader contextLoader = Thread.currentThread().getContextClassLoader();
try {
if (PROFILING_CLASSLOADER == null) {
PROFILING_CLASSLOADER =
createDatadogClassLoader("agent-profiling.isolated", bootstrapURL, PARENT_CLASSLOADER);
Thread.currentThread().setContextClassLoader(profilingClassLoader);
final Class<?> profilingAgentClass =
profilingClassLoader.loadClass("com.datadog.profiling.agent.ProfilingAgent");
final Method profilingInstallerMethod = profilingAgentClass.getMethod("run");
profilingInstallerMethod.invoke(null);
PROFILING_CLASSLOADER = profilingClassLoader;
} catch (final ClassFormatError e) {
/*
Profiling is compiled for Java8. Loading it on Java7 results in ClassFormatError
(more specifically UnsupportedClassVersionError). Just ignore and continue when this happens.
*/
log.error("Cannot start profiling agent ", e);
} catch (final Throwable ex) {
log.error("Throwable thrown while starting profiling agent", ex);
} finally {
Thread.currentThread().setContextClassLoader(contextLoader);
}
Thread.currentThread().setContextClassLoader(PROFILING_CLASSLOADER);
final Class<?> profilingAgentClass =
PROFILING_CLASSLOADER.loadClass("com.datadog.profiling.agent.ProfilingAgent");
final Method profilingInstallerMethod = profilingAgentClass.getMethod("run", Boolean.TYPE);
profilingInstallerMethod.invoke(null, early);
} catch (final ClassFormatError e) {
/*
Profiling is compiled for Java8. Loading it on Java7 results in ClassFormatError
(more specifically UnsupportedClassVersionError). Just ignore and continue when this happens.
*/
log.error("Cannot start profiling agent ", e);
} catch (final Throwable ex) {
log.error("Throwable thrown while starting profiling agent", ex);
} finally {
Thread.currentThread().setContextClassLoader(contextLoader);
}
}

View File

@ -38,6 +38,7 @@ public final class ProfilingSystem {
private final Duration startupDelay;
private final Duration uploadPeriod;
private final boolean forceEarly;
private OngoingRecording recording;
private boolean started = false;
@ -57,7 +58,8 @@ public final class ProfilingSystem {
final RecordingDataListener dataListener,
final Duration startupDelay,
final Duration startupDelayRandomRange,
final Duration uploadPeriod)
final Duration uploadPeriod,
final boolean forceEarly)
throws ConfigurationException {
this(
controller,
@ -65,6 +67,7 @@ public final class ProfilingSystem {
startupDelay,
startupDelayRandomRange,
uploadPeriod,
forceEarly,
Executors.newScheduledThreadPool(
1, new ProfilingThreadFactory("dd-profiler-recording-scheduler")),
ThreadLocalRandom.current());
@ -76,12 +79,14 @@ public final class ProfilingSystem {
final Duration baseStartupDelay,
final Duration startupDelayRandomRange,
final Duration uploadPeriod,
final boolean forceEarly,
final ScheduledExecutorService executorService,
final ThreadLocalRandom threadLocalRandom)
throws ConfigurationException {
this.controller = controller;
this.dataListener = dataListener;
this.uploadPeriod = uploadPeriod;
this.forceEarly = forceEarly;
this.executorService = executorService;
if (baseStartupDelay.isNegative()) {
@ -103,31 +108,37 @@ public final class ProfilingSystem {
public final void start() {
log.info(
"Starting profiling system: startupDelay={}ms, uploadPeriod={}ms",
"Starting profiling system: startupDelay={}ms, uploadPeriod={}ms, forceEarly={}",
startupDelay.toMillis(),
uploadPeriod.toMillis());
uploadPeriod.toMillis(),
forceEarly);
// Delay JFR initialization. This code is run from 'premain' and there is a known bug in JVM
// which makes it crash if JFR is run before 'main' starts.
// See https://bugs.openjdk.java.net/browse/JDK-8227011
executorService.schedule(
() -> {
try {
final Instant now = Instant.now();
recording = controller.createRecording(RECORDING_NAME);
executorService.scheduleAtFixedRate(
new SnapshotRecording(now),
uploadPeriod.toMillis(),
uploadPeriod.toMillis(),
TimeUnit.MILLISECONDS);
started = true;
} catch (final Throwable t) {
log.error("Fatal exception during profiling startup", t);
throw t;
}
},
startupDelay.toMillis(),
TimeUnit.MILLISECONDS);
if (forceEarly) {
startProfilingRecording();
} else {
// Delay JFR initialization. This code is run from 'premain' and there is a known bug in JVM
// which makes it crash if JFR is run before 'main' starts.
// See https://bugs.openjdk.java.net/browse/JDK-8227011 and
// https://bugs.openjdk.java.net/browse/JDK-8233197.
executorService.schedule(
this::startProfilingRecording, startupDelay.toMillis(), TimeUnit.MILLISECONDS);
}
}
private void startProfilingRecording() {
try {
final Instant now = Instant.now();
recording = controller.createRecording(RECORDING_NAME);
executorService.scheduleAtFixedRate(
new SnapshotRecording(now),
uploadPeriod.toMillis(),
uploadPeriod.toMillis(),
TimeUnit.MILLISECONDS);
started = true;
} catch (final Throwable t) {
log.error("Fatal exception during profiling startup", t);
throw t;
}
}
/** Shuts down the profiling system. */

View File

@ -87,6 +87,7 @@ public class ProfilingSystemTest {
Duration.ofMillis(10),
Duration.ZERO,
Duration.ofMillis(300),
false,
pool,
threadLocalRandom);
startProfilingSystem(system);
@ -106,6 +107,7 @@ public class ProfilingSystemTest {
Duration.ofMillis(10),
Duration.ZERO,
Duration.ofMillis(300),
false,
pool,
threadLocalRandom);
startProfilingSystem(system);
@ -116,6 +118,23 @@ public class ProfilingSystemTest {
assertTrue(pool.isTerminated());
}
@Test
public void testForceEarlySTartup() throws ConfigurationException {
final ProfilingSystem system =
new ProfilingSystem(
controller,
listener,
Duration.ofMillis(10),
Duration.ZERO,
Duration.ofMillis(300),
true,
pool,
threadLocalRandom);
system.start();
assertTrue(system.isStarted());
verify(controller).createRecording(any());
}
@Test
public void testShutdownInterruption() throws ConfigurationException {
final Thread mainThread = Thread.currentThread();
@ -142,6 +161,7 @@ public class ProfilingSystemTest {
Duration.ofMillis(10),
Duration.ofMillis(5),
Duration.ofMillis(100),
false,
pool,
threadLocalRandom);
startProfilingSystem(system);
@ -160,6 +180,7 @@ public class ProfilingSystemTest {
Duration.ofMillis(10),
Duration.ofMillis(5),
Duration.ofMillis(300),
false,
pool,
threadLocalRandom);
system.shutdown();
@ -174,7 +195,8 @@ public class ProfilingSystemTest {
listener,
Duration.ofMillis(10),
Duration.ofMillis(5),
Duration.ofMillis(1));
Duration.ofMillis(1),
false);
Thread.sleep(50);
system.shutdown();
verify(controller, never()).createRecording(any());
@ -191,7 +213,8 @@ public class ProfilingSystemTest {
listener,
Duration.ofMillis(10),
Duration.ofMillis(5),
Duration.ofMillis(10));
Duration.ofMillis(10),
false);
startProfilingSystem(system);
Thread.sleep(200);
system.shutdown();
@ -204,7 +227,12 @@ public class ProfilingSystemTest {
ConfigurationException.class,
() -> {
new ProfilingSystem(
controller, listener, Duration.ofMillis(-10), Duration.ZERO, Duration.ofMillis(200));
controller,
listener,
Duration.ofMillis(-10),
Duration.ZERO,
Duration.ofMillis(200),
false);
});
}
@ -218,7 +246,8 @@ public class ProfilingSystemTest {
listener,
Duration.ofMillis(10),
Duration.ofMillis(-20),
Duration.ofMillis(200));
Duration.ofMillis(200),
false);
});
}
@ -232,7 +261,8 @@ public class ProfilingSystemTest {
listener,
Duration.ofMillis(10),
Duration.ofMillis(20),
Duration.ofMillis(-200));
Duration.ofMillis(-200),
false);
});
}
@ -252,6 +282,7 @@ public class ProfilingSystemTest {
Duration.ofMillis(10),
Duration.ofMillis(5),
uploadPeriod,
false,
pool,
threadLocalRandom);
startProfilingSystem(system);
@ -279,6 +310,7 @@ public class ProfilingSystemTest {
Duration.ofMillis(10),
Duration.ofMillis(5),
uploadPeriod,
false,
pool,
threadLocalRandom);
startProfilingSystem(system);
@ -307,6 +339,7 @@ public class ProfilingSystemTest {
startupDelay,
startupDelayRandomRange,
Duration.ofMillis(100),
false,
pool,
threadLocalRandom);
@ -326,6 +359,7 @@ public class ProfilingSystemTest {
startupDelay,
Duration.ZERO,
Duration.ofMillis(100),
false,
pool,
threadLocalRandom);

View File

@ -17,9 +17,18 @@ public class ProfilingAgent {
private static volatile ProfilingSystem PROFILER;
public static synchronized void run() throws IllegalArgumentException {
/**
* Main entry point into profiling Note: this must be reentrant because we may want to start
* profiling early, and then attempt to start it again at normal time
*/
public static synchronized void run(final boolean early) throws IllegalArgumentException {
if (PROFILER == null) {
final Config config = Config.get();
if (early && !config.isProfilingStartForceEarly()) {
log.info("Profiling: not starting early");
// early startup is disabled;
return;
}
if (!config.isProfilingEnabled()) {
log.info("Profiling: disabled");
return;
@ -34,7 +43,7 @@ public class ProfilingAgent {
final RecordingUploader uploader = new RecordingUploader(config);
final Duration startupDelay = Duration.ofSeconds(config.getProfilingStartupDelay());
final Duration startupDelay = Duration.ofSeconds(config.getProfilingStartDelay());
final Duration uploadPeriod = Duration.ofSeconds(config.getProfilingUploadPeriod());
// Randomize startup delay for up to one upload period. Consider having separate setting for
@ -43,7 +52,12 @@ public class ProfilingAgent {
PROFILER =
new ProfilingSystem(
controller, uploader::upload, startupDelay, startupDelayRandomRange, uploadPeriod);
controller,
uploader::upload,
startupDelay,
startupDelayRandomRange,
uploadPeriod,
config.isProfilingStartForceEarly());
PROFILER.start();
log.info("Profiling has started!");

View File

@ -113,7 +113,11 @@ public class Config {
public static final String PROFILING_API_KEY_OLD = "profiling.apikey";
public static final String PROFILING_API_KEY_FILE_OLD = "profiling.apikey.file";
public static final String PROFILING_TAGS = "profiling.tags";
public static final String PROFILING_STARTUP_DELAY = "profiling.start-delay";
public static final String PROFILING_START_DELAY = "profiling.start-delay";
// DANGEROUS! May lead on sigsegv on JVMs before 14
// Not intended for production use
public static final String PROFILING_START_FORCE_EARLY =
"profiling.experimental.start-force-early";
public static final String PROFILING_UPLOAD_PERIOD = "profiling.upload.period";
public static final String PROFILING_TEMPLATE_OVERRIDE_FILE =
"profiling.jfr-template-override-file";
@ -171,7 +175,8 @@ public class Config {
public static final boolean DEFAULT_PROFILING_ENABLED = false;
public static final String DEFAULT_PROFILING_URL =
"https://intake.profile.datadoghq.com/v1/input";
public static final int DEFAULT_PROFILING_STARTUP_DELAY = 10;
public static final int DEFAULT_PROFILING_START_DELAY = 10;
public static final boolean DEFAULT_PROFILING_START_FORCE_EARLY = false;
public static final int DEFAULT_PROFILING_UPLOAD_PERIOD = 60; // 1 min
public static final int DEFAULT_PROFILING_UPLOAD_TIMEOUT = 30; // seconds
public static final String DEFAULT_PROFILING_UPLOAD_COMPRESSION = "on";
@ -267,7 +272,8 @@ public class Config {
@Getter private final String profilingUrl;
@Getter private final String profilingApiKey;
private final Map<String, String> profilingTags;
@Getter private final int profilingStartupDelay;
@Getter private final int profilingStartDelay;
@Getter private final boolean profilingStartForceEarly;
@Getter private final int profilingUploadPeriod;
@Getter private final String profilingTemplateOverrideFile;
@Getter private final int profilingUploadTimeout;
@ -444,8 +450,11 @@ public class Config {
profilingApiKey = tmpProfilingApiKey;
profilingTags = getMapSettingFromEnvironment(PROFILING_TAGS, null);
profilingStartupDelay =
getIntegerSettingFromEnvironment(PROFILING_STARTUP_DELAY, DEFAULT_PROFILING_STARTUP_DELAY);
profilingStartDelay =
getIntegerSettingFromEnvironment(PROFILING_START_DELAY, DEFAULT_PROFILING_START_DELAY);
profilingStartForceEarly =
getBooleanSettingFromEnvironment(
PROFILING_START_FORCE_EARLY, DEFAULT_PROFILING_START_FORCE_EARLY);
profilingUploadPeriod =
getIntegerSettingFromEnvironment(PROFILING_UPLOAD_PERIOD, DEFAULT_PROFILING_UPLOAD_PERIOD);
profilingTemplateOverrideFile =
@ -603,8 +612,11 @@ public class Config {
profilingUrl = properties.getProperty(PROFILING_URL, parent.profilingUrl);
profilingApiKey = properties.getProperty(PROFILING_API_KEY, parent.profilingApiKey);
profilingTags = getPropertyMapValue(properties, PROFILING_TAGS, parent.profilingTags);
profilingStartupDelay =
getPropertyIntegerValue(properties, PROFILING_STARTUP_DELAY, parent.profilingStartupDelay);
profilingStartDelay =
getPropertyIntegerValue(properties, PROFILING_START_DELAY, parent.profilingStartDelay);
profilingStartForceEarly =
getPropertyBooleanValue(
properties, PROFILING_START_FORCE_EARLY, parent.profilingStartForceEarly);
profilingUploadPeriod =
getPropertyIntegerValue(properties, PROFILING_UPLOAD_PERIOD, parent.profilingUploadPeriod);
profilingTemplateOverrideFile =

View File

@ -40,7 +40,8 @@ import static datadog.trace.api.Config.PROFILING_PROXY_HOST
import static datadog.trace.api.Config.PROFILING_PROXY_PASSWORD
import static datadog.trace.api.Config.PROFILING_PROXY_PORT
import static datadog.trace.api.Config.PROFILING_PROXY_USERNAME
import static datadog.trace.api.Config.PROFILING_STARTUP_DELAY
import static datadog.trace.api.Config.PROFILING_START_DELAY
import static datadog.trace.api.Config.PROFILING_START_FORCE_EARLY
import static datadog.trace.api.Config.PROFILING_TAGS
import static datadog.trace.api.Config.PROFILING_TEMPLATE_OVERRIDE_FILE
import static datadog.trace.api.Config.PROFILING_UPLOAD_COMPRESSION
@ -135,7 +136,8 @@ class ConfigTest extends DDSpecification {
config.profilingUrl == Config.DEFAULT_PROFILING_URL
config.profilingApiKey == null
config.mergedProfilingTags == [(HOST_TAG): config.getHostName(), (RUNTIME_ID_TAG): config.getRuntimeId(), (SERVICE_TAG): config.serviceName, (LANGUAGE_TAG_KEY): LANGUAGE_TAG_VALUE]
config.profilingStartupDelay == 10
config.profilingStartDelay == 10
config.profilingStartForceEarly == false
config.profilingUploadPeriod == 60
config.profilingTemplateOverrideFile == null
config.profilingUploadTimeout == 30
@ -199,7 +201,8 @@ class ConfigTest extends DDSpecification {
prop.setProperty(PROFILING_URL, "new url")
prop.setProperty(PROFILING_API_KEY, "new api key")
prop.setProperty(PROFILING_TAGS, "f:6,host:test-host")
prop.setProperty(PROFILING_STARTUP_DELAY, "1111")
prop.setProperty(PROFILING_START_DELAY, "1111")
prop.setProperty(PROFILING_START_FORCE_EARLY, "true")
prop.setProperty(PROFILING_UPLOAD_PERIOD, "1112")
prop.setProperty(PROFILING_TEMPLATE_OVERRIDE_FILE, "/path")
prop.setProperty(PROFILING_UPLOAD_TIMEOUT, "1116")
@ -254,7 +257,8 @@ class ConfigTest extends DDSpecification {
config.profilingUrl == "new url"
config.profilingApiKey == "new api key" // we can still override via internal properties object
config.mergedProfilingTags == [b: "2", f: "6", (HOST_TAG): "test-host", (RUNTIME_ID_TAG): config.getRuntimeId(), (SERVICE_TAG): config.serviceName, (LANGUAGE_TAG_KEY): LANGUAGE_TAG_VALUE]
config.profilingStartupDelay == 1111
config.profilingStartDelay == 1111
config.profilingStartForceEarly == true
config.profilingUploadPeriod == 1112
config.profilingUploadCompression == "off"
config.profilingTemplateOverrideFile == "/path"
@ -309,7 +313,8 @@ class ConfigTest extends DDSpecification {
System.setProperty(PREFIX + PROFILING_URL, "new url")
System.setProperty(PREFIX + PROFILING_API_KEY, "new api key")
System.setProperty(PREFIX + PROFILING_TAGS, "f:6,host:test-host")
System.setProperty(PREFIX + PROFILING_STARTUP_DELAY, "1111")
System.setProperty(PREFIX + PROFILING_START_DELAY, "1111")
System.setProperty(PREFIX + PROFILING_START_FORCE_EARLY, "true")
System.setProperty(PREFIX + PROFILING_UPLOAD_PERIOD, "1112")
System.setProperty(PREFIX + PROFILING_TEMPLATE_OVERRIDE_FILE, "/path")
System.setProperty(PREFIX + PROFILING_UPLOAD_TIMEOUT, "1116")
@ -364,7 +369,8 @@ class ConfigTest extends DDSpecification {
config.profilingUrl == "new url"
config.profilingApiKey == null // system properties cannot be used to provide a key
config.mergedProfilingTags == [b: "2", f: "6", (HOST_TAG): "test-host", (RUNTIME_ID_TAG): config.getRuntimeId(), (SERVICE_TAG): config.serviceName, (LANGUAGE_TAG_KEY): LANGUAGE_TAG_VALUE]
config.profilingStartupDelay == 1111
config.profilingStartDelay == 1111
config.profilingStartForceEarly == true
config.profilingUploadPeriod == 1112
config.profilingTemplateOverrideFile == "/path"
config.profilingUploadTimeout == 1116