From 2c33f0ace768a0d0a5eee61f7a104cc493879602 Mon Sep 17 00:00:00 2001 From: Nikolay Martynov Date: Wed, 19 Feb 2020 11:50:19 -0500 Subject: [PATCH] Add an option to force profiling right in the beginning of premain Currently this blows up JVMs before 14 --- .../java/datadog/trace/bootstrap/Agent.java | 51 ++++++++-------- .../profiling/controller/ProfilingSystem.java | 59 +++++++++++-------- .../controller/ProfilingSystemTest.java | 44 ++++++++++++-- .../profiling/agent/ProfilingAgent.java | 20 ++++++- .../main/java/datadog/trace/api/Config.java | 26 +++++--- .../datadog/trace/api/ConfigTest.groovy | 18 ++++-- 6 files changed, 149 insertions(+), 69 deletions(-) diff --git a/dd-java-agent/agent-bootstrap/src/main/java/datadog/trace/bootstrap/Agent.java b/dd-java-agent/agent-bootstrap/src/main/java/datadog/trace/bootstrap/Agent.java index fc6906a4bd..3e737c3618 100644 --- a/dd-java-agent/agent-bootstrap/src/main/java/datadog/trace/bootstrap/Agent.java +++ b/dd-java-agent/agent-bootstrap/src/main/java/datadog/trace/bootstrap/Agent.java @@ -11,7 +11,7 @@ import org.slf4j.LoggerFactory; /** * Agent start up logic. * - *

This class is loaded and called by {@code datadog.trace.agent.AgentBootstrap} + *

This class is loaded and called by {@code datadog.trace.bootstrap.AgentBootstrap} * *

The intention is for this class to be loaded by bootstrap classloader to make sure we have * unimpeded access to the rest of Datadog's agent parts. @@ -46,6 +46,9 @@ public class Agent { public static void start(final Instrumentation inst, final URL bootstrapURL) { createParentClassloader(bootstrapURL); + + startProfilingAgent(bootstrapURL, true); + startDatadogAgent(inst, bootstrapURL); final boolean appUsingCustomLogManager = isAppUsingCustomLogManager(); @@ -91,7 +94,7 @@ public class Agent { log.debug("Custom logger detected. Delaying Profiling Agent startup."); registerLogManagerCallback(new StartProfilingAgentCallback(inst, bootstrapURL)); } else { - startProfilingAgent(bootstrapURL); + startProfilingAgent(bootstrapURL, false); } } @@ -188,7 +191,7 @@ public class Agent { @Override public void execute() { - startProfilingAgent(bootstrapURL); + startProfilingAgent(bootstrapURL, false); } } @@ -275,29 +278,29 @@ public class Agent { } } - private static synchronized void startProfilingAgent(final URL bootstrapURL) { - if (PROFILING_CLASSLOADER == null) { - final ClassLoader contextLoader = Thread.currentThread().getContextClassLoader(); - try { - final ClassLoader profilingClassLoader = + private static synchronized void startProfilingAgent( + final URL bootstrapURL, final boolean early) { + final ClassLoader contextLoader = Thread.currentThread().getContextClassLoader(); + try { + if (PROFILING_CLASSLOADER == null) { + PROFILING_CLASSLOADER = createDatadogClassLoader("agent-profiling.isolated", bootstrapURL, PARENT_CLASSLOADER); - Thread.currentThread().setContextClassLoader(profilingClassLoader); - final Class profilingAgentClass = - profilingClassLoader.loadClass("com.datadog.profiling.agent.ProfilingAgent"); - final Method profilingInstallerMethod = profilingAgentClass.getMethod("run"); - profilingInstallerMethod.invoke(null); - PROFILING_CLASSLOADER = profilingClassLoader; - } catch (final ClassFormatError e) { - /* - Profiling is compiled for Java8. Loading it on Java7 results in ClassFormatError - (more specifically UnsupportedClassVersionError). Just ignore and continue when this happens. - */ - log.error("Cannot start profiling agent ", e); - } catch (final Throwable ex) { - log.error("Throwable thrown while starting profiling agent", ex); - } finally { - Thread.currentThread().setContextClassLoader(contextLoader); } + Thread.currentThread().setContextClassLoader(PROFILING_CLASSLOADER); + final Class profilingAgentClass = + PROFILING_CLASSLOADER.loadClass("com.datadog.profiling.agent.ProfilingAgent"); + final Method profilingInstallerMethod = profilingAgentClass.getMethod("run", Boolean.TYPE); + profilingInstallerMethod.invoke(null, early); + } catch (final ClassFormatError e) { + /* + Profiling is compiled for Java8. Loading it on Java7 results in ClassFormatError + (more specifically UnsupportedClassVersionError). Just ignore and continue when this happens. + */ + log.error("Cannot start profiling agent ", e); + } catch (final Throwable ex) { + log.error("Throwable thrown while starting profiling agent", ex); + } finally { + Thread.currentThread().setContextClassLoader(contextLoader); } } diff --git a/dd-java-agent/agent-profiling/profiling-controller/src/main/java/com/datadog/profiling/controller/ProfilingSystem.java b/dd-java-agent/agent-profiling/profiling-controller/src/main/java/com/datadog/profiling/controller/ProfilingSystem.java index 4d73cdf4ef..d6a95fa84d 100644 --- a/dd-java-agent/agent-profiling/profiling-controller/src/main/java/com/datadog/profiling/controller/ProfilingSystem.java +++ b/dd-java-agent/agent-profiling/profiling-controller/src/main/java/com/datadog/profiling/controller/ProfilingSystem.java @@ -38,6 +38,7 @@ public final class ProfilingSystem { private final Duration startupDelay; private final Duration uploadPeriod; + private final boolean forceEarly; private OngoingRecording recording; private boolean started = false; @@ -57,7 +58,8 @@ public final class ProfilingSystem { final RecordingDataListener dataListener, final Duration startupDelay, final Duration startupDelayRandomRange, - final Duration uploadPeriod) + final Duration uploadPeriod, + final boolean forceEarly) throws ConfigurationException { this( controller, @@ -65,6 +67,7 @@ public final class ProfilingSystem { startupDelay, startupDelayRandomRange, uploadPeriod, + forceEarly, Executors.newScheduledThreadPool( 1, new ProfilingThreadFactory("dd-profiler-recording-scheduler")), ThreadLocalRandom.current()); @@ -76,12 +79,14 @@ public final class ProfilingSystem { final Duration baseStartupDelay, final Duration startupDelayRandomRange, final Duration uploadPeriod, + final boolean forceEarly, final ScheduledExecutorService executorService, final ThreadLocalRandom threadLocalRandom) throws ConfigurationException { this.controller = controller; this.dataListener = dataListener; this.uploadPeriod = uploadPeriod; + this.forceEarly = forceEarly; this.executorService = executorService; if (baseStartupDelay.isNegative()) { @@ -103,31 +108,37 @@ public final class ProfilingSystem { public final void start() { log.info( - "Starting profiling system: startupDelay={}ms, uploadPeriod={}ms", + "Starting profiling system: startupDelay={}ms, uploadPeriod={}ms, forceEarly={}", startupDelay.toMillis(), - uploadPeriod.toMillis()); + uploadPeriod.toMillis(), + forceEarly); - // Delay JFR initialization. This code is run from 'premain' and there is a known bug in JVM - // which makes it crash if JFR is run before 'main' starts. - // See https://bugs.openjdk.java.net/browse/JDK-8227011 - executorService.schedule( - () -> { - try { - final Instant now = Instant.now(); - recording = controller.createRecording(RECORDING_NAME); - executorService.scheduleAtFixedRate( - new SnapshotRecording(now), - uploadPeriod.toMillis(), - uploadPeriod.toMillis(), - TimeUnit.MILLISECONDS); - started = true; - } catch (final Throwable t) { - log.error("Fatal exception during profiling startup", t); - throw t; - } - }, - startupDelay.toMillis(), - TimeUnit.MILLISECONDS); + if (forceEarly) { + startProfilingRecording(); + } else { + // Delay JFR initialization. This code is run from 'premain' and there is a known bug in JVM + // which makes it crash if JFR is run before 'main' starts. + // See https://bugs.openjdk.java.net/browse/JDK-8227011 and + // https://bugs.openjdk.java.net/browse/JDK-8233197. + executorService.schedule( + this::startProfilingRecording, startupDelay.toMillis(), TimeUnit.MILLISECONDS); + } + } + + private void startProfilingRecording() { + try { + final Instant now = Instant.now(); + recording = controller.createRecording(RECORDING_NAME); + executorService.scheduleAtFixedRate( + new SnapshotRecording(now), + uploadPeriod.toMillis(), + uploadPeriod.toMillis(), + TimeUnit.MILLISECONDS); + started = true; + } catch (final Throwable t) { + log.error("Fatal exception during profiling startup", t); + throw t; + } } /** Shuts down the profiling system. */ diff --git a/dd-java-agent/agent-profiling/profiling-controller/src/test/java/com/datadog/profiling/controller/ProfilingSystemTest.java b/dd-java-agent/agent-profiling/profiling-controller/src/test/java/com/datadog/profiling/controller/ProfilingSystemTest.java index 878c13808a..2aee5cd2af 100644 --- a/dd-java-agent/agent-profiling/profiling-controller/src/test/java/com/datadog/profiling/controller/ProfilingSystemTest.java +++ b/dd-java-agent/agent-profiling/profiling-controller/src/test/java/com/datadog/profiling/controller/ProfilingSystemTest.java @@ -87,6 +87,7 @@ public class ProfilingSystemTest { Duration.ofMillis(10), Duration.ZERO, Duration.ofMillis(300), + false, pool, threadLocalRandom); startProfilingSystem(system); @@ -106,6 +107,7 @@ public class ProfilingSystemTest { Duration.ofMillis(10), Duration.ZERO, Duration.ofMillis(300), + false, pool, threadLocalRandom); startProfilingSystem(system); @@ -116,6 +118,23 @@ public class ProfilingSystemTest { assertTrue(pool.isTerminated()); } + @Test + public void testForceEarlySTartup() throws ConfigurationException { + final ProfilingSystem system = + new ProfilingSystem( + controller, + listener, + Duration.ofMillis(10), + Duration.ZERO, + Duration.ofMillis(300), + true, + pool, + threadLocalRandom); + system.start(); + assertTrue(system.isStarted()); + verify(controller).createRecording(any()); + } + @Test public void testShutdownInterruption() throws ConfigurationException { final Thread mainThread = Thread.currentThread(); @@ -142,6 +161,7 @@ public class ProfilingSystemTest { Duration.ofMillis(10), Duration.ofMillis(5), Duration.ofMillis(100), + false, pool, threadLocalRandom); startProfilingSystem(system); @@ -160,6 +180,7 @@ public class ProfilingSystemTest { Duration.ofMillis(10), Duration.ofMillis(5), Duration.ofMillis(300), + false, pool, threadLocalRandom); system.shutdown(); @@ -174,7 +195,8 @@ public class ProfilingSystemTest { listener, Duration.ofMillis(10), Duration.ofMillis(5), - Duration.ofMillis(1)); + Duration.ofMillis(1), + false); Thread.sleep(50); system.shutdown(); verify(controller, never()).createRecording(any()); @@ -191,7 +213,8 @@ public class ProfilingSystemTest { listener, Duration.ofMillis(10), Duration.ofMillis(5), - Duration.ofMillis(10)); + Duration.ofMillis(10), + false); startProfilingSystem(system); Thread.sleep(200); system.shutdown(); @@ -204,7 +227,12 @@ public class ProfilingSystemTest { ConfigurationException.class, () -> { new ProfilingSystem( - controller, listener, Duration.ofMillis(-10), Duration.ZERO, Duration.ofMillis(200)); + controller, + listener, + Duration.ofMillis(-10), + Duration.ZERO, + Duration.ofMillis(200), + false); }); } @@ -218,7 +246,8 @@ public class ProfilingSystemTest { listener, Duration.ofMillis(10), Duration.ofMillis(-20), - Duration.ofMillis(200)); + Duration.ofMillis(200), + false); }); } @@ -232,7 +261,8 @@ public class ProfilingSystemTest { listener, Duration.ofMillis(10), Duration.ofMillis(20), - Duration.ofMillis(-200)); + Duration.ofMillis(-200), + false); }); } @@ -252,6 +282,7 @@ public class ProfilingSystemTest { Duration.ofMillis(10), Duration.ofMillis(5), uploadPeriod, + false, pool, threadLocalRandom); startProfilingSystem(system); @@ -279,6 +310,7 @@ public class ProfilingSystemTest { Duration.ofMillis(10), Duration.ofMillis(5), uploadPeriod, + false, pool, threadLocalRandom); startProfilingSystem(system); @@ -307,6 +339,7 @@ public class ProfilingSystemTest { startupDelay, startupDelayRandomRange, Duration.ofMillis(100), + false, pool, threadLocalRandom); @@ -326,6 +359,7 @@ public class ProfilingSystemTest { startupDelay, Duration.ZERO, Duration.ofMillis(100), + false, pool, threadLocalRandom); diff --git a/dd-java-agent/agent-profiling/src/main/java/com/datadog/profiling/agent/ProfilingAgent.java b/dd-java-agent/agent-profiling/src/main/java/com/datadog/profiling/agent/ProfilingAgent.java index e09f14b474..d4a878e994 100644 --- a/dd-java-agent/agent-profiling/src/main/java/com/datadog/profiling/agent/ProfilingAgent.java +++ b/dd-java-agent/agent-profiling/src/main/java/com/datadog/profiling/agent/ProfilingAgent.java @@ -17,9 +17,18 @@ public class ProfilingAgent { private static volatile ProfilingSystem PROFILER; - public static synchronized void run() throws IllegalArgumentException { + /** + * Main entry point into profiling Note: this must be reentrant because we may want to start + * profiling early, and then attempt to start it again at normal time + */ + public static synchronized void run(final boolean early) throws IllegalArgumentException { if (PROFILER == null) { final Config config = Config.get(); + if (early && !config.isProfilingStartForceEarly()) { + log.info("Profiling: not starting early"); + // early startup is disabled; + return; + } if (!config.isProfilingEnabled()) { log.info("Profiling: disabled"); return; @@ -34,7 +43,7 @@ public class ProfilingAgent { final RecordingUploader uploader = new RecordingUploader(config); - final Duration startupDelay = Duration.ofSeconds(config.getProfilingStartupDelay()); + final Duration startupDelay = Duration.ofSeconds(config.getProfilingStartDelay()); final Duration uploadPeriod = Duration.ofSeconds(config.getProfilingUploadPeriod()); // Randomize startup delay for up to one upload period. Consider having separate setting for @@ -43,7 +52,12 @@ public class ProfilingAgent { PROFILER = new ProfilingSystem( - controller, uploader::upload, startupDelay, startupDelayRandomRange, uploadPeriod); + controller, + uploader::upload, + startupDelay, + startupDelayRandomRange, + uploadPeriod, + config.isProfilingStartForceEarly()); PROFILER.start(); log.info("Profiling has started!"); diff --git a/dd-trace-api/src/main/java/datadog/trace/api/Config.java b/dd-trace-api/src/main/java/datadog/trace/api/Config.java index e52a915620..237703aaf7 100644 --- a/dd-trace-api/src/main/java/datadog/trace/api/Config.java +++ b/dd-trace-api/src/main/java/datadog/trace/api/Config.java @@ -113,7 +113,11 @@ public class Config { public static final String PROFILING_API_KEY_OLD = "profiling.apikey"; public static final String PROFILING_API_KEY_FILE_OLD = "profiling.apikey.file"; public static final String PROFILING_TAGS = "profiling.tags"; - public static final String PROFILING_STARTUP_DELAY = "profiling.start-delay"; + public static final String PROFILING_START_DELAY = "profiling.start-delay"; + // DANGEROUS! May lead on sigsegv on JVMs before 14 + // Not intended for production use + public static final String PROFILING_START_FORCE_EARLY = + "profiling.experimental.start-force-early"; public static final String PROFILING_UPLOAD_PERIOD = "profiling.upload.period"; public static final String PROFILING_TEMPLATE_OVERRIDE_FILE = "profiling.jfr-template-override-file"; @@ -171,7 +175,8 @@ public class Config { public static final boolean DEFAULT_PROFILING_ENABLED = false; public static final String DEFAULT_PROFILING_URL = "https://intake.profile.datadoghq.com/v1/input"; - public static final int DEFAULT_PROFILING_STARTUP_DELAY = 10; + public static final int DEFAULT_PROFILING_START_DELAY = 10; + public static final boolean DEFAULT_PROFILING_START_FORCE_EARLY = false; public static final int DEFAULT_PROFILING_UPLOAD_PERIOD = 60; // 1 min public static final int DEFAULT_PROFILING_UPLOAD_TIMEOUT = 30; // seconds public static final String DEFAULT_PROFILING_UPLOAD_COMPRESSION = "on"; @@ -267,7 +272,8 @@ public class Config { @Getter private final String profilingUrl; @Getter private final String profilingApiKey; private final Map profilingTags; - @Getter private final int profilingStartupDelay; + @Getter private final int profilingStartDelay; + @Getter private final boolean profilingStartForceEarly; @Getter private final int profilingUploadPeriod; @Getter private final String profilingTemplateOverrideFile; @Getter private final int profilingUploadTimeout; @@ -444,8 +450,11 @@ public class Config { profilingApiKey = tmpProfilingApiKey; profilingTags = getMapSettingFromEnvironment(PROFILING_TAGS, null); - profilingStartupDelay = - getIntegerSettingFromEnvironment(PROFILING_STARTUP_DELAY, DEFAULT_PROFILING_STARTUP_DELAY); + profilingStartDelay = + getIntegerSettingFromEnvironment(PROFILING_START_DELAY, DEFAULT_PROFILING_START_DELAY); + profilingStartForceEarly = + getBooleanSettingFromEnvironment( + PROFILING_START_FORCE_EARLY, DEFAULT_PROFILING_START_FORCE_EARLY); profilingUploadPeriod = getIntegerSettingFromEnvironment(PROFILING_UPLOAD_PERIOD, DEFAULT_PROFILING_UPLOAD_PERIOD); profilingTemplateOverrideFile = @@ -603,8 +612,11 @@ public class Config { profilingUrl = properties.getProperty(PROFILING_URL, parent.profilingUrl); profilingApiKey = properties.getProperty(PROFILING_API_KEY, parent.profilingApiKey); profilingTags = getPropertyMapValue(properties, PROFILING_TAGS, parent.profilingTags); - profilingStartupDelay = - getPropertyIntegerValue(properties, PROFILING_STARTUP_DELAY, parent.profilingStartupDelay); + profilingStartDelay = + getPropertyIntegerValue(properties, PROFILING_START_DELAY, parent.profilingStartDelay); + profilingStartForceEarly = + getPropertyBooleanValue( + properties, PROFILING_START_FORCE_EARLY, parent.profilingStartForceEarly); profilingUploadPeriod = getPropertyIntegerValue(properties, PROFILING_UPLOAD_PERIOD, parent.profilingUploadPeriod); profilingTemplateOverrideFile = diff --git a/dd-trace-api/src/test/groovy/datadog/trace/api/ConfigTest.groovy b/dd-trace-api/src/test/groovy/datadog/trace/api/ConfigTest.groovy index c42adc0ea4..da4fdcb0dd 100644 --- a/dd-trace-api/src/test/groovy/datadog/trace/api/ConfigTest.groovy +++ b/dd-trace-api/src/test/groovy/datadog/trace/api/ConfigTest.groovy @@ -40,7 +40,8 @@ import static datadog.trace.api.Config.PROFILING_PROXY_HOST import static datadog.trace.api.Config.PROFILING_PROXY_PASSWORD import static datadog.trace.api.Config.PROFILING_PROXY_PORT import static datadog.trace.api.Config.PROFILING_PROXY_USERNAME -import static datadog.trace.api.Config.PROFILING_STARTUP_DELAY +import static datadog.trace.api.Config.PROFILING_START_DELAY +import static datadog.trace.api.Config.PROFILING_START_FORCE_EARLY import static datadog.trace.api.Config.PROFILING_TAGS import static datadog.trace.api.Config.PROFILING_TEMPLATE_OVERRIDE_FILE import static datadog.trace.api.Config.PROFILING_UPLOAD_COMPRESSION @@ -135,7 +136,8 @@ class ConfigTest extends DDSpecification { config.profilingUrl == Config.DEFAULT_PROFILING_URL config.profilingApiKey == null config.mergedProfilingTags == [(HOST_TAG): config.getHostName(), (RUNTIME_ID_TAG): config.getRuntimeId(), (SERVICE_TAG): config.serviceName, (LANGUAGE_TAG_KEY): LANGUAGE_TAG_VALUE] - config.profilingStartupDelay == 10 + config.profilingStartDelay == 10 + config.profilingStartForceEarly == false config.profilingUploadPeriod == 60 config.profilingTemplateOverrideFile == null config.profilingUploadTimeout == 30 @@ -199,7 +201,8 @@ class ConfigTest extends DDSpecification { prop.setProperty(PROFILING_URL, "new url") prop.setProperty(PROFILING_API_KEY, "new api key") prop.setProperty(PROFILING_TAGS, "f:6,host:test-host") - prop.setProperty(PROFILING_STARTUP_DELAY, "1111") + prop.setProperty(PROFILING_START_DELAY, "1111") + prop.setProperty(PROFILING_START_FORCE_EARLY, "true") prop.setProperty(PROFILING_UPLOAD_PERIOD, "1112") prop.setProperty(PROFILING_TEMPLATE_OVERRIDE_FILE, "/path") prop.setProperty(PROFILING_UPLOAD_TIMEOUT, "1116") @@ -254,7 +257,8 @@ class ConfigTest extends DDSpecification { config.profilingUrl == "new url" config.profilingApiKey == "new api key" // we can still override via internal properties object config.mergedProfilingTags == [b: "2", f: "6", (HOST_TAG): "test-host", (RUNTIME_ID_TAG): config.getRuntimeId(), (SERVICE_TAG): config.serviceName, (LANGUAGE_TAG_KEY): LANGUAGE_TAG_VALUE] - config.profilingStartupDelay == 1111 + config.profilingStartDelay == 1111 + config.profilingStartForceEarly == true config.profilingUploadPeriod == 1112 config.profilingUploadCompression == "off" config.profilingTemplateOverrideFile == "/path" @@ -309,7 +313,8 @@ class ConfigTest extends DDSpecification { System.setProperty(PREFIX + PROFILING_URL, "new url") System.setProperty(PREFIX + PROFILING_API_KEY, "new api key") System.setProperty(PREFIX + PROFILING_TAGS, "f:6,host:test-host") - System.setProperty(PREFIX + PROFILING_STARTUP_DELAY, "1111") + System.setProperty(PREFIX + PROFILING_START_DELAY, "1111") + System.setProperty(PREFIX + PROFILING_START_FORCE_EARLY, "true") System.setProperty(PREFIX + PROFILING_UPLOAD_PERIOD, "1112") System.setProperty(PREFIX + PROFILING_TEMPLATE_OVERRIDE_FILE, "/path") System.setProperty(PREFIX + PROFILING_UPLOAD_TIMEOUT, "1116") @@ -364,7 +369,8 @@ class ConfigTest extends DDSpecification { config.profilingUrl == "new url" config.profilingApiKey == null // system properties cannot be used to provide a key config.mergedProfilingTags == [b: "2", f: "6", (HOST_TAG): "test-host", (RUNTIME_ID_TAG): config.getRuntimeId(), (SERVICE_TAG): config.serviceName, (LANGUAGE_TAG_KEY): LANGUAGE_TAG_VALUE] - config.profilingStartupDelay == 1111 + config.profilingStartDelay == 1111 + config.profilingStartForceEarly == true config.profilingUploadPeriod == 1112 config.profilingTemplateOverrideFile == "/path" config.profilingUploadTimeout == 1116