Don't cache sanitization results for large sql statements (#13353)

This commit is contained in:
Lauri Tulmin 2025-04-10 22:12:55 +03:00 committed by GitHub
parent 95cc300125
commit 8cd11e4688
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 214 additions and 18 deletions

View File

@ -24,7 +24,9 @@ public interface DbClientCommonAttributesGetter<REQUEST, RESPONSE> {
@Deprecated
@Nullable
String getUser(REQUEST request);
default String getUser(REQUEST request) {
return null;
}
/**
* @deprecated Use {@link #getDbNamespace(Object)} instead.
@ -43,7 +45,9 @@ public interface DbClientCommonAttributesGetter<REQUEST, RESPONSE> {
@Deprecated
@Nullable
String getConnectionString(REQUEST request);
default String getConnectionString(REQUEST request) {
return null;
}
@Nullable
default String getResponseStatus(@Nullable RESPONSE response, @Nullable Throwable error) {

View File

@ -84,9 +84,6 @@ public abstract class DbClientSpanNameExtractor<REQUEST> implements SpanNameExtr
private static final class SqlClientSpanNameExtractor<REQUEST>
extends DbClientSpanNameExtractor<REQUEST> {
// a dedicated sanitizer just for extracting the operation and identifier name
private static final SqlStatementSanitizer sanitizer = SqlStatementSanitizer.create(true);
private final SqlClientAttributesGetter<REQUEST, ?> getter;
private SqlClientSpanNameExtractor(SqlClientAttributesGetter<REQUEST, ?> getter) {
@ -106,13 +103,15 @@ public abstract class DbClientSpanNameExtractor<REQUEST> implements SpanNameExtr
if (rawQueryTexts.size() > 1) { // for backcompat(?)
return computeSpanName(namespace, null, null);
}
SqlStatementInfo sanitizedStatement = sanitizer.sanitize(rawQueryTexts.iterator().next());
SqlStatementInfo sanitizedStatement =
SqlStatementSanitizerUtil.sanitize(rawQueryTexts.iterator().next());
return computeSpanName(
namespace, sanitizedStatement.getOperation(), sanitizedStatement.getMainIdentifier());
}
if (rawQueryTexts.size() == 1) {
SqlStatementInfo sanitizedStatement = sanitizer.sanitize(rawQueryTexts.iterator().next());
SqlStatementInfo sanitizedStatement =
SqlStatementSanitizerUtil.sanitize(rawQueryTexts.iterator().next());
String operation = sanitizedStatement.getOperation();
if (isBatch(request)) {
operation = "BATCH " + operation;

View File

@ -10,7 +10,6 @@ import java.util.LinkedHashSet;
import java.util.Set;
class MultiQuery {
private static final SqlStatementSanitizer sanitizer = SqlStatementSanitizer.create(true);
private final String mainIdentifier;
private final String operation;
@ -28,7 +27,7 @@ class MultiQuery {
UniqueValue uniqueOperation = new UniqueValue();
Set<String> uniqueStatements = new LinkedHashSet<>();
for (String rawQueryText : rawQueryTexts) {
SqlStatementInfo sanitizedStatement = sanitizer.sanitize(rawQueryText);
SqlStatementInfo sanitizedStatement = SqlStatementSanitizerUtil.sanitize(rawQueryText);
String mainIdentifier = sanitizedStatement.getMainIdentifier();
uniqueMainIdentifier.set(mainIdentifier);
String operation = sanitizedStatement.getOperation();

View File

@ -55,8 +55,6 @@ public final class SqlClientAttributesExtractor<REQUEST, RESPONSE>
}
private static final String SQL_CALL = "CALL";
// sanitizer is also used to extract operation and table name, so we have it always enabled here
private static final SqlStatementSanitizer sanitizer = SqlStatementSanitizer.create(true);
private final AttributeKey<String> oldSemconvTableAttribute;
private final boolean statementSanitizationEnabled;
@ -83,7 +81,7 @@ public final class SqlClientAttributesExtractor<REQUEST, RESPONSE>
if (SemconvStability.emitOldDatabaseSemconv()) {
if (rawQueryTexts.size() == 1) { // for backcompat(?)
String rawQueryText = rawQueryTexts.iterator().next();
SqlStatementInfo sanitizedStatement = sanitizer.sanitize(rawQueryText);
SqlStatementInfo sanitizedStatement = SqlStatementSanitizerUtil.sanitize(rawQueryText);
String operation = sanitizedStatement.getOperation();
internalSet(
attributes,
@ -104,7 +102,7 @@ public final class SqlClientAttributesExtractor<REQUEST, RESPONSE>
}
if (rawQueryTexts.size() == 1) {
String rawQueryText = rawQueryTexts.iterator().next();
SqlStatementInfo sanitizedStatement = sanitizer.sanitize(rawQueryText);
SqlStatementInfo sanitizedStatement = SqlStatementSanitizerUtil.sanitize(rawQueryText);
String operation = sanitizedStatement.getOperation();
internalSet(
attributes,

View File

@ -21,6 +21,7 @@ public final class SqlStatementSanitizer {
private static final Cache<CacheKey, SqlStatementInfo> sqlToStatementInfoCache =
Cache.bounded(1000);
private static final int LARGE_STATEMENT_THRESHOLD = 10 * 1024;
public static SqlStatementSanitizer create(boolean statementSanitizationEnabled) {
return new SqlStatementSanitizer(statementSanitizationEnabled);
@ -40,12 +41,24 @@ public final class SqlStatementSanitizer {
if (!statementSanitizationEnabled || statement == null) {
return SqlStatementInfo.create(statement, null, null);
}
// sanitization result will not be cached for statements larger than the threshold to avoid
// cache growing too large
// https://github.com/open-telemetry/opentelemetry-java-instrumentation/issues/13180
if (statement.length() > LARGE_STATEMENT_THRESHOLD) {
return sanitizeImpl(statement, dialect);
}
return sqlToStatementInfoCache.computeIfAbsent(
CacheKey.create(statement, dialect),
k -> {
supportability.incrementCounter(SQL_STATEMENT_SANITIZER_CACHE_MISS);
return AutoSqlSanitizer.sanitize(statement, dialect);
});
CacheKey.create(statement, dialect), k -> sanitizeImpl(statement, dialect));
}
private static SqlStatementInfo sanitizeImpl(@Nullable String statement, SqlDialect dialect) {
supportability.incrementCounter(SQL_STATEMENT_SANITIZER_CACHE_MISS);
return AutoSqlSanitizer.sanitize(statement, dialect);
}
// visible for tests
static boolean isCached(String statement) {
return sqlToStatementInfoCache.get(CacheKey.create(statement, SqlDialect.DEFAULT)) != null;
}
@AutoValue

View File

@ -0,0 +1,27 @@
/*
* Copyright The OpenTelemetry Authors
* SPDX-License-Identifier: Apache-2.0
*/
package io.opentelemetry.instrumentation.api.incubator.semconv.db;
import io.opentelemetry.instrumentation.api.instrumenter.Instrumenter;
import io.opentelemetry.instrumentation.api.internal.InstrumenterContext;
import java.util.HashMap;
import java.util.Map;
/**
* Helper class for sanitizing sql that keeps sanitization results in {@link InstrumenterContext} so
* that each statement would be sanitized only once for given {@link Instrumenter} call.
*/
class SqlStatementSanitizerUtil {
private static final SqlStatementSanitizer sanitizer = SqlStatementSanitizer.create(true);
static SqlStatementInfo sanitize(String queryText) {
Map<String, SqlStatementInfo> map =
InstrumenterContext.computeIfAbsent("sanitized-sql-map", unused -> new HashMap<>());
return map.computeIfAbsent(queryText, sanitizer::sanitize);
}
private SqlStatementSanitizerUtil() {}
}

View File

@ -137,6 +137,27 @@ public class SqlStatementSanitizerTest {
assertThat(sanitized).isEqualTo("select col from table where col in (?)");
}
@Test
public void largeStatementCached() {
// test that short statement is cached
String shortStatement = "SELECT * FROM TABLE WHERE FIELD = 1234";
String sanitizedShort =
SqlStatementSanitizer.create(true).sanitize(shortStatement).getFullStatement();
assertThat(sanitizedShort).doesNotContain("1234");
assertThat(SqlStatementSanitizer.isCached(shortStatement)).isTrue();
// test that large statement is not cached
StringBuffer s = new StringBuffer();
for (int i = 0; i < 10000; i++) {
s.append("SELECT * FROM TABLE WHERE FIELD = 1234 AND ");
}
String largeStatement = s.toString();
String sanitizedLarge =
SqlStatementSanitizer.create(true).sanitize(largeStatement).getFullStatement();
assertThat(sanitizedLarge).doesNotContain("1234");
assertThat(SqlStatementSanitizer.isCached(largeStatement)).isFalse();
}
static class SqlArgs implements ArgumentsProvider {
@Override

View File

@ -14,6 +14,7 @@ import io.opentelemetry.context.Context;
import io.opentelemetry.context.ContextKey;
import io.opentelemetry.instrumentation.api.internal.HttpRouteState;
import io.opentelemetry.instrumentation.api.internal.InstrumenterAccess;
import io.opentelemetry.instrumentation.api.internal.InstrumenterContext;
import io.opentelemetry.instrumentation.api.internal.InstrumenterUtil;
import io.opentelemetry.instrumentation.api.internal.SupportabilityMetrics;
import java.time.Instant;
@ -164,6 +165,14 @@ public class Instrumenter<REQUEST, RESPONSE> {
}
private Context doStart(Context parentContext, REQUEST request, @Nullable Instant startTime) {
try {
return doStartImpl(parentContext, request, startTime);
} finally {
InstrumenterContext.reset();
}
}
private Context doStartImpl(Context parentContext, REQUEST request, @Nullable Instant startTime) {
SpanKind spanKind = spanKindExtractor.extract(request);
SpanBuilder spanBuilder =
tracer.spanBuilder(spanNameExtractor.extract(request)).setSpanKind(spanKind);

View File

@ -0,0 +1,48 @@
/*
* Copyright The OpenTelemetry Authors
* SPDX-License-Identifier: Apache-2.0
*/
package io.opentelemetry.instrumentation.api.internal;
import io.opentelemetry.instrumentation.api.instrumenter.AttributesExtractor;
import io.opentelemetry.instrumentation.api.instrumenter.Instrumenter;
import io.opentelemetry.instrumentation.api.instrumenter.SpanNameExtractor;
import java.util.HashMap;
import java.util.Map;
import java.util.function.Function;
/**
* Helper class for sharing computed values between different {@link AttributesExtractor}s and
* {@link SpanNameExtractor} called in the start phase of the {@link Instrumenter}.
*
* <p>This class is internal and is hence not for public use. Its APIs are unstable and can change
* at any time.
*/
public final class InstrumenterContext {
private static final ThreadLocal<InstrumenterContext> instrumenterContext =
new ThreadLocal<InstrumenterContext>() {
@Override
protected InstrumenterContext initialValue() {
return new InstrumenterContext();
}
};
private final Map<String, Object> map = new HashMap<>();
private InstrumenterContext() {}
@SuppressWarnings("unchecked")
public static <T> T computeIfAbsent(String key, Function<String, T> function) {
return (T) get().computeIfAbsent(key, function);
}
// visible for testing
static Map<String, Object> get() {
return instrumenterContext.get().map;
}
public static void reset() {
instrumenterContext.remove();
}
}

View File

@ -0,0 +1,78 @@
/*
* Copyright The OpenTelemetry Authors
* SPDX-License-Identifier: Apache-2.0
*/
package io.opentelemetry.instrumentation.api.internal;
import static io.opentelemetry.instrumentation.testing.junit.db.SemconvStabilityUtil.maybeStable;
import static org.assertj.core.api.Assertions.assertThat;
import io.opentelemetry.api.common.Attributes;
import io.opentelemetry.api.common.AttributesBuilder;
import io.opentelemetry.context.Context;
import io.opentelemetry.instrumentation.api.incubator.semconv.db.DbClientSpanNameExtractor;
import io.opentelemetry.instrumentation.api.incubator.semconv.db.SqlClientAttributesExtractor;
import io.opentelemetry.instrumentation.api.incubator.semconv.db.SqlClientAttributesGetter;
import io.opentelemetry.instrumentation.api.incubator.semconv.db.SqlStatementInfo;
import io.opentelemetry.instrumentation.api.instrumenter.AttributesExtractor;
import io.opentelemetry.instrumentation.api.instrumenter.SpanNameExtractor;
import io.opentelemetry.instrumentation.testing.internal.AutoCleanupExtension;
import io.opentelemetry.semconv.incubating.DbIncubatingAttributes;
import java.util.Collection;
import java.util.Collections;
import java.util.Map;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.RegisterExtension;
class InstrumenterContextTest {
@RegisterExtension static final AutoCleanupExtension cleanup = AutoCleanupExtension.create();
@SuppressWarnings({"unchecked", "deprecation"}) // using deprecated DB_SQL_TABLE
@Test
void testSqlSanitizer() {
String testQuery = "SELECT name FROM test WHERE id = 1";
SqlClientAttributesGetter<Object, Void> getter =
new SqlClientAttributesGetter<Object, Void>() {
@Override
public Collection<String> getRawQueryTexts(Object request) {
return Collections.singletonList(testQuery);
}
};
SpanNameExtractor<Object> spanNameExtractor = DbClientSpanNameExtractor.create(getter);
AttributesExtractor<Object, Void> attributesExtractor =
SqlClientAttributesExtractor.create(getter);
InstrumenterContext.reset();
cleanup.deferCleanup(InstrumenterContext::reset);
assertThat(InstrumenterContext.get()).isEmpty();
assertThat(spanNameExtractor.extract(null)).isEqualTo("SELECT test");
// verify that sanitized statement was cached, see SqlStatementSanitizerUtil
assertThat(InstrumenterContext.get()).containsKey("sanitized-sql-map");
Map<String, SqlStatementInfo> sanitizedMap =
(Map<String, SqlStatementInfo>) InstrumenterContext.get().get("sanitized-sql-map");
assertThat(sanitizedMap).containsKey(testQuery);
// replace cached sanitization result to verify it is used
sanitizedMap.put(
testQuery,
SqlStatementInfo.create("SELECT name2 FROM test2 WHERE id = ?", "SELECT", "test2"));
{
AttributesBuilder builder = Attributes.builder();
attributesExtractor.onStart(builder, Context.root(), null);
assertThat(builder.build().get(maybeStable(DbIncubatingAttributes.DB_SQL_TABLE)))
.isEqualTo("test2");
}
// clear cached value to see whether it gets recomputed correctly
sanitizedMap.clear();
{
AttributesBuilder builder = Attributes.builder();
attributesExtractor.onStart(builder, Context.root(), null);
assertThat(builder.build().get(maybeStable(DbIncubatingAttributes.DB_SQL_TABLE)))
.isEqualTo("test");
}
}
}