From 625febaeb4680da997d26a6d29c2a9f7c82cf660 Mon Sep 17 00:00:00 2001 From: Lauri Tulmin Date: Wed, 10 Nov 2021 23:43:03 +0200 Subject: [PATCH] Sql sanitizer: sanitize double quoted strings only in couchbase queries (#4615) * Sql sanitizer: sanitize double quotes strings only in couchbase queries * remove unused method * use AutoValue --- .../instrumentation/api/db/SqlDialect.java | 13 +++++++++ .../api/db/SqlStatementSanitizer.java | 23 +++++++++++++-- .../src/main/jflex/SqlSanitizer.jflex | 15 ++++++++-- .../api/db/SqlStatementSanitizerTest.groovy | 29 +++++++++++++------ .../v2_0/CouchbaseQuerySanitizer.java | 3 +- 5 files changed, 68 insertions(+), 15 deletions(-) create mode 100644 instrumentation-api/src/main/java/io/opentelemetry/instrumentation/api/db/SqlDialect.java diff --git a/instrumentation-api/src/main/java/io/opentelemetry/instrumentation/api/db/SqlDialect.java b/instrumentation-api/src/main/java/io/opentelemetry/instrumentation/api/db/SqlDialect.java new file mode 100644 index 0000000000..9fd640ded6 --- /dev/null +++ b/instrumentation-api/src/main/java/io/opentelemetry/instrumentation/api/db/SqlDialect.java @@ -0,0 +1,13 @@ +/* + * Copyright The OpenTelemetry Authors + * SPDX-License-Identifier: Apache-2.0 + */ + +package io.opentelemetry.instrumentation.api.db; + +/** Enumeration of sql dialects that are handled differently by {@link SqlStatementSanitizer}. */ +public enum SqlDialect { + DEFAULT, + // couchbase uses double quotes for string literals + COUCHBASE +} diff --git a/instrumentation-api/src/main/java/io/opentelemetry/instrumentation/api/db/SqlStatementSanitizer.java b/instrumentation-api/src/main/java/io/opentelemetry/instrumentation/api/db/SqlStatementSanitizer.java index 6d5696eba1..29c14c26f5 100644 --- a/instrumentation-api/src/main/java/io/opentelemetry/instrumentation/api/db/SqlStatementSanitizer.java +++ b/instrumentation-api/src/main/java/io/opentelemetry/instrumentation/api/db/SqlStatementSanitizer.java @@ -8,6 +8,7 @@ package io.opentelemetry.instrumentation.api.db; import static io.opentelemetry.instrumentation.api.db.StatementSanitizationConfig.isStatementSanitizationEnabled; import static io.opentelemetry.instrumentation.api.internal.SupportabilityMetrics.CounterNames.SQL_STATEMENT_SANITIZER_CACHE_MISS; +import com.google.auto.value.AutoValue; import io.opentelemetry.instrumentation.api.caching.Cache; import io.opentelemetry.instrumentation.api.internal.SupportabilityMetrics; import javax.annotation.Nullable; @@ -19,20 +20,36 @@ import javax.annotation.Nullable; public final class SqlStatementSanitizer { private static final SupportabilityMetrics supportability = SupportabilityMetrics.instance(); - private static final Cache sqlToStatementInfoCache = + private static final Cache sqlToStatementInfoCache = Cache.builder().setMaximumSize(1000).build(); public static SqlStatementInfo sanitize(@Nullable String statement) { + return sanitize(statement, SqlDialect.DEFAULT); + } + + public static SqlStatementInfo sanitize(@Nullable String statement, SqlDialect dialect) { if (!isStatementSanitizationEnabled() || statement == null) { return SqlStatementInfo.create(statement, null, null); } return sqlToStatementInfoCache.computeIfAbsent( - statement, + CacheKey.create(statement, dialect), k -> { supportability.incrementCounter(SQL_STATEMENT_SANITIZER_CACHE_MISS); - return AutoSqlSanitizer.sanitize(statement); + return AutoSqlSanitizer.sanitize(statement, dialect); }); } + @AutoValue + abstract static class CacheKey { + + static CacheKey create(String statement, SqlDialect dialect) { + return new AutoValue_SqlStatementSanitizer_CacheKey(statement, dialect); + } + + abstract String getStatement(); + + abstract SqlDialect getDialect(); + } + private SqlStatementSanitizer() {} } diff --git a/instrumentation-api/src/main/jflex/SqlSanitizer.jflex b/instrumentation-api/src/main/jflex/SqlSanitizer.jflex index 7596591079..3c3767498f 100644 --- a/instrumentation-api/src/main/jflex/SqlSanitizer.jflex +++ b/instrumentation-api/src/main/jflex/SqlSanitizer.jflex @@ -30,8 +30,9 @@ DOLLAR_QUOTED_STR = "$$" [^$]* "$$" WHITESPACE = [ \t\r\n]+ %{ - static SqlStatementInfo sanitize(String statement) { + static SqlStatementInfo sanitize(String statement, SqlDialect dialect) { AutoSqlSanitizer sanitizer = new AutoSqlSanitizer(new java.io.StringReader(statement)); + sanitizer.dialect = dialect; try { while (!sanitizer.yyatEOF()) { int token = sanitizer.yylex(); @@ -71,6 +72,7 @@ WHITESPACE = [ \t\r\n]+ private boolean insideComment = false; private Operation operation = NoOp.INSTANCE; private boolean extractionDone = false; + private SqlDialect dialect; private void setOperation(Operation operation) { if (this.operation == NoOp.INSTANCE) { @@ -361,11 +363,20 @@ WHITESPACE = [ \t\r\n]+ } // here is where the actual sanitization happens - {BASIC_NUM} | {HEX_NUM} | {QUOTED_STR} | {DOUBLE_QUOTED_STR} | {DOLLAR_QUOTED_STR} { + {BASIC_NUM} | {HEX_NUM} | {QUOTED_STR} | {DOLLAR_QUOTED_STR} { builder.append('?'); if (isOverLimit()) return YYEOF; } + {DOUBLE_QUOTED_STR} { + if (dialect == SqlDialect.COUCHBASE) { + builder.append('?'); + } else { + appendCurrentFragment(); + } + if (isOverLimit()) return YYEOF; + } + {WHITESPACE} { builder.append(' '); if (isOverLimit()) return YYEOF; diff --git a/instrumentation-api/src/test/groovy/io/opentelemetry/instrumentation/api/db/SqlStatementSanitizerTest.groovy b/instrumentation-api/src/test/groovy/io/opentelemetry/instrumentation/api/db/SqlStatementSanitizerTest.groovy index 991198e447..1010b8ee34 100644 --- a/instrumentation-api/src/test/groovy/io/opentelemetry/instrumentation/api/db/SqlStatementSanitizerTest.groovy +++ b/instrumentation-api/src/test/groovy/io/opentelemetry/instrumentation/api/db/SqlStatementSanitizerTest.groovy @@ -37,6 +37,7 @@ class SqlStatementSanitizerTest extends Specification { "+7" | "?" "SELECT 0x0af764" | "SELECT ?" "SELECT 0xdeadBEEF" | "SELECT ?" + "SELECT * FROM \"TABLE\"" | "SELECT * FROM \"TABLE\"" // Not numbers but could be confused as such "SELECT A + B" | "SELECT A + B" @@ -59,15 +60,6 @@ class SqlStatementSanitizerTest extends Specification { "SELECT * FROM TABLE WHERE FIELD = '\"\$\$\$\$\"'" | "SELECT * FROM TABLE WHERE FIELD = ?" "SELECT * FROM TABLE WHERE FIELD = 'a single \" doublequote inside'" | "SELECT * FROM TABLE WHERE FIELD = ?" - // Some databases support/encourage " instead of ' with same escape rules - "SELECT * FROM TABLE WHERE FIELD = \"\"" | "SELECT * FROM TABLE WHERE FIELD = ?" - "SELECT * FROM TABLE WHERE FIELD = \"words and spaces'\"" | "SELECT * FROM TABLE WHERE FIELD = ?" - "SELECT * FROM TABLE WHERE FIELD = \" an escaped \"\" quote mark inside\"" | "SELECT * FROM TABLE WHERE FIELD = ?" - "SELECT * FROM TABLE WHERE FIELD = \"\\\\\"" | "SELECT * FROM TABLE WHERE FIELD = ?" - "SELECT * FROM TABLE WHERE FIELD = \"'inside singles'\"" | "SELECT * FROM TABLE WHERE FIELD = ?" - "SELECT * FROM TABLE WHERE FIELD = \"'\$\$\$\$'\"" | "SELECT * FROM TABLE WHERE FIELD = ?" - "SELECT * FROM TABLE WHERE FIELD = \"a single ' singlequote inside\"" | "SELECT * FROM TABLE WHERE FIELD = ?" - // Some databases allow using dollar-quoted strings "SELECT * FROM TABLE WHERE FIELD = \$\$\$\$" | "SELECT * FROM TABLE WHERE FIELD = ?" "SELECT * FROM TABLE WHERE FIELD = \$\$words and spaces\$\$" | "SELECT * FROM TABLE WHERE FIELD = ?" @@ -85,6 +77,25 @@ class SqlStatementSanitizerTest extends Specification { "FROM TABLE WHERE FIELD=1234" | "FROM TABLE WHERE FIELD=?" } + def "normalize couchbase #originalSql"() { + setup: + def actualSanitized = SqlStatementSanitizer.sanitize(originalSql, SqlDialect.COUCHBASE) + + expect: + actualSanitized.getFullStatement() == sanitizedSql + + where: + originalSql | sanitizedSql + // Some databases support/encourage " instead of ' with same escape rules + "SELECT * FROM TABLE WHERE FIELD = \"\"" | "SELECT * FROM TABLE WHERE FIELD = ?" + "SELECT * FROM TABLE WHERE FIELD = \"words and spaces'\"" | "SELECT * FROM TABLE WHERE FIELD = ?" + "SELECT * FROM TABLE WHERE FIELD = \" an escaped \"\" quote mark inside\"" | "SELECT * FROM TABLE WHERE FIELD = ?" + "SELECT * FROM TABLE WHERE FIELD = \"\\\\\"" | "SELECT * FROM TABLE WHERE FIELD = ?" + "SELECT * FROM TABLE WHERE FIELD = \"'inside singles'\"" | "SELECT * FROM TABLE WHERE FIELD = ?" + "SELECT * FROM TABLE WHERE FIELD = \"'\$\$\$\$'\"" | "SELECT * FROM TABLE WHERE FIELD = ?" + "SELECT * FROM TABLE WHERE FIELD = \"a single ' singlequote inside\"" | "SELECT * FROM TABLE WHERE FIELD = ?" + } + @Unroll def "should simplify #sql"() { expect: diff --git a/instrumentation/couchbase/couchbase-2.0/javaagent/src/main/java/io/opentelemetry/javaagent/instrumentation/couchbase/v2_0/CouchbaseQuerySanitizer.java b/instrumentation/couchbase/couchbase-2.0/javaagent/src/main/java/io/opentelemetry/javaagent/instrumentation/couchbase/v2_0/CouchbaseQuerySanitizer.java index 23ddbe8088..10c6a533d1 100644 --- a/instrumentation/couchbase/couchbase-2.0/javaagent/src/main/java/io/opentelemetry/javaagent/instrumentation/couchbase/v2_0/CouchbaseQuerySanitizer.java +++ b/instrumentation/couchbase/couchbase-2.0/javaagent/src/main/java/io/opentelemetry/javaagent/instrumentation/couchbase/v2_0/CouchbaseQuerySanitizer.java @@ -5,6 +5,7 @@ package io.opentelemetry.javaagent.instrumentation.couchbase.v2_0; +import io.opentelemetry.instrumentation.api.db.SqlDialect; import io.opentelemetry.instrumentation.api.db.SqlStatementInfo; import io.opentelemetry.instrumentation.api.db.SqlStatementSanitizer; import java.lang.invoke.MethodHandle; @@ -115,7 +116,7 @@ public final class CouchbaseQuerySanitizer { } private static SqlStatementInfo sanitizeString(String query) { - return SqlStatementSanitizer.sanitize(query); + return SqlStatementSanitizer.sanitize(query, SqlDialect.COUCHBASE); } private CouchbaseQuerySanitizer() {}