Improve command scrubber in MongoClientTracer (#1587) (#1663)

* Improve command scrubber in MongoClientTracer (#1587)

* Don't scrub the command field value at all if it's a string
* Use JsonWriter to improve efficiency of the scrubber
* If available, use JsonWriterSettings.Builder.maxLength to limit size of the query string
This commit is contained in:
Jeff Yemin 2020-11-19 16:21:29 -05:00 committed by GitHub
parent dca64662f6
commit 65a6293714
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 266 additions and 42 deletions

View File

@ -167,7 +167,7 @@ class MongoClientTest extends MongoBaseTest {
collection.count() == 1
assertTraces(2) {
trace(0, 1) {
mongoSpan(it, 0, "update", collectionName, dbName, "{\"update\":\"?\",\"ordered\":\"?\",\"updates\":[{\"q\":{\"password\":\"?\"},\"u\":{\"\$set\":{\"password\":\"?\"}}}]}")
mongoSpan(it, 0, "update", collectionName, dbName, "{\"update\":\"$collectionName\",\"ordered\":\"?\",\"updates\":[{\"q\":{\"password\":\"?\"},\"u\":{\"\$set\":{\"password\":\"?\"}}}]}")
}
trace(1, 1) {
mongoSpan(it, 0, "count", collectionName, dbName, "{\"count\":\"$collectionName\",\"query\":{}}")
@ -199,7 +199,7 @@ class MongoClientTest extends MongoBaseTest {
collection.count() == 0
assertTraces(2) {
trace(0, 1) {
mongoSpan(it, 0, "delete", collectionName, dbName, "{\"delete\":\"?\",\"ordered\":\"?\",\"deletes\":[{\"q\":{\"password\":\"?\"},\"limit\":\"?\"}]}")
mongoSpan(it, 0, "delete", collectionName, dbName, "{\"delete\":\"$collectionName\",\"ordered\":\"?\",\"deletes\":[{\"q\":{\"password\":\"?\"},\"limit\":\"?\"}]}")
}
trace(1, 1) {
mongoSpan(it, 0, "count", collectionName, dbName, "{\"count\":\"$collectionName\",\"query\":{}}")

View File

@ -0,0 +1,71 @@
/*
* Copyright The OpenTelemetry Authors
* SPDX-License-Identifier: Apache-2.0
*/
import io.opentelemetry.javaagent.instrumentation.mongo.MongoClientTracer
import org.bson.BsonArray
import org.bson.BsonDocument
import org.bson.BsonInt32
import org.bson.BsonString
import spock.lang.Specification
import static java.util.Arrays.asList
/**
 * Spock specification for {@code MongoClientTracer.normalizeQuery}: checks how BSON command
 * documents are rendered to JSON, which values are replaced by the "?" placeholder, and how
 * the output is cut off when the tracer is constructed with a maximum length.
 */
class MongoClientTracerTest extends Specification {
// Non-string values are expected to be replaced by "?" at the top level, inside nested
// documents and inside arrays, while field names and document/array structure are kept.
def 'should normalize queries to json'() {
setup:
def tracer = new MongoClientTracer()
expect:
tracer.normalizeQuery(
new BsonDocument("cmd", new BsonInt32(1))) ==
'{ "cmd" : "?" }'
tracer.normalizeQuery(
new BsonDocument("cmd", new BsonInt32(1))
.append("sub", new BsonDocument("a", new BsonInt32(1)))) ==
'{ "cmd" : "?", "sub" : { "a" : "?" } }'
tracer.normalizeQuery(
new BsonDocument("cmd", new BsonInt32(1))
.append("sub", new BsonArray(asList(new BsonInt32(1))))) ==
'{ "cmd" : "?", "sub" : ["?"] }'
}
// All three fields hold the same string "c"; only the first top-level one ("cmd") is expected
// to survive, the later ones are expected to be replaced by "?".
def 'should only preserve string value if it is the value of the first top-level key'() {
setup:
def tracer = new MongoClientTracer()
expect:
tracer.normalizeQuery(
new BsonDocument("cmd", new BsonString("c"))
.append("f", new BsonString("c"))
.append("sub", new BsonString("c"))) ==
'{ "cmd" : "c", "f" : "?", "sub" : "?" }'
}
// The tracer is built with a maximum length of 20; the expected string is 20 characters long
// and stops right after the "f1" field name, so the result is not well-formed JSON.
def 'should truncate simple command'() {
setup:
def tracer = new MongoClientTracer(20)
expect:
tracer.normalizeQuery(
new BsonDocument("cmd", new BsonString("c"))
.append("f1", new BsonString("c1"))
.append("f2", new BsonString("c2"))) ==
'{ "cmd" : "c", "f1" '
}
// The tracer is built with a maximum length of 27; the expected string is 27 characters long
// and stops inside the "f1" array, after the first scrubbed element and its comma.
def 'should truncate array'() {
setup:
def tracer = new MongoClientTracer(27)
expect:
tracer.normalizeQuery(
new BsonDocument("cmd", new BsonString("c"))
.append("f1", new BsonArray(asList(new BsonString("c1"), new BsonString("c2"))))
.append("f2", new BsonString("c3"))) ==
'{ "cmd" : "c", "f1" : ["?",'
}
}

View File

@ -177,7 +177,7 @@ class MongoClientTest extends MongoBaseTest {
collection.count() == 1
assertTraces(2) {
trace(0, 1) {
mongoSpan(it, 0, "update", collectionName, dbName, "{\"update\":\"?\",\"ordered\":\"?\",\"updates\":[{\"q\":{\"password\":\"?\"},\"u\":{\"\$set\":{\"password\":\"?\"}}}]}")
mongoSpan(it, 0, "update", collectionName, dbName, "{\"update\":\"$collectionName\",\"ordered\":\"?\",\"updates\":[{\"q\":{\"password\":\"?\"},\"u\":{\"\$set\":{\"password\":\"?\"}}}]}")
}
trace(1, 1) {
mongoSpan(it, 0, "count", collectionName, dbName, "{\"count\":\"$collectionName\",\"query\":{}}")
@ -209,7 +209,7 @@ class MongoClientTest extends MongoBaseTest {
collection.count() == 0
assertTraces(2) {
trace(0, 1) {
mongoSpan(it, 0, "delete", collectionName, dbName, "{\"delete\":\"?\",\"ordered\":\"?\",\"deletes\":[{\"q\":{\"password\":\"?\"},\"limit\":\"?\"}]}")
mongoSpan(it, 0, "delete", collectionName, dbName, "{\"delete\":\"$collectionName\",\"ordered\":\"?\",\"deletes\":[{\"q\":{\"password\":\"?\"},\"limit\":\"?\"}]}")
}
trace(1, 1) {
mongoSpan(it, 0, "count", collectionName, dbName, "{\"count\":\"$collectionName\",\"query\":{}}")

View File

@ -0,0 +1,71 @@
/*
* Copyright The OpenTelemetry Authors
* SPDX-License-Identifier: Apache-2.0
*/
import io.opentelemetry.javaagent.instrumentation.mongo.MongoClientTracer
import org.bson.BsonArray
import org.bson.BsonDocument
import org.bson.BsonInt32
import org.bson.BsonString
import spock.lang.Specification
import static java.util.Arrays.asList
/**
 * Spock specification for {@code MongoClientTracer.normalizeQuery}: checks how BSON command
 * documents are rendered to JSON, which values are replaced by the "?" placeholder, and how
 * the output is cut off when the tracer is constructed with a maximum length.
 */
class MongoClientTracerTest extends Specification {
// Non-string values are expected to be replaced by "?" at the top level, inside nested
// documents and inside arrays, while field names and document/array structure are kept.
def 'should normalize queries to json'() {
setup:
def tracer = new MongoClientTracer()
expect:
tracer.normalizeQuery(
new BsonDocument("cmd", new BsonInt32(1))) ==
'{ "cmd" : "?" }'
tracer.normalizeQuery(
new BsonDocument("cmd", new BsonInt32(1))
.append("sub", new BsonDocument("a", new BsonInt32(1)))) ==
'{ "cmd" : "?", "sub" : { "a" : "?" } }'
tracer.normalizeQuery(
new BsonDocument("cmd", new BsonInt32(1))
.append("sub", new BsonArray(asList(new BsonInt32(1))))) ==
'{ "cmd" : "?", "sub" : ["?"] }'
}
// All three fields hold the same string "c"; only the first top-level one ("cmd") is expected
// to survive, the later ones are expected to be replaced by "?".
def 'should only preserve string value if it is the value of the first top-level key'() {
setup:
def tracer = new MongoClientTracer()
expect:
tracer.normalizeQuery(
new BsonDocument("cmd", new BsonString("c"))
.append("f", new BsonString("c"))
.append("sub", new BsonString("c"))) ==
'{ "cmd" : "c", "f" : "?", "sub" : "?" }'
}
// The tracer is built with a maximum length of 20; the expected string is 20 characters long
// and stops right after the "f1" field name, so the result is not well-formed JSON.
def 'should truncate simple command'() {
setup:
def tracer = new MongoClientTracer(20)
expect:
tracer.normalizeQuery(
new BsonDocument("cmd", new BsonString("c"))
.append("f1", new BsonString("c1"))
.append("f2", new BsonString("c2"))) ==
'{ "cmd" : "c", "f1" '
}
// The tracer is built with a maximum length of 27; the expected string is 27 characters long
// and stops inside the "f1" array, after the first scrubbed element and its comma.
def 'should truncate array'() {
setup:
def tracer = new MongoClientTracer(27)
expect:
tracer.normalizeQuery(
new BsonDocument("cmd", new BsonString("c"))
.append("f1", new BsonArray(asList(new BsonString("c1"), new BsonString("c2"))))
.append("f2", new BsonString("c3"))) ==
'{ "cmd" : "c", "f1" : ["?",'
}
}

View File

@ -222,7 +222,7 @@ class MongoAsyncClientTest extends MongoBaseTest {
assertTraces(2) {
trace(0, 1) {
mongoSpan(it, 0, "update", collectionName, dbName) {
assert it.replaceAll(" ", "") == "{\"update\":\"?\",\"ordered\":\"?\",\"updates\":[{\"q\":{\"password\":\"?\"},\"u\":{\"\$set\":{\"password\":\"?\"}}}]}" ||
assert it.replaceAll(" ", "") == "{\"update\":\"$collectionName\",\"ordered\":\"?\",\"updates\":[{\"q\":{\"password\":\"?\"},\"u\":{\"\$set\":{\"password\":\"?\"}}}]}" ||
it == "{\"update\": \"?\", \"ordered\": \"?\", \"\$db\": \"?\", \"updates\": [{\"q\": {\"password\": \"?\"}, \"u\": {\"\$set\": {\"password\": \"?\"}}}]}"
true
}
@ -271,7 +271,7 @@ class MongoAsyncClientTest extends MongoBaseTest {
assertTraces(2) {
trace(0, 1) {
mongoSpan(it, 0, "delete", collectionName, dbName) {
assert it.replaceAll(" ", "") == "{\"delete\":\"?\",\"ordered\":\"?\",\"deletes\":[{\"q\":{\"password\":\"?\"},\"limit\":\"?\"}]}" ||
assert it.replaceAll(" ", "") == "{\"delete\":\"$collectionName\",\"ordered\":\"?\",\"deletes\":[{\"q\":{\"password\":\"?\"},\"limit\":\"?\"}]}" ||
it == "{\"delete\": \"?\", \"ordered\": \"?\", \"\$db\": \"?\", \"deletes\": [{\"q\": {\"password\": \"?\"}, \"limit\": \"?\"}]}"
true
}

View File

@ -12,18 +12,34 @@ import io.opentelemetry.api.trace.Span;
import io.opentelemetry.api.trace.attributes.SemanticAttributes;
import io.opentelemetry.instrumentation.api.tracer.DatabaseClientTracer;
import io.opentelemetry.javaagent.instrumentation.api.db.DbSystem;
import java.io.StringWriter;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.net.InetSocketAddress;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import org.bson.BsonArray;
import org.bson.BsonDocument;
import org.bson.BsonString;
import org.bson.BsonValue;
import org.bson.json.JsonWriter;
import org.bson.json.JsonWriterSettings;
public class MongoClientTracer extends DatabaseClientTracer<CommandStartedEvent, BsonDocument> {
private static final MongoClientTracer TRACER = new MongoClientTracer();
private final int maxNormalizedQueryLength;
private final JsonWriterSettings jsonWriterSettings;
/** Creates a tracer whose normalized query strings are limited to 32 * 1024 characters. */
public MongoClientTracer() {
this(32 * 1024);
}
/**
 * Creates a tracer with an explicit limit on the normalized query string.
 *
 * @param maxNormalizedQueryLength maximum number of characters returned by {@code
 *     normalizeQuery}; also handed to {@code createJsonWriterSettings} when building the JSON
 *     writer settings
 */
public MongoClientTracer(int maxNormalizedQueryLength) {
this.maxNormalizedQueryLength = maxNormalizedQueryLength;
this.jsonWriterSettings = createJsonWriterSettings(maxNormalizedQueryLength);
}
/** Returns the shared {@code TRACER} instance, which is built with the default constructor. */
public static MongoClientTracer tracer() {
return TRACER;
}
@ -80,54 +96,120 @@ public class MongoClientTracer extends DatabaseClientTracer<CommandStartedEvent,
return null;
}
@Override
public String normalizeQuery(BsonDocument statement) {
// scrub the Mongo command so that parameters are removed from the string
BsonDocument scrubbed = scrub(statement);
return scrubbed.toString();
private static final Method IS_TRUNCATED_METHOD;
static {
IS_TRUNCATED_METHOD =
Arrays.stream(JsonWriter.class.getMethods())
.filter(method -> method.getName().equals("isTruncated"))
.findFirst()
.orElse(null);
}
/**
* The values of these mongo fields will not be scrubbed out. This allows the non-sensitive
* collection names to be captured.
*/
private static final List<String> UNSCRUBBED_FIELDS =
Arrays.asList("ordered", "insert", "count", "find", "create");
private JsonWriterSettings createJsonWriterSettings(int maxNormalizedQueryLength) {
JsonWriterSettings settings = new JsonWriterSettings(false);
try {
// The static JsonWriterSettings.builder() method was introduced in the 3.5 release
Optional<Method> buildMethod =
Arrays.stream(JsonWriterSettings.class.getMethods())
.filter(method -> method.getName().equals("builder"))
.findFirst();
if (buildMethod.isPresent()) {
Class<?> builderClass = buildMethod.get().getReturnType();
Object builder = buildMethod.get().invoke(null, (Object[]) null);
private static final BsonValue HIDDEN_CHAR = new BsonString("?");
// The JsonWriterSettings.Builder.indent method was introduced in the 3.5 release,
// but checking anyway
Optional<Method> indentMethod =
Arrays.stream(builderClass.getMethods())
.filter(method -> method.getName().equals("indent"))
.findFirst();
if (indentMethod.isPresent()) {
indentMethod.get().invoke(builder, false);
}
private static BsonDocument scrub(BsonDocument origin) {
BsonDocument scrub = new BsonDocument();
// The JsonWriterSettings.Builder.maxLength method was introduced in the 3.7 release
Optional<Method> maxLengthMethod =
Arrays.stream(builderClass.getMethods())
.filter(method -> method.getName().equals("maxLength"))
.findFirst();
if (maxLengthMethod.isPresent()) {
maxLengthMethod.get().invoke(builder, maxNormalizedQueryLength);
}
settings =
(JsonWriterSettings)
builderClass.getMethod("build", (Class<?>[]) null).invoke(builder, (Object[]) null);
}
} catch (IllegalAccessException | InvocationTargetException | NoSuchMethodException ignored) {
}
return settings;
}
/**
 * Renders the command as a JSON string via {@code writeScrubbed}, then limits the result to at
 * most {@code maxNormalizedQueryLength} characters.
 *
 * @param command the MongoDB command document to render
 * @return the scrubbed JSON text, possibly cut off mid-document when the length limit is hit
 */
@Override
public String normalizeQuery(BsonDocument command) {
// 128 is only the initial buffer capacity, not a limit on the output
StringWriter stringWriter = new StringWriter(128);
// 'true' marks the command as the root document for writeScrubbed
writeScrubbed(command, new JsonWriter(stringWriter, jsonWriterSettings), true);
// If using MongoDB driver >= 3.7, the substring invocation will be a no-op due to use of
// JsonWriterSettings.Builder.maxLength in createJsonWriterSettings (invoked from the
// constructor); with older drivers this substring is what enforces the limit
return stringWriter
.getBuffer()
.substring(0, Math.min(maxNormalizedQueryLength, stringWriter.getBuffer().length()));
}
private static final String HIDDEN_CHAR = "?";
private static boolean writeScrubbed(BsonDocument origin, JsonWriter writer, boolean isRoot) {
writer.writeStartDocument();
boolean firstField = true;
for (Map.Entry<String, BsonValue> entry : origin.entrySet()) {
if (UNSCRUBBED_FIELDS.contains(entry.getKey()) && entry.getValue().isString()) {
scrub.put(entry.getKey(), entry.getValue());
writer.writeName(entry.getKey());
// the first field of the root document is the command name, so we preserve its value
// (which for most CRUD commands is the collection name)
if (isRoot && firstField && entry.getValue().isString()) {
writer.writeString(entry.getValue().asString().getValue());
} else {
BsonValue child = scrub(entry.getValue());
scrub.put(entry.getKey(), child);
if (writeScrubbed(entry.getValue(), writer)) {
return true;
}
}
firstField = false;
}
writer.writeEndDocument();
return false;
}
private static boolean writeScrubbed(BsonArray origin, JsonWriter writer) {
writer.writeStartArray();
for (BsonValue value : origin) {
if (writeScrubbed(value, writer)) {
return true;
}
}
return scrub;
writer.writeEndArray();
return false;
}
private static BsonValue scrub(BsonArray origin) {
BsonArray scrub = new BsonArray();
for (BsonValue value : origin) {
BsonValue child = scrub(value);
scrub.add(child);
}
return scrub;
}
private static BsonValue scrub(BsonValue origin) {
BsonValue scrubbed;
private static boolean writeScrubbed(BsonValue origin, JsonWriter writer) {
if (origin.isDocument()) {
scrubbed = scrub(origin.asDocument());
return writeScrubbed(origin.asDocument(), writer, false);
} else if (origin.isArray()) {
scrubbed = scrub(origin.asArray());
return writeScrubbed(origin.asArray(), writer);
} else {
scrubbed = HIDDEN_CHAR;
writer.writeString(HIDDEN_CHAR);
return isTruncated(writer);
}
}
private static boolean isTruncated(JsonWriter writer) {
if (IS_TRUNCATED_METHOD == null) {
return false;
} else {
try {
return (boolean) IS_TRUNCATED_METHOD.invoke(writer, (Object[]) null);
} catch (IllegalAccessException | InvocationTargetException ignored) {
return false;
}
}
return scrubbed;
}
private static String collectionName(CommandStartedEvent event) {