Implement validation for CloudEventAttributeType.String

Signed-off-by: Jon Skeet <jonskeet@google.com>
This commit is contained in:
Jon Skeet 2021-03-03 07:39:52 +00:00 committed by Jon Skeet
parent 8e56fe32b9
commit 6656b5c123
3 changed files with 79 additions and 14 deletions

View File

@ -118,7 +118,7 @@ namespace CloudNative.CloudEvents
{
throw new ArgumentException($"Text for attribute '{Name}' is invalid: {e.Message}", nameof(value), e);
}
return Validate(Type.Parse(text));
return Validate(value);
}
public string Format(object value) => Type.Format(Validate(value));

View File

@ -8,6 +8,8 @@ using System.Globalization;
namespace CloudNative.CloudEvents
{
// TODO: Expose the generic type? That might avoid boxing, and make various aspects more type-safe at compile time.
// TODO: Clarify validation requirements. At the moment I suspect we're validating more often than we need to.
// (This is just a little inefficient.)
/// <summary>
/// The type of an event attribute, providing simple formatting and parsing functionality.
@ -143,9 +145,68 @@ namespace CloudNative.CloudEvents
{
}
// TODO: Validation
// Note: these methods deliberately don't validate, to avoid repeated validation.
// The "owning attribute" already validates the value when parsing or formatting.
protected override string FormatImpl(string value) => value;
protected override string ParseImpl(string value) => value;
protected override void ValidateImpl(string value)
{
bool lastCharWasHighSurrogate = false;
for (int i = 0; i < value.Length; i++)
{
char c = value[i];
// Directly from the spec
if (c <= 0x1f || (c >= 0x7f && c <= 0x9f))
{
throw new ArgumentException($"Control character U+{(ushort)c:x4} is not permitted in string attributes");
}
// First two ranges in http://www.unicode.org/faq/private_use.html#noncharacters
if (c >= 0xfffe || (c >= 0xfdd0 && c <= 0xfdef))
{
throw new ArgumentException($"Noncharacter U+{(ushort)c:x4} is not permitted in string attributes");
}
// Handle surrogate pairs, based on this character and whether the last character was a high surrogate.
// Every high surrogate must be followed by a low surrogate, and every low surrogate must be preceded by a high surrogate.
// Confusingly, the "high surrogate" region [U+D800, U+DBFF] is lower in value than the "low surrogate" region [U+DC00, U+DFFF].
if (char.IsSurrogate(c))
{
if (char.IsHighSurrogate(c))
{
if (lastCharWasHighSurrogate)
{
throw new ArgumentException($"High surrogate character U+{(ushort)value[i - 1]:x4} must be followed by a low surrogate character");
}
lastCharWasHighSurrogate = true;
}
else
{
if (!lastCharWasHighSurrogate)
{
throw new ArgumentException($"Low surrogate character U+{(ushort)c:x4} must be preceded by a high surrogate character");
}
// Convert the surrogate pair to validate it's not a non-character.
// This is the third rule in http://www.unicode.org/faq/private_use.html#noncharacters
int utf32 = char.ConvertToUtf32(value[i - 1], c);
var last16Bits = utf32 & 0xffff;
if (last16Bits == 0xffff || last16Bits == 0xfffe)
{
throw new ArgumentException($"Noncharacter U+{utf32:x} is not permitted in string attributes");
}
lastCharWasHighSurrogate = false;
}
}
else if (lastCharWasHighSurrogate)
{
throw new ArgumentException($"High surrogate character U+{(ushort)value[i - 1]:x4} must be followed by a low surrogate character");
}
}
if (lastCharWasHighSurrogate)
{
throw new ArgumentException($"String must not end with high surrogate character U+{(ushort)value[value.Length - 1]:x4}");
}
}
}
private class TimestampType : GenericCloudEventsAttributeType<DateTimeOffset>

View File

@ -150,37 +150,41 @@ namespace CloudNative.CloudEvents.UnitTests
[InlineData("test")]
[InlineData("TEST")]
[InlineData("\U0001F600")]
[InlineData("x\U0001F600y")]
[InlineData("x\U0001F600y\U0001F600z")]
public void ParseAndFormat_Valid(string text)
{
var parseResult = (string) CloudEventAttributeType.String.Parse(text);
Assert.Equal(parseResult, text);
var formatResult = CloudEventAttributeType.String.Format(text);
Assert.Equal(text, formatResult);
CloudEventAttributeType.String.Validate(text);
}
[Theory(Skip = "String validation isn't implemented yet")]
[InlineData("\n")] // Control character
[InlineData("\ufdd0")] // Non-character
[Theory]
[InlineData("\n")] // Control character (first range)
[InlineData("\u007f")] // Control character (second range)
[InlineData("\ufdd0")] // Non-character (first range)
[InlineData("\ufffe")] // Non-character (second range)
[InlineData("\U0001FFFE")] // Non-character (surrogate range)
[InlineData("\U0010FFFE")] // Non-character (surrogate range)
public void InvalidCharacters(string text)
{
Assert.Throws<ArgumentException>(() => CloudEventAttributeType.String.Parse(text));
Assert.Throws<ArgumentException>(() => CloudEventAttributeType.String.Format(text));
Assert.Throws<ArgumentException>(() => CloudEventAttributeType.String.Validate(text));
}
// Note: these are specified separately as .NET string attributes are stored as UTF-8
// internally, which means you can't express invalid strings in attributes (and get them
// out again).
[Theory(Skip = "String validation isn't implemented yet")]
[InlineData(0xd800, 0x20)] // Low surrogate without following high
[InlineData(0xdc00, 0x20)] // High surrogate without preceding low
[InlineData(0xdc00, 0xd800)] // High surrogate followed by low
[InlineData(0x20, 0xd800)] // Low surrogate at end of string
[Theory]
[InlineData(0xd800, 0x20)] // High surrogate followed by non-surrogate
[InlineData(0xdc00, 0x20)] // Low surrogate at start of string
[InlineData(0x20, 0xdc00)] // Non-surrogate followed by low surrogate
[InlineData(0xd800, 0xd800)] // High surrogate followed by high surrogate
[InlineData(0x20, 0xd800)] // High surrogate at end of string
public void InvalidSurrogates(int first, int second)
{
string text = $"{(char)first}{(char)second}";
Assert.Throws<ArgumentException>(() => CloudEventAttributeType.String.Parse(text));
Assert.Throws<ArgumentException>(() => CloudEventAttributeType.String.Format(text));
Assert.Throws<ArgumentException>(() => CloudEventAttributeType.String.Validate(text));
}
}