diff --git a/src/CloudNative.CloudEvents/CloudEventAttribute.cs b/src/CloudNative.CloudEvents/CloudEventAttribute.cs index 49eb9dd..2a1e67f 100644 --- a/src/CloudNative.CloudEvents/CloudEventAttribute.cs +++ b/src/CloudNative.CloudEvents/CloudEventAttribute.cs @@ -118,7 +118,7 @@ namespace CloudNative.CloudEvents { throw new ArgumentException($"Text for attribute '{Name}' is invalid: {e.Message}", nameof(value), e); } - return Validate(Type.Parse(text)); + return Validate(value); } public string Format(object value) => Type.Format(Validate(value)); diff --git a/src/CloudNative.CloudEvents/CloudEventAttributeType.cs b/src/CloudNative.CloudEvents/CloudEventAttributeType.cs index 60317ec..ee151e6 100644 --- a/src/CloudNative.CloudEvents/CloudEventAttributeType.cs +++ b/src/CloudNative.CloudEvents/CloudEventAttributeType.cs @@ -8,6 +8,8 @@ using System.Globalization; namespace CloudNative.CloudEvents { // TODO: Expose the generic type? That might avoid boxing, and make various aspects more type-safe at compile time. + // TODO: Clarify validation requirements. At the moment I suspect we're validating more often than we need to. + // (This is just a little inefficient.) /// /// The type of an event attribute, providing simple formatting and parsing functionality. @@ -143,9 +145,68 @@ namespace CloudNative.CloudEvents { } - // TODO: Validation + // Note: these methods deliberately don't validate, to avoid repeated validation. + // The "owning attribute" already validates the value when parsing or formatting. protected override string FormatImpl(string value) => value; protected override string ParseImpl(string value) => value; + + protected override void ValidateImpl(string value) + { + bool lastCharWasHighSurrogate = false; + for (int i = 0; i < value.Length; i++) + { + char c = value[i]; + // Directly from the spec + if (c <= 0x1f || (c >= 0x7f && c <= 0x9f)) + { + throw new ArgumentException($"Control character U+{(ushort)c:x4} is not permitted in string attributes"); + } + // First two ranges in http://www.unicode.org/faq/private_use.html#noncharacters + if (c >= 0xfffe || (c >= 0xfdd0 && c <= 0xfdef)) + { + throw new ArgumentException($"Noncharacter U+{(ushort)c:x4} is not permitted in string attributes"); + } + + // Handle surrogate pairs, based on this character and whether the last character was a high surrogate. + // Every high surrogate must be followed by a low surrogate, and every low surrogate must be preceded by a high surrogate. + // Confusingly, the "high surrogate" region [U+D800, U+DBFF] is lower in value than the "low surrogate" region [U+DC00, U+DFFF]. + if (char.IsSurrogate(c)) + { + if (char.IsHighSurrogate(c)) + { + if (lastCharWasHighSurrogate) + { + throw new ArgumentException($"High surrogate character U+{(ushort)value[i - 1]:x4} must be followed by a low surrogate character"); + } + lastCharWasHighSurrogate = true; + } + else + { + if (!lastCharWasHighSurrogate) + { + throw new ArgumentException($"Low surrogate character U+{(ushort)c:x4} must be preceded by a high surrogate character"); + } + // Convert the surrogate pair to validate it's not a non-character. + // This is the third rule in http://www.unicode.org/faq/private_use.html#noncharacters + int utf32 = char.ConvertToUtf32(value[i - 1], c); + var last16Bits = utf32 & 0xffff; + if (last16Bits == 0xffff || last16Bits == 0xfffe) + { + throw new ArgumentException($"Noncharacter U+{utf32:x} is not permitted in string attributes"); + } + lastCharWasHighSurrogate = false; + } + } + else if (lastCharWasHighSurrogate) + { + throw new ArgumentException($"High surrogate character U+{(ushort)value[i - 1]:x4} must be followed by a low surrogate character"); + } + } + if (lastCharWasHighSurrogate) + { + throw new ArgumentException($"String must not end with high surrogate character U+{(ushort)value[value.Length - 1]:x4}"); + } + } } private class TimestampType : GenericCloudEventsAttributeType diff --git a/test/CloudNative.CloudEvents.UnitTests/CloudEventAttributeTypeTest.cs b/test/CloudNative.CloudEvents.UnitTests/CloudEventAttributeTypeTest.cs index 67a3332..a72d403 100644 --- a/test/CloudNative.CloudEvents.UnitTests/CloudEventAttributeTypeTest.cs +++ b/test/CloudNative.CloudEvents.UnitTests/CloudEventAttributeTypeTest.cs @@ -150,37 +150,41 @@ namespace CloudNative.CloudEvents.UnitTests [InlineData("test")] [InlineData("TEST")] [InlineData("\U0001F600")] + [InlineData("x\U0001F600y")] + [InlineData("x\U0001F600y\U0001F600z")] public void ParseAndFormat_Valid(string text) { var parseResult = (string) CloudEventAttributeType.String.Parse(text); Assert.Equal(parseResult, text); var formatResult = CloudEventAttributeType.String.Format(text); Assert.Equal(text, formatResult); + CloudEventAttributeType.String.Validate(text); } - [Theory(Skip = "String validation isn't implemented yet")] - [InlineData("\n")] // Control character - [InlineData("\ufdd0")] // Non-character + [Theory] + [InlineData("\n")] // Control character (first range) + [InlineData("\u007f")] // Control character (second range) + [InlineData("\ufdd0")] // Non-character (first range) + [InlineData("\ufffe")] // Non-character (second range) + [InlineData("\U0001FFFE")] // Non-character (surrogate range) + [InlineData("\U0010FFFE")] // Non-character (surrogate range) public void InvalidCharacters(string text) { - Assert.Throws(() => CloudEventAttributeType.String.Parse(text)); - Assert.Throws(() => CloudEventAttributeType.String.Format(text)); Assert.Throws(() => CloudEventAttributeType.String.Validate(text)); } // Note: these are specified separately as .NET string attributes are stored as UTF-8 // internally, which means you can't express invalid strings in attributes (and get them // out again). - [Theory(Skip = "String validation isn't implemented yet")] - [InlineData(0xd800, 0x20)] // Low surrogate without following high - [InlineData(0xdc00, 0x20)] // High surrogate without preceding low - [InlineData(0xdc00, 0xd800)] // High surrogate followed by low - [InlineData(0x20, 0xd800)] // Low surrogate at end of string + [Theory] + [InlineData(0xd800, 0x20)] // High surrogate followed by non-surrogate + [InlineData(0xdc00, 0x20)] // Low surrogate at start of string + [InlineData(0x20, 0xdc00)] // Non-surrogate followed by low surrogate + [InlineData(0xd800, 0xd800)] // High surrogate followed by high surrogate + [InlineData(0x20, 0xd800)] // High surrogate at end of string public void InvalidSurrogates(int first, int second) { string text = $"{(char)first}{(char)second}"; - Assert.Throws(() => CloudEventAttributeType.String.Parse(text)); - Assert.Throws(() => CloudEventAttributeType.String.Format(text)); Assert.Throws(() => CloudEventAttributeType.String.Validate(text)); } }