Compare commits

...

11 Commits
v0.2.0 ... main

Author SHA1 Message Date
Colin Walters e8557b235b Release 0.2.1
Signed-off-by: Colin Walters <walters@verbum.org>
2025-06-23 07:15:47 -04:00
Colin Walters aa908b29b3 lib: Avoid a panic with unhandled error types
Just did a quick check of our `unwrap()` usage and this one stood
out.

Signed-off-by: Colin Walters <walters@verbum.org>
2025-06-23 07:15:47 -04:00
Colin Walters 8110a04d03
tests: Also test arbitrary floating point (#6)
* tests: Also test arbitrary floating point

Signed-off-by: Colin Walters <walters@verbum.org>
Signed-off-by: Etienne Champetier <e.champetier@ateme.com>

Co-authored-by: Etienne Champetier <e.champetier@ateme.com>
2025-06-19 07:15:28 -04:00
Colin Walters de391b8df1
Merge pull request #5 from champtar/tests
Use CanonJsonSerialize in tests
2025-06-18 08:35:39 -04:00
Colin Walters 69a48fcc67 tests: Fix crosscheck with olpc_cjson and cjson
First, enhance arbitrary_json to take regex for key space, so we can
exclude e.g. control characters.

When we're dealing with ASCII_ALPHANUMERIC, we can assert exact
equivalency. Add a test case that (for cjson) also takes
arbitrary keys, but compares via serde_json::Value
(which automatically sorts) because we know the object ordering will be different.

Signed-off-by: Colin Walters <walters@verbum.org>
2025-06-18 08:25:03 -04:00
Etienne Champetier c65a8119a1 Use CanonJsonSerialize in tests
Signed-off-by: Etienne Champetier <e.champetier@ateme.com>
2025-06-18 05:00:47 -04:00
Etienne Champetier 728c62e007 ignore crosscheck_olpc_cjson
When the encode! macro is expanded, it ends up using
olpc_cjson::CanonicalFormatter instead of crate::CanonicalFormatter.
olpc_cjson doesn't escape control characters, so the test is broken.

Also add crosscheck_cjson, this one fails because of ordering differences.

Signed-off-by: Etienne Champetier <e.champetier@ateme.com>
2025-06-18 05:00:47 -04:00
Etienne Champetier d58fab4daa
Merge pull request #4 from cgwalters/add-docs
Add more docs
2025-06-18 08:43:39 +03:00
Colin Walters ad8709f4a7 Add more docs
And `#![deny(missing_docs)]` by default.

Signed-off-by: Colin Walters <walters@verbum.org>
2025-06-17 18:15:13 -04:00
Colin Walters e0667e9589
Merge pull request #3 from containers/CanonJsonSerialize
Add CanonJsonSerialize trait
2025-06-17 18:03:55 -04:00
Etienne Champetier cc8729b414 Add CanonJsonSerialize trait
Signed-off-by: Etienne Champetier <e.champetier@ateme.com>
2025-06-17 17:42:45 -04:00
2 changed files with 106 additions and 33 deletions

View File

@ -1,6 +1,6 @@
[package] [package]
name = "canon-json" name = "canon-json"
version = "0.2.0" version = "0.2.1"
edition = "2021" edition = "2021"
description = "serde_json Formatter to serialize as RFC 8785 canonical JSON" description = "serde_json Formatter to serialize as RFC 8785 canonical JSON"
license = "MIT OR Apache-2.0" license = "MIT OR Apache-2.0"
@ -20,3 +20,6 @@ cap-std = "3.4.4"
sha2 = "0.10.9" sha2 = "0.10.9"
# For cross-integration testing # For cross-integration testing
olpc-cjson = "0.1" olpc-cjson = "0.1"
cjson = "0.1.2"
# For round trip testing
serde_json = { version = "1.0", features = ["float_roundtrip"] }

View File

@ -3,6 +3,7 @@
#![doc = include_str!("../README.md")] #![doc = include_str!("../README.md")]
#![forbid(unsafe_code)] #![forbid(unsafe_code)]
#![deny(missing_docs)]
mod floatformat; mod floatformat;
@ -12,7 +13,7 @@ use std::io::{Error, ErrorKind, Result, Write};
use serde::Serialize; use serde::Serialize;
use serde_json::ser::{CharEscape, CompactFormatter, Formatter, Serializer}; use serde_json::ser::{CharEscape, CompactFormatter, Formatter, Serializer};
/// A [`Formatter`] that produces canonical JSON. /// A [`Formatter`] that produces canonical (RFC 8785) JSON.
/// ///
/// See the [crate-level documentation](../index.html) for more detail. /// See the [crate-level documentation](../index.html) for more detail.
/// ///
@ -51,8 +52,11 @@ impl ObjectKey {
let val = serde_json::Value::String(s); let val = serde_json::Value::String(s);
let mut s = Serializer::new(w); let mut s = Serializer::new(w);
val.serialize(&mut s).map_err(|e| { val.serialize(&mut s).map_err(|e| {
let kind = e.io_error_kind().unwrap(); if let Some(kind) = e.io_error_kind() {
Error::new(kind, "I/O error") Error::new(kind, "I/O error")
} else {
Error::new(ErrorKind::Other, e.to_string())
}
}) })
} }
} }
@ -351,6 +355,42 @@ impl Formatter for CanonicalFormatter {
} }
} }
/// Convenience methods for emitting a value as canonical JSON.
pub trait CanonJsonSerialize {
    /// Write this value, encoded as JSON, into the supplied I/O `writer`.
    fn to_canon_json_writer<W: Write>(&self, writer: W) -> Result<()>;
    /// Encode this value as JSON and hand back the resulting bytes.
    fn to_canon_json_vec(&self) -> Result<Vec<u8>>;
    /// Encode this value as JSON and hand it back as a `String`.
    fn to_canon_json_string(&self) -> Result<String>;
}
// Blanket implementation: every `Serialize` type gets the canonical JSON
// helpers, all driven through a `Serializer` using `CanonicalFormatter`.
impl<S> CanonJsonSerialize for S
where
    S: Serialize,
{
    fn to_canon_json_writer<W: Write>(&self, writer: W) -> Result<()> {
        let mut serializer = Serializer::with_formatter(writer, CanonicalFormatter::new());
        // The serializer's error is converted into the `std::io::Error`
        // carried by this module's `Result` alias.
        self.serialize(&mut serializer).map_err(Error::from)
    }

    fn to_canon_json_vec(&self) -> Result<Vec<u8>> {
        let mut bytes = Vec::new();
        self.to_canon_json_writer(&mut bytes)?;
        Ok(bytes)
    }

    fn to_canon_json_string(&self) -> Result<String> {
        let bytes = self.to_canon_json_vec()?;
        // Bytes that are not valid UTF-8 are reported as `InvalidData`.
        match String::from_utf8(bytes) {
            Ok(text) => Ok(text),
            Err(err) => Err(Error::new(ErrorKind::InvalidData, err)),
        }
    }
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
@ -358,8 +398,7 @@ mod tests {
use std::{cmp::Ordering, io::Result}; use std::{cmp::Ordering, io::Result};
use proptest::prelude::*; use proptest::prelude::*;
use serde::Serialize; use serde_json::Number;
use serde_json::{Number, Serializer};
use sha2::{Digest, Sha256}; use sha2::{Digest, Sha256};
use similar_asserts::assert_eq; use similar_asserts::assert_eq;
@ -390,10 +429,7 @@ mod tests {
macro_rules! encode { macro_rules! encode {
($($tt:tt)+) => { ($($tt:tt)+) => {
(|v: serde_json::Value| -> Result<Vec<u8>> { (|v: serde_json::Value| -> Result<Vec<u8>> {
let mut buf = Vec::new(); v.to_canon_json_vec()
let mut ser = Serializer::with_formatter(&mut buf, CanonicalFormatter::new());
v.serialize(&mut ser)?;
Ok(buf)
})(serde_json::json!($($tt)+)) })(serde_json::json!($($tt)+))
}; };
} }
@ -501,10 +537,6 @@ mod tests {
i128: i128::MIN, i128: i128::MIN,
}; };
let mut buf = Vec::new();
let mut ser = Serializer::with_formatter(&mut buf, CanonicalFormatter::new());
value.serialize(&mut ser).unwrap();
let expected = [ let expected = [
123, 34, 105, 49, 50, 56, 34, 58, 45, 49, 55, 48, 49, 52, 49, 49, 56, 51, 52, 54, 48, 123, 34, 105, 49, 50, 56, 34, 58, 45, 49, 55, 48, 49, 52, 49, 49, 56, 51, 52, 54, 48,
52, 54, 57, 50, 51, 49, 55, 51, 49, 54, 56, 55, 51, 48, 51, 55, 49, 53, 56, 56, 52, 49, 52, 54, 57, 50, 51, 49, 55, 51, 49, 54, 56, 55, 51, 48, 51, 55, 49, 53, 56, 56, 52, 49,
@ -513,7 +545,7 @@ mod tests {
55, 54, 56, 50, 49, 49, 52, 53, 53, 125, 55, 54, 56, 50, 49, 49, 52, 53, 53, 125,
]; ];
assert_eq!(buf, expected); assert_eq!(value.to_canon_json_vec().unwrap(), expected);
} }
#[test] #[test]
@ -524,24 +556,39 @@ mod tests {
assert_eq!(&buf, &expected); assert_eq!(&buf, &expected);
} }
fn arbitrary_json() -> impl Strategy<Value = serde_json::Value> { /// As it says, generate arbitrary JSON. This is based on
/// https://proptest-rs.github.io/proptest/proptest/tutorial/recursive.html
///
/// We support controlling the regex for keys, and whether or not floating point values are emitted.
fn arbitrary_json(
keyspace: &'static str,
allow_fp: bool,
) -> impl Strategy<Value = serde_json::Value> {
use serde_json::Value; use serde_json::Value;
const S: &str = ".*";
let leaf = prop_oneof![ let leaf = prop_oneof![
Just(Value::Null), Just(Value::Null),
any::<u32>().prop_map(|v| Value::Number(Number::from_u128(v.into()).unwrap())), any::<f64>().prop_filter_map("valid f64 for JSON", move |v| {
let n = if allow_fp && v.fract() != 0.0 {
Number::from_f64(v).unwrap()
} else {
// Constrain to values clearly lower than
// the https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Number/MAX_SAFE_INTEGER
Number::from_u128(v as u32 as u128).unwrap()
};
Some(Value::Number(n))
}),
any::<bool>().prop_map(Value::Bool), any::<bool>().prop_map(Value::Bool),
S.prop_map(Value::String), keyspace.prop_map(Value::String),
]; ];
leaf.prop_recursive( leaf.prop_recursive(
8, // 8 levels deep 8, // 8 levels deep
256, // Shoot for maximum size of 256 nodes 256, // Shoot for maximum size of 256 nodes
10, // We put up to 10 items per collection 10, // We put up to 10 items per collection
|inner| { move |inner| {
prop_oneof![ prop_oneof![
// Take the inner strategy and make the two recursive cases. // Take the inner strategy and make the two recursive cases.
prop::collection::vec(inner.clone(), 0..10).prop_map(Value::Array), prop::collection::vec(inner.clone(), 0..10).prop_map(Value::Array),
prop::collection::hash_map(S, inner, 0..10) prop::collection::hash_map(keyspace, inner, 0..10)
.prop_map(|v| { v.into_iter().collect() }), .prop_map(|v| { v.into_iter().collect() }),
] ]
}, },
@ -550,7 +597,7 @@ mod tests {
proptest! { proptest! {
#[test] #[test]
fn roundtrip_rfc8785(v in arbitrary_json()) { fn roundtrip_rfc8785(v in arbitrary_json(".*", true)) {
let buf = encode!(&v).unwrap(); let buf = encode!(&v).unwrap();
let v2: serde_json::Value = serde_json::from_slice(&buf) let v2: serde_json::Value = serde_json::from_slice(&buf)
.map_err(|e| format!("Failed to parse {v:?} -> {}: {e}", String::from_utf8_lossy(&buf))).unwrap(); .map_err(|e| format!("Failed to parse {v:?} -> {}: {e}", String::from_utf8_lossy(&buf))).unwrap();
@ -560,11 +607,7 @@ mod tests {
fn verify(input: &str, expected: &str) { fn verify(input: &str, expected: &str) {
let input: serde_json::Value = serde_json::from_str(input).unwrap(); let input: serde_json::Value = serde_json::from_str(input).unwrap();
let mut buf = Vec::new(); assert_eq!(expected, input.to_canon_json_string().unwrap());
let mut ser = Serializer::with_formatter(&mut buf, CanonicalFormatter::new());
input.serialize(&mut ser).unwrap();
let buf = String::from_utf8(buf).unwrap();
assert_eq!(expected, &buf);
} }
#[test] #[test]
@ -651,17 +694,44 @@ mod tests {
Ok(()) Ok(())
} }
// Regex that excludes basically everything except printable ASCII
// because we know that e.g. olpc-cjson bombs on control characters,
// and also because it does NFC ordering that will cause non-equivalency
// for some whitespace etc.
const ASCII_ALPHANUMERIC: &str = r"[a-zA-Z0-9]*";
proptest! { proptest! {
// Verify strict equivalency with printable ASCII only keys
#[test] #[test]
fn crosscheck_olpc_cjson(v in arbitrary_json()) { fn crosscheck_olpc_cjson_ascii(v in arbitrary_json(ASCII_ALPHANUMERIC, false)) {
use olpc_cjson::CanonicalFormatter; let canon_json = String::from_utf8(encode!(&v).unwrap()).unwrap();
let mut olpc_cjson_serialized = Vec::new(); let mut olpc_cjson_serialized = Vec::new();
let mut ser = serde_json::Serializer::with_formatter(&mut olpc_cjson_serialized, CanonicalFormatter::new()); let mut ser = serde_json::Serializer::with_formatter(&mut olpc_cjson_serialized, olpc_cjson::CanonicalFormatter::new());
prop_assume!(v.serialize(&mut ser).is_ok()); v.serialize(&mut ser).unwrap();
assert_eq!(canon_json, String::from_utf8(olpc_cjson_serialized).unwrap());
}
}
proptest! {
// Verify strict equivalency with printable ASCII only keys
#[test]
fn crosscheck_cjson_ascii(v in arbitrary_json(ASCII_ALPHANUMERIC, false)) {
let canon_json = String::from_utf8(encode!(&v).unwrap()).unwrap();
let cjson = String::from_utf8(cjson::to_vec(&v).unwrap()).unwrap();
assert_eq!(canon_json, cjson);
}
// Verify equivalency (after sorting) with non-ASCII keys
#[test]
fn crosscheck_cjson(v in arbitrary_json(".*", false)) {
let buf = encode!(&v).unwrap(); let buf = encode!(&v).unwrap();
assert_eq!(buf, olpc_cjson_serialized); let self_reparsed = serde_json::from_slice::<serde_json::Value>(&buf).unwrap();
let buf = cjson::to_vec(&v).unwrap();
let cjson_reparsed = serde_json::from_slice::<serde_json::Value>(&buf).unwrap();
// As above with olpc-cjson, this relies on the fact that serde_json
// sorts object keys by default.
assert_eq!(self_reparsed, v);
assert_eq!(cjson_reparsed, v);
} }
} }
} }