Compare commits

...

11 Commits
v0.2.0 ... main

Author SHA1 Message Date
Colin Walters e8557b235b Release 0.2.1
Signed-off-by: Colin Walters <walters@verbum.org>
2025-06-23 07:15:47 -04:00
Colin Walters aa908b29b3 lib: Avoid a panic with unhandled error types
Just did a quick check of our `unwrap()` usage and this one stood
out.

Signed-off-by: Colin Walters <walters@verbum.org>
2025-06-23 07:15:47 -04:00
Colin Walters 8110a04d03
tests: Also test arbitrary floating point (#6)
* tests: Also test arbitrary floating point

Signed-off-by: Colin Walters <walters@verbum.org>
Signed-off-by: Etienne Champetier <e.champetier@ateme.com>

Co-authored-by: Etienne Champetier <e.champetier@ateme.com>
2025-06-19 07:15:28 -04:00
Colin Walters de391b8df1
Merge pull request #5 from champtar/tests
Use CanonJsonSerialize in tests
2025-06-18 08:35:39 -04:00
Colin Walters 69a48fcc67 tests: Fix crosscheck with olpc_cjson and cjson
First, enhance arbitrary_json to take regex for key space, so we can
exclude e.g. control characters.

When we're dealing with ASCII_ALPHANUMERIC, we can assert exact
equivalency. Add a test case that (for cjson) also takes
arbitrary keys, but compares via serde_json::Value
(which automatically sorts) because we know the object ordering will be different.

Signed-off-by: Colin Walters <walters@verbum.org>
2025-06-18 08:25:03 -04:00
Etienne Champetier c65a8119a1 Use CanonJsonSerialize in tests
Signed-off-by: Etienne Champetier <e.champetier@ateme.com>
2025-06-18 05:00:47 -04:00
Etienne Champetier 728c62e007 ignore crosscheck_olpc_cjson
When the encode! macro is expanded, it ends up using
olpc_cjson::CanonicalFormatter instead of crate::CanonicalFormatter.
olpc_cjson doesn't escape control characters, so the test is broken.

Also add crosscheck_cjson, this one fails because of ordering differences.

Signed-off-by: Etienne Champetier <e.champetier@ateme.com>
2025-06-18 05:00:47 -04:00
Etienne Champetier d58fab4daa
Merge pull request #4 from cgwalters/add-docs
Add more docs
2025-06-18 08:43:39 +03:00
Colin Walters ad8709f4a7 Add more docs
And `#![deny(missing_docs)]` by default.

Signed-off-by: Colin Walters <walters@verbum.org>
2025-06-17 18:15:13 -04:00
Colin Walters e0667e9589
Merge pull request #3 from containers/CanonJsonSerialize
Add CanonJsonSerialize trait
2025-06-17 18:03:55 -04:00
Etienne Champetier cc8729b414 Add CanonJsonSerialize trait
Signed-off-by: Etienne Champetier <e.champetier@ateme.com>
2025-06-17 17:42:45 -04:00
2 changed files with 106 additions and 33 deletions

View File

@ -1,6 +1,6 @@
[package]
name = "canon-json"
version = "0.2.0"
version = "0.2.1"
edition = "2021"
description = "serde_json Formatter to serialize as RFC 8785 canonical JSON"
license = "MIT OR Apache-2.0"
@ -20,3 +20,6 @@ cap-std = "3.4.4"
sha2 = "0.10.9"
# For cross-integration testing
olpc-cjson = "0.1"
cjson = "0.1.2"
# For round trip testing
serde_json = { version = "1.0", features = ["float_roundtrip"] }

View File

@ -3,6 +3,7 @@
#![doc = include_str!("../README.md")]
#![forbid(unsafe_code)]
#![deny(missing_docs)]
mod floatformat;
@ -12,7 +13,7 @@ use std::io::{Error, ErrorKind, Result, Write};
use serde::Serialize;
use serde_json::ser::{CharEscape, CompactFormatter, Formatter, Serializer};
/// A [`Formatter`] that produces canonical JSON.
/// A [`Formatter`] that produces canonical (RFC 8785) JSON.
///
/// See the [crate-level documentation](../index.html) for more detail.
///
@ -51,8 +52,11 @@ impl ObjectKey {
let val = serde_json::Value::String(s);
let mut s = Serializer::new(w);
val.serialize(&mut s).map_err(|e| {
let kind = e.io_error_kind().unwrap();
Error::new(kind, "I/O error")
if let Some(kind) = e.io_error_kind() {
Error::new(kind, "I/O error")
} else {
Error::new(ErrorKind::Other, e.to_string())
}
})
}
}
@ -351,6 +355,42 @@ impl Formatter for CanonicalFormatter {
}
}
/// Convenience trait for emitting a value as canonical JSON.
pub trait CanonJsonSerialize {
    /// Write this value, serialized as JSON, into the supplied I/O stream.
    fn to_canon_json_writer<W: Write>(&self, writer: W) -> Result<()>;

    /// Return this value serialized as a JSON byte vector.
    fn to_canon_json_vec(&self) -> Result<Vec<u8>>;

    /// Return this value serialized as a JSON `String`.
    fn to_canon_json_string(&self) -> Result<String>;
}
// Blanket implementation: any `Serialize` type gets the canonical JSON helpers.
impl<S> CanonJsonSerialize for S
where
    S: Serialize,
{
    fn to_canon_json_writer<W>(&self, writer: W) -> Result<()>
    where
        W: Write,
    {
        let mut serializer = Serializer::with_formatter(writer, CanonicalFormatter::new());
        // Convert the serializer's error into the `std::io::Error` this API returns.
        self.serialize(&mut serializer).map_err(Error::from)
    }

    fn to_canon_json_vec(&self) -> Result<Vec<u8>> {
        let mut out = Vec::new();
        self.to_canon_json_writer(&mut out)?;
        Ok(out)
    }

    fn to_canon_json_string(&self) -> Result<String> {
        let bytes = self.to_canon_json_vec()?;
        // Invalid UTF-8 in the serialized bytes is reported as `InvalidData`.
        match String::from_utf8(bytes) {
            Ok(text) => Ok(text),
            Err(err) => Err(Error::new(ErrorKind::InvalidData, err)),
        }
    }
}
#[cfg(test)]
mod tests {
use super::*;
@ -358,8 +398,7 @@ mod tests {
use std::{cmp::Ordering, io::Result};
use proptest::prelude::*;
use serde::Serialize;
use serde_json::{Number, Serializer};
use serde_json::Number;
use sha2::{Digest, Sha256};
use similar_asserts::assert_eq;
@ -390,10 +429,7 @@ mod tests {
macro_rules! encode {
($($tt:tt)+) => {
(|v: serde_json::Value| -> Result<Vec<u8>> {
let mut buf = Vec::new();
let mut ser = Serializer::with_formatter(&mut buf, CanonicalFormatter::new());
v.serialize(&mut ser)?;
Ok(buf)
v.to_canon_json_vec()
})(serde_json::json!($($tt)+))
};
}
@ -501,10 +537,6 @@ mod tests {
i128: i128::MIN,
};
let mut buf = Vec::new();
let mut ser = Serializer::with_formatter(&mut buf, CanonicalFormatter::new());
value.serialize(&mut ser).unwrap();
let expected = [
123, 34, 105, 49, 50, 56, 34, 58, 45, 49, 55, 48, 49, 52, 49, 49, 56, 51, 52, 54, 48,
52, 54, 57, 50, 51, 49, 55, 51, 49, 54, 56, 55, 51, 48, 51, 55, 49, 53, 56, 56, 52, 49,
@ -513,7 +545,7 @@ mod tests {
55, 54, 56, 50, 49, 49, 52, 53, 53, 125,
];
assert_eq!(buf, expected);
assert_eq!(value.to_canon_json_vec().unwrap(), expected);
}
#[test]
@ -524,24 +556,39 @@ mod tests {
assert_eq!(&buf, &expected);
}
fn arbitrary_json() -> impl Strategy<Value = serde_json::Value> {
/// As it says, generate arbitrary JSON. This is based on
/// https://proptest-rs.github.io/proptest/proptest/tutorial/recursive.html
///
/// We support controlling the regex for keys, and whether or not floating point values are emitted.
fn arbitrary_json(
keyspace: &'static str,
allow_fp: bool,
) -> impl Strategy<Value = serde_json::Value> {
use serde_json::Value;
const S: &str = ".*";
let leaf = prop_oneof![
Just(Value::Null),
any::<u32>().prop_map(|v| Value::Number(Number::from_u128(v.into()).unwrap())),
any::<f64>().prop_filter_map("valid f64 for JSON", move |v| {
let n = if allow_fp && v.fract() != 0.0 {
Number::from_f64(v).unwrap()
} else {
// Constrain to values clearly lower than
// the https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Number/MAX_SAFE_INTEGER
Number::from_u128(v as u32 as u128).unwrap()
};
Some(Value::Number(n))
}),
any::<bool>().prop_map(Value::Bool),
S.prop_map(Value::String),
keyspace.prop_map(Value::String),
];
leaf.prop_recursive(
8, // 8 levels deep
256, // Shoot for maximum size of 256 nodes
10, // We put up to 10 items per collection
|inner| {
move |inner| {
prop_oneof![
// Take the inner strategy and make the two recursive cases.
prop::collection::vec(inner.clone(), 0..10).prop_map(Value::Array),
prop::collection::hash_map(S, inner, 0..10)
prop::collection::hash_map(keyspace, inner, 0..10)
.prop_map(|v| { v.into_iter().collect() }),
]
},
@ -550,7 +597,7 @@ mod tests {
proptest! {
#[test]
fn roundtrip_rfc8785(v in arbitrary_json()) {
fn roundtrip_rfc8785(v in arbitrary_json(".*", true)) {
let buf = encode!(&v).unwrap();
let v2: serde_json::Value = serde_json::from_slice(&buf)
.map_err(|e| format!("Failed to parse {v:?} -> {}: {e}", String::from_utf8_lossy(&buf))).unwrap();
@ -560,11 +607,7 @@ mod tests {
fn verify(input: &str, expected: &str) {
let input: serde_json::Value = serde_json::from_str(input).unwrap();
let mut buf = Vec::new();
let mut ser = Serializer::with_formatter(&mut buf, CanonicalFormatter::new());
input.serialize(&mut ser).unwrap();
let buf = String::from_utf8(buf).unwrap();
assert_eq!(expected, &buf);
assert_eq!(expected, input.to_canon_json_string().unwrap());
}
#[test]
@ -651,17 +694,44 @@ mod tests {
Ok(())
}
// Regex restricted to ASCII alphanumerics (a subset of printable ASCII),
// because we know that e.g. olpc-cjson bombs on control characters,
// and also because it does NFC ordering that will cause non-equivalency
// for some whitespace etc.
const ASCII_ALPHANUMERIC: &str = r"[a-zA-Z0-9]*";
proptest! {
// Verify strict equivalency with printable ASCII only keys
#[test]
fn crosscheck_olpc_cjson(v in arbitrary_json()) {
use olpc_cjson::CanonicalFormatter;
fn crosscheck_olpc_cjson_ascii(v in arbitrary_json(ASCII_ALPHANUMERIC, false)) {
let canon_json = String::from_utf8(encode!(&v).unwrap()).unwrap();
let mut olpc_cjson_serialized = Vec::new();
let mut ser = serde_json::Serializer::with_formatter(&mut olpc_cjson_serialized, CanonicalFormatter::new());
prop_assume!(v.serialize(&mut ser).is_ok());
let mut ser = serde_json::Serializer::with_formatter(&mut olpc_cjson_serialized, olpc_cjson::CanonicalFormatter::new());
v.serialize(&mut ser).unwrap();
assert_eq!(canon_json, String::from_utf8(olpc_cjson_serialized).unwrap());
}
}
proptest! {
// Verify strict equivalency with printable ASCII only keys
#[test]
fn crosscheck_cjson_ascii(v in arbitrary_json(ASCII_ALPHANUMERIC, false)) {
let canon_json = String::from_utf8(encode!(&v).unwrap()).unwrap();
let cjson = String::from_utf8(cjson::to_vec(&v).unwrap()).unwrap();
assert_eq!(canon_json, cjson);
}
// Verify equivalency (after sorting) with non-ASCII keys
#[test]
fn crosscheck_cjson(v in arbitrary_json(".*", false)) {
let buf = encode!(&v).unwrap();
assert_eq!(buf, olpc_cjson_serialized);
let self_reparsed = serde_json::from_slice::<serde_json::Value>(&buf).unwrap();
let buf = cjson::to_vec(&v).unwrap();
let cjson_reparsed = serde_json::from_slice::<serde_json::Value>(&buf).unwrap();
// As above with olpc-cjson, this relies on the fact that serde_json
// sorts object keys by default.
assert_eq!(self_reparsed, v);
assert_eq!(cjson_reparsed, v);
}
}
}