Compare commits
11 Commits
Author | SHA1 | Date |
---|---|---|
|
e8557b235b | |
|
aa908b29b3 | |
|
8110a04d03 | |
|
de391b8df1 | |
|
69a48fcc67 | |
|
c65a8119a1 | |
|
728c62e007 | |
|
d58fab4daa | |
|
ad8709f4a7 | |
|
e0667e9589 | |
|
cc8729b414 |
|
@ -1,6 +1,6 @@
|
||||||
[package]
|
[package]
|
||||||
name = "canon-json"
|
name = "canon-json"
|
||||||
version = "0.2.0"
|
version = "0.2.1"
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
description = "serde_json Formatter to serialize as RFC 8785 canonical JSON"
|
description = "serde_json Formatter to serialize as RFC 8785 canonical JSON"
|
||||||
license = "MIT OR Apache-2.0"
|
license = "MIT OR Apache-2.0"
|
||||||
|
@ -20,3 +20,6 @@ cap-std = "3.4.4"
|
||||||
sha2 = "0.10.9"
|
sha2 = "0.10.9"
|
||||||
# For cross-integration testing
|
# For cross-integration testing
|
||||||
olpc-cjson = "0.1"
|
olpc-cjson = "0.1"
|
||||||
|
cjson = "0.1.2"
|
||||||
|
# For round trip testing
|
||||||
|
serde_json = { version = "1.0", features = ["float_roundtrip"] }
|
||||||
|
|
134
src/lib.rs
134
src/lib.rs
|
@ -3,6 +3,7 @@
|
||||||
|
|
||||||
#![doc = include_str!("../README.md")]
|
#![doc = include_str!("../README.md")]
|
||||||
#![forbid(unsafe_code)]
|
#![forbid(unsafe_code)]
|
||||||
|
#![deny(missing_docs)]
|
||||||
|
|
||||||
mod floatformat;
|
mod floatformat;
|
||||||
|
|
||||||
|
@ -12,7 +13,7 @@ use std::io::{Error, ErrorKind, Result, Write};
|
||||||
use serde::Serialize;
|
use serde::Serialize;
|
||||||
use serde_json::ser::{CharEscape, CompactFormatter, Formatter, Serializer};
|
use serde_json::ser::{CharEscape, CompactFormatter, Formatter, Serializer};
|
||||||
|
|
||||||
/// A [`Formatter`] that produces canonical JSON.
|
/// A [`Formatter`] that produces canonical (RFC 8785) JSON.
|
||||||
///
|
///
|
||||||
/// See the [crate-level documentation](../index.html) for more detail.
|
/// See the [crate-level documentation](../index.html) for more detail.
|
||||||
///
|
///
|
||||||
|
@ -51,8 +52,11 @@ impl ObjectKey {
|
||||||
let val = serde_json::Value::String(s);
|
let val = serde_json::Value::String(s);
|
||||||
let mut s = Serializer::new(w);
|
let mut s = Serializer::new(w);
|
||||||
val.serialize(&mut s).map_err(|e| {
|
val.serialize(&mut s).map_err(|e| {
|
||||||
let kind = e.io_error_kind().unwrap();
|
if let Some(kind) = e.io_error_kind() {
|
||||||
Error::new(kind, "I/O error")
|
Error::new(kind, "I/O error")
|
||||||
|
} else {
|
||||||
|
Error::new(ErrorKind::Other, e.to_string())
|
||||||
|
}
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -351,6 +355,42 @@ impl Formatter for CanonicalFormatter {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// A helper trait to write canonical JSON.
|
||||||
|
pub trait CanonJsonSerialize {
|
||||||
|
/// Serialize the given data structure as JSON into the I/O stream.
|
||||||
|
fn to_canon_json_writer<W>(&self, writer: W) -> Result<()>
|
||||||
|
where
|
||||||
|
W: Write;
|
||||||
|
/// Serialize the given data structure as a JSON byte vector.
|
||||||
|
fn to_canon_json_vec(&self) -> Result<Vec<u8>>;
|
||||||
|
/// Serialize the given data structure as a String.
|
||||||
|
fn to_canon_json_string(&self) -> Result<String>;
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<S> CanonJsonSerialize for S
|
||||||
|
where
|
||||||
|
S: Serialize,
|
||||||
|
{
|
||||||
|
fn to_canon_json_writer<W>(&self, writer: W) -> Result<()>
|
||||||
|
where
|
||||||
|
W: Write,
|
||||||
|
{
|
||||||
|
let mut ser = Serializer::with_formatter(writer, CanonicalFormatter::new());
|
||||||
|
Ok(self.serialize(&mut ser)?)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn to_canon_json_vec(&self) -> Result<Vec<u8>> {
|
||||||
|
let mut buf = Vec::new();
|
||||||
|
self.to_canon_json_writer(&mut buf)?;
|
||||||
|
Ok(buf)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn to_canon_json_string(&self) -> Result<String> {
|
||||||
|
String::from_utf8(self.to_canon_json_vec()?)
|
||||||
|
.map_err(|err| Error::new(ErrorKind::InvalidData, err))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
|
@ -358,8 +398,7 @@ mod tests {
|
||||||
use std::{cmp::Ordering, io::Result};
|
use std::{cmp::Ordering, io::Result};
|
||||||
|
|
||||||
use proptest::prelude::*;
|
use proptest::prelude::*;
|
||||||
use serde::Serialize;
|
use serde_json::Number;
|
||||||
use serde_json::{Number, Serializer};
|
|
||||||
use sha2::{Digest, Sha256};
|
use sha2::{Digest, Sha256};
|
||||||
use similar_asserts::assert_eq;
|
use similar_asserts::assert_eq;
|
||||||
|
|
||||||
|
@ -390,10 +429,7 @@ mod tests {
|
||||||
macro_rules! encode {
|
macro_rules! encode {
|
||||||
($($tt:tt)+) => {
|
($($tt:tt)+) => {
|
||||||
(|v: serde_json::Value| -> Result<Vec<u8>> {
|
(|v: serde_json::Value| -> Result<Vec<u8>> {
|
||||||
let mut buf = Vec::new();
|
v.to_canon_json_vec()
|
||||||
let mut ser = Serializer::with_formatter(&mut buf, CanonicalFormatter::new());
|
|
||||||
v.serialize(&mut ser)?;
|
|
||||||
Ok(buf)
|
|
||||||
})(serde_json::json!($($tt)+))
|
})(serde_json::json!($($tt)+))
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
@ -501,10 +537,6 @@ mod tests {
|
||||||
i128: i128::MIN,
|
i128: i128::MIN,
|
||||||
};
|
};
|
||||||
|
|
||||||
let mut buf = Vec::new();
|
|
||||||
let mut ser = Serializer::with_formatter(&mut buf, CanonicalFormatter::new());
|
|
||||||
value.serialize(&mut ser).unwrap();
|
|
||||||
|
|
||||||
let expected = [
|
let expected = [
|
||||||
123, 34, 105, 49, 50, 56, 34, 58, 45, 49, 55, 48, 49, 52, 49, 49, 56, 51, 52, 54, 48,
|
123, 34, 105, 49, 50, 56, 34, 58, 45, 49, 55, 48, 49, 52, 49, 49, 56, 51, 52, 54, 48,
|
||||||
52, 54, 57, 50, 51, 49, 55, 51, 49, 54, 56, 55, 51, 48, 51, 55, 49, 53, 56, 56, 52, 49,
|
52, 54, 57, 50, 51, 49, 55, 51, 49, 54, 56, 55, 51, 48, 51, 55, 49, 53, 56, 56, 52, 49,
|
||||||
|
@ -513,7 +545,7 @@ mod tests {
|
||||||
55, 54, 56, 50, 49, 49, 52, 53, 53, 125,
|
55, 54, 56, 50, 49, 49, 52, 53, 53, 125,
|
||||||
];
|
];
|
||||||
|
|
||||||
assert_eq!(buf, expected);
|
assert_eq!(value.to_canon_json_vec().unwrap(), expected);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
@ -524,24 +556,39 @@ mod tests {
|
||||||
assert_eq!(&buf, &expected);
|
assert_eq!(&buf, &expected);
|
||||||
}
|
}
|
||||||
|
|
||||||
fn arbitrary_json() -> impl Strategy<Value = serde_json::Value> {
|
/// As it says, generate arbitrary JSON. This is based on
|
||||||
|
/// https://proptest-rs.github.io/proptest/proptest/tutorial/recursive.html
|
||||||
|
///
|
||||||
|
/// We support controlling the regex for keys, and whether or not floating point values are emitted.
|
||||||
|
fn arbitrary_json(
|
||||||
|
keyspace: &'static str,
|
||||||
|
allow_fp: bool,
|
||||||
|
) -> impl Strategy<Value = serde_json::Value> {
|
||||||
use serde_json::Value;
|
use serde_json::Value;
|
||||||
const S: &str = ".*";
|
|
||||||
let leaf = prop_oneof![
|
let leaf = prop_oneof![
|
||||||
Just(Value::Null),
|
Just(Value::Null),
|
||||||
any::<u32>().prop_map(|v| Value::Number(Number::from_u128(v.into()).unwrap())),
|
any::<f64>().prop_filter_map("valid f64 for JSON", move |v| {
|
||||||
|
let n = if allow_fp && v.fract() != 0.0 {
|
||||||
|
Number::from_f64(v).unwrap()
|
||||||
|
} else {
|
||||||
|
// Constrain to values clearly lower than
|
||||||
|
// the https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Number/MAX_SAFE_INTEGER
|
||||||
|
Number::from_u128(v as u32 as u128).unwrap()
|
||||||
|
};
|
||||||
|
Some(Value::Number(n))
|
||||||
|
}),
|
||||||
any::<bool>().prop_map(Value::Bool),
|
any::<bool>().prop_map(Value::Bool),
|
||||||
S.prop_map(Value::String),
|
keyspace.prop_map(Value::String),
|
||||||
];
|
];
|
||||||
leaf.prop_recursive(
|
leaf.prop_recursive(
|
||||||
8, // 8 levels deep
|
8, // 8 levels deep
|
||||||
256, // Shoot for maximum size of 256 nodes
|
256, // Shoot for maximum size of 256 nodes
|
||||||
10, // We put up to 10 items per collection
|
10, // We put up to 10 items per collection
|
||||||
|inner| {
|
move |inner| {
|
||||||
prop_oneof![
|
prop_oneof![
|
||||||
// Take the inner strategy and make the two recursive cases.
|
// Take the inner strategy and make the two recursive cases.
|
||||||
prop::collection::vec(inner.clone(), 0..10).prop_map(Value::Array),
|
prop::collection::vec(inner.clone(), 0..10).prop_map(Value::Array),
|
||||||
prop::collection::hash_map(S, inner, 0..10)
|
prop::collection::hash_map(keyspace, inner, 0..10)
|
||||||
.prop_map(|v| { v.into_iter().collect() }),
|
.prop_map(|v| { v.into_iter().collect() }),
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
@ -550,7 +597,7 @@ mod tests {
|
||||||
|
|
||||||
proptest! {
|
proptest! {
|
||||||
#[test]
|
#[test]
|
||||||
fn roundtrip_rfc8785(v in arbitrary_json()) {
|
fn roundtrip_rfc8785(v in arbitrary_json(".*", true)) {
|
||||||
let buf = encode!(&v).unwrap();
|
let buf = encode!(&v).unwrap();
|
||||||
let v2: serde_json::Value = serde_json::from_slice(&buf)
|
let v2: serde_json::Value = serde_json::from_slice(&buf)
|
||||||
.map_err(|e| format!("Failed to parse {v:?} -> {}: {e}", String::from_utf8_lossy(&buf))).unwrap();
|
.map_err(|e| format!("Failed to parse {v:?} -> {}: {e}", String::from_utf8_lossy(&buf))).unwrap();
|
||||||
|
@ -560,11 +607,7 @@ mod tests {
|
||||||
|
|
||||||
fn verify(input: &str, expected: &str) {
|
fn verify(input: &str, expected: &str) {
|
||||||
let input: serde_json::Value = serde_json::from_str(input).unwrap();
|
let input: serde_json::Value = serde_json::from_str(input).unwrap();
|
||||||
let mut buf = Vec::new();
|
assert_eq!(expected, input.to_canon_json_string().unwrap());
|
||||||
let mut ser = Serializer::with_formatter(&mut buf, CanonicalFormatter::new());
|
|
||||||
input.serialize(&mut ser).unwrap();
|
|
||||||
let buf = String::from_utf8(buf).unwrap();
|
|
||||||
assert_eq!(expected, &buf);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
@ -651,17 +694,44 @@ mod tests {
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Regex that excludes basically everything except ASCII letters and digits
|
||||||
|
// because we know that e.g. olpc-cjson bombs on control characters,
|
||||||
|
// and also because it does NFC ordering that will cause non-equivalency
|
||||||
|
// for some whitespace etc.
|
||||||
|
const ASCII_ALPHANUMERIC: &str = r"[a-zA-Z0-9]*";
|
||||||
|
|
||||||
proptest! {
|
proptest! {
|
||||||
|
// Verify strict equivalency with printable ASCII only keys
|
||||||
#[test]
|
#[test]
|
||||||
fn crosscheck_olpc_cjson(v in arbitrary_json()) {
|
fn crosscheck_olpc_cjson_ascii(v in arbitrary_json(ASCII_ALPHANUMERIC, false)) {
|
||||||
use olpc_cjson::CanonicalFormatter;
|
let canon_json = String::from_utf8(encode!(&v).unwrap()).unwrap();
|
||||||
|
|
||||||
let mut olpc_cjson_serialized = Vec::new();
|
let mut olpc_cjson_serialized = Vec::new();
|
||||||
let mut ser = serde_json::Serializer::with_formatter(&mut olpc_cjson_serialized, CanonicalFormatter::new());
|
let mut ser = serde_json::Serializer::with_formatter(&mut olpc_cjson_serialized, olpc_cjson::CanonicalFormatter::new());
|
||||||
prop_assume!(v.serialize(&mut ser).is_ok());
|
v.serialize(&mut ser).unwrap();
|
||||||
|
assert_eq!(canon_json, String::from_utf8(olpc_cjson_serialized).unwrap());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
proptest! {
|
||||||
|
// Verify strict equivalency with printable ASCII only keys
|
||||||
|
#[test]
|
||||||
|
fn crosscheck_cjson_ascii(v in arbitrary_json(ASCII_ALPHANUMERIC, false)) {
|
||||||
|
let canon_json = String::from_utf8(encode!(&v).unwrap()).unwrap();
|
||||||
|
let cjson = String::from_utf8(cjson::to_vec(&v).unwrap()).unwrap();
|
||||||
|
assert_eq!(canon_json, cjson);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Verify equivalency (after sorting) with non-ASCII keys
|
||||||
|
#[test]
|
||||||
|
fn crosscheck_cjson(v in arbitrary_json(".*", false)) {
|
||||||
let buf = encode!(&v).unwrap();
|
let buf = encode!(&v).unwrap();
|
||||||
assert_eq!(buf, olpc_cjson_serialized);
|
let self_reparsed = serde_json::from_slice::<serde_json::Value>(&buf).unwrap();
|
||||||
|
let buf = cjson::to_vec(&v).unwrap();
|
||||||
|
let cjson_reparsed = serde_json::from_slice::<serde_json::Value>(&buf).unwrap();
|
||||||
|
// As above with olpc-cjson, this relies on the fact that serde_json
|
||||||
|
// sorts object keys by default.
|
||||||
|
assert_eq!(self_reparsed, v);
|
||||||
|
assert_eq!(cjson_reparsed, v);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue