[pbs-devel] [RFC PATCH] schema: allow serializing rust Schema to perl JsonSchema
Wolfgang Bumiller
w.bumiller at proxmox.com
Thu May 8 10:24:12 CEST 2025
On Wed, May 07, 2025 at 06:31:14PM +0200, Gabriel Goller wrote:
> Implement serde::Serialize on the rust Schema, so that we can serialize
> it and use it as a JsonSchema in perl. This allows us to write a single
> Schema in rust and reuse it in perl for the api properties.
>
> The interesting bits (custom impls) are:
> * Recursive oneOf type-property resolver
> * oneOf and allOf implementation
> * ApiStringFormat skip of ApiStringVerifyFn (which won't work obviously)
I'd like the commit to explain where we actually want to use/need this.
Long ago (before we had the `OneOf` schema I already started this, but
figured we didn't really need it anyway at the time.
>
> Signed-off-by: Gabriel Goller <g.goller at proxmox.com>
> ---
>
> This is kinda hard to test, because nothing actually fails when the properties
> are wrong and the whole allOf, oneOf and ApiStringFormat is a bit
> untransparent. So some properties could be wrongly serialized, but I
> think I got everything right. Looking over all the properties would be
> appreciated!
allOf and regular object-schema can use the same startegy, you serialize
a map and you can use the iterator you get from the `.properties()`
method to iterate over all properties.
One thing you can take out of my `staff/old-perl-serializing` commit is
the `ApiStringFormat::VerifyFn` handling, it uses perlmod to serialize
the function as an xsub with some magic to call the rust verifier code.
We *may* also consider using such a wrapper for regex patterns in case
we run into any significant differences between the perl & regex-crate
engines...
Also, technically the perl `JSONSchema` should be able to verify a
schema... (At least partially...)
>
> Cargo.toml | 1 +
> proxmox-schema/Cargo.toml | 4 +-
> proxmox-schema/src/const_regex.rs | 12 ++
> proxmox-schema/src/schema.rs | 242 ++++++++++++++++++++++++++++--
> 4 files changed, 246 insertions(+), 13 deletions(-)
>
> diff --git a/Cargo.toml b/Cargo.toml
> index 2ca0ea618707..a0d760ae8fc9 100644
> --- a/Cargo.toml
> +++ b/Cargo.toml
> @@ -104,6 +104,7 @@ regex = "1.5"
> serde = "1.0"
> serde_cbor = "0.11.1"
> serde_json = "1.0"
> +serde_with = "3.8.1"
^ Is it really worth introducing this for `None` handling :S
> serde_plain = "1.0"
> syn = { version = "2", features = [ "full", "visit-mut" ] }
> tar = "0.4"
> diff --git a/proxmox-schema/Cargo.toml b/proxmox-schema/Cargo.toml
> index c8028aa52bd0..48ebf3a9005e 100644
> --- a/proxmox-schema/Cargo.toml
> +++ b/proxmox-schema/Cargo.toml
> @@ -15,8 +15,9 @@ rust-version.workspace = true
> anyhow.workspace = true
> const_format = { workspace = true, optional = true }
> regex.workspace = true
> -serde.workspace = true
> +serde = { workspace = true, features = ["derive"] }
> serde_json.workspace = true
> +serde_with.workspace = true
> textwrap = "0.16"
>
> # the upid type needs this for 'getpid'
> @@ -27,7 +28,6 @@ proxmox-api-macro = { workspace = true, optional = true }
>
> [dev-dependencies]
> url.workspace = true
> -serde = { workspace = true, features = [ "derive" ] }
> proxmox-api-macro.workspace = true
>
> [features]
> diff --git a/proxmox-schema/src/const_regex.rs b/proxmox-schema/src/const_regex.rs
> index 8ddc41abedeb..56f6c27fa1de 100644
> --- a/proxmox-schema/src/const_regex.rs
> +++ b/proxmox-schema/src/const_regex.rs
> @@ -1,5 +1,7 @@
> use std::fmt;
>
> +use serde::Serialize;
> +
> /// Helper to represent const regular expressions
> ///
> /// The current Regex::new() function is not `const_fn`. Unless that
> @@ -13,6 +15,16 @@ pub struct ConstRegexPattern {
> pub regex_obj: fn() -> &'static regex::Regex,
> }
>
> +impl Serialize for ConstRegexPattern {
I'm not a huge fan of adding this trait to `ConstRegexPattern`...
> + fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
> + where
> + S: serde::Serializer,
> + {
> + // Get the compiled regex and serialize its pattern as a string
> + serializer.serialize_str((self.regex_obj)().as_str())
> + }
> +}
> +
> impl fmt::Debug for ConstRegexPattern {
> fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
> write!(f, "{:?}", self.regex_string)
> diff --git a/proxmox-schema/src/schema.rs b/proxmox-schema/src/schema.rs
> index ddbbacd462a4..11461eaf6ace 100644
> --- a/proxmox-schema/src/schema.rs
> +++ b/proxmox-schema/src/schema.rs
> @@ -4,10 +4,12 @@
> //! completely static API definitions that can be included within the programs read-only text
> //! segment.
>
> -use std::collections::HashSet;
> +use std::collections::{HashMap, HashSet};
> use std::fmt;
>
> use anyhow::{bail, format_err, Error};
> +use serde::ser::{SerializeMap, SerializeStruct};
> +use serde::{Serialize, Serializer};
> use serde_json::{json, Value};
>
> use crate::ConstRegexPattern;
> @@ -181,7 +183,8 @@ impl<'a> FromIterator<(&'a str, Error)> for ParameterError {
> }
>
> /// Data type to describe boolean values
> -#[derive(Debug)]
> +#[serde_with::skip_serializing_none]
> +#[derive(Debug, Serialize)]
> #[cfg_attr(feature = "test-harness", derive(Eq, PartialEq))]
> #[non_exhaustive]
> pub struct BooleanSchema {
> @@ -222,7 +225,8 @@ impl BooleanSchema {
> }
>
> /// Data type to describe integer values.
> -#[derive(Debug)]
> +#[serde_with::skip_serializing_none]
> +#[derive(Debug, Serialize)]
> #[cfg_attr(feature = "test-harness", derive(Eq, PartialEq))]
> #[non_exhaustive]
> pub struct IntegerSchema {
> @@ -304,7 +308,8 @@ impl IntegerSchema {
> }
>
> /// Data type to describe (JSON like) number value
> -#[derive(Debug)]
> +#[serde_with::skip_serializing_none]
> +#[derive(Debug, Serialize)]
> #[non_exhaustive]
> pub struct NumberSchema {
> pub description: &'static str,
> @@ -406,7 +411,8 @@ impl PartialEq for NumberSchema {
> }
>
> /// Data type to describe string values.
> -#[derive(Debug)]
> +#[serde_with::skip_serializing_none]
> +#[derive(Debug, Serialize)]
> #[cfg_attr(feature = "test-harness", derive(Eq, PartialEq))]
> #[non_exhaustive]
> pub struct StringSchema {
^ needs a `#[serde(rename_all = "camelCase")]`
> @@ -418,6 +424,7 @@ pub struct StringSchema {
> /// Optional maximal length.
> pub max_length: Option<usize>,
> /// Optional microformat.
> + #[serde(flatten)]
> pub format: Option<&'static ApiStringFormat>,
> /// A text representation of the format/type (used to generate documentation).
> pub type_text: Option<&'static str>,
^ while this needs a `rename = "typetext"`
But also, if the type text is None, property strings get the type text
generated via `format::get_property_string_type_text`.
You can take this from my staff branch.
> @@ -534,7 +541,8 @@ impl StringSchema {
> ///
> /// All array elements are of the same type, as defined in the `items`
> /// schema.
> -#[derive(Debug)]
> +#[serde_with::skip_serializing_none]
> +#[derive(Debug, Serialize)]
> #[cfg_attr(feature = "test-harness", derive(Eq, PartialEq))]
> #[non_exhaustive]
> pub struct ArraySchema {
^ needs a `#[serde(rename_all = "camelCase")]`
> @@ -634,6 +642,43 @@ pub type SchemaPropertyEntry = (&'static str, bool, &'static Schema);
> /// This is a workaround unless RUST can const_fn `Hash::new()`
> pub type SchemaPropertyMap = &'static [SchemaPropertyEntry];
>
> +/// A wrapper struct to hold the [`SchemaPropertyMap`] and serialize it nicely.
> +///
> +/// [`SchemaPropertyMap`] holds [`SchemaPropertyEntry`]s which are tuples. Tuples are serialized to
> +/// arrays, but we need a Map with the name (first item in the tuple) as a key and the optional
> +/// (second item in the tuple) as a property of the value.
> +pub struct SerializableSchemaProperties<'a>(&'a [SchemaPropertyEntry]);
> +
> +impl Serialize for SerializableSchemaProperties<'_> {
> + fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
> + where
> + S: Serializer,
> + {
> + let mut seq = serializer.serialize_map(Some(self.0.len()))?;
> +
> + for (name, optional, schema) in self.0 {
> + let schema_with_metadata = OptionalSchema {
> + optional: *optional,
> + schema,
> + };
> +
> + seq.serialize_entry(&name, &schema_with_metadata)?;
> + }
> +
> + seq.end()
> + }
> +}
> +
> +/// A schema with a optional bool property.
> +///
> +/// The schema gets flattened, so it looks just like a normal Schema but with a optional property.
> +#[derive(Serialize)]
> +struct OptionalSchema<'a> {
> + optional: bool,
> + #[serde(flatten)]
> + schema: &'a Schema,
> +}
> +
> const fn assert_properties_sorted(properties: SchemaPropertyMap) {
> use std::cmp::Ordering;
>
> @@ -656,7 +701,7 @@ const fn assert_properties_sorted(properties: SchemaPropertyMap) {
> /// Legacy property strings may contain shortcuts where the *value* of a specific key is used as a
> /// *key* for yet another option. Most notably, PVE's `netX` properties use `<model>=<macaddr>`
> /// instead of `model=<model>,macaddr=<macaddr>`.
> -#[derive(Clone, Copy, Debug)]
> +#[derive(Clone, Copy, Debug, Serialize)]
> #[cfg_attr(feature = "test-harness", derive(Eq, PartialEq))]
> pub struct KeyAliasInfo {
> pub key_alias: &'static str,
> @@ -700,6 +745,77 @@ pub struct ObjectSchema {
> pub key_alias_info: Option<KeyAliasInfo>,
> }
>
> +impl Serialize for ObjectSchema {
> + fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
> + where
> + S: Serializer,
Move the contents of this into a:
fn serialize_object_schema<S, T: ObjectSchemaType>(schema: &T, serializer: S) -> Result<S::Ok, S::Error>
> + {
> + let mut s = serializer.serialize_struct("ObjectSchema", 5)?;
(^ May as well just use `_map()` instead of `_struct()`, not sure
there's much of a benefit here.)
> +
> + s.serialize_field("description", self.description)?;
> + s.serialize_field("additional_properties", &self.additional_properties)?;
> +
> + // Collect all OneOf type properties recursively
The `ObjectSchemaType` trait gives you a `.properties()` Iterator.
> + let mut oneofs: Vec<SchemaPropertyEntry> = Vec::new();
> + for (_, _, schema) in self.properties {
> + collect_oneof_type_properties(schema, &mut oneofs);
> + }
> +
> + if !oneofs.is_empty() {
> + // Extend the oneOf type-properties with the actual properties
> + oneofs.extend_from_slice(self.properties);
> + s.serialize_field("properties", &SerializableSchemaProperties(&oneofs))?;
> + } else {
> + s.serialize_field("properties", &SerializableSchemaProperties(self.properties))?;
> + }
> +
> + if let Some(default_key) = self.default_key {
> + s.serialize_field("default_key", default_key)?;
^ Like `optional`, in perl `default_key` is part of the *property*, not
part of the `Object` schema.
> + } else {
> + s.skip_field("default_key")?;
> + }
> + if let Some(key_alias_info) = self.key_alias_info {
This does not exist in perl.
The key alias info works like this:
- All keys listed in the `KeyAliasInfo::values` alias to
`KeyAliasInfo::alias`, and *which* key has been used is stored in the
property named `KeyAliasInfo::key_alias`.
Assume we have
key_alias_info = {
keyAlias = "model",
alias = "macaddr",
values = [ "virtio", "e1000", ... ]
}
This transforms:
virtio=aa:aa:aa:aa:aa:aa,bridge=vmbr0
into
model=virtio,macaddr=aa:aa:aa:aa:aa:aa,bridge=vmbr0
So you need to pass the key alias info (KAI from here on out) along to a
helper which serializes the property list which then:
- first finds the schema for the `KAI.alias`
- whenever a property is serialized which is listed in the `KAI.values`
array it merges: `keyAlias = KAI.keyAlias, alias = KAI.alias` *into* the
property
- iterate through all the `KAI.values` and serialize them with the
schema found for the `KAI.alias` property
> + s.serialize_field("key_alias_info", &key_alias_info)?;
> + } else {
> + s.skip_field("key_alias_info")?;
> + }
> +
> + s.end()
> + }
> +}
> +
> +// Recursive function to find all OneOf type properties in a schema
> +fn collect_oneof_type_properties(schema: &Schema, result: &mut Vec<SchemaPropertyEntry>) {
Why are you recursing into arrays, property strings etc. here? The contents of arrays aren't
part of the containing object's property list?
> + match schema {
> + Schema::OneOf(oneof) => {
> + result.push(*oneof.type_property_entry);
> + }
> + Schema::Array(array) => {
> + // Recursively check the array schema
> + collect_oneof_type_properties(array.items, result);
> + }
> + Schema::String(string) => {
> + // Check the PropertyString Schema
> + if let Some(ApiStringFormat::PropertyString(schema)) = string.format {
> + collect_oneof_type_properties(schema, result);
> + }
> + }
> + Schema::Object(obj) => {
> + // Check all properties in the object
> + for (_, _, prop_schema) in obj.properties {
> + collect_oneof_type_properties(prop_schema, result);
> + }
> + }
> + Schema::AllOf(all_of) => {
> + // Check all schemas in the allOf list
> + for &schema in all_of.list {
> + collect_oneof_type_properties(schema, result);
> + }
> + }
> + _ => {}
> + }
> +}
> +
> impl ObjectSchema {
> /// Create a new `object` schema.
> ///
> @@ -811,7 +927,7 @@ impl ObjectSchema {
> ///
> /// Technically this could also contain an `additional_properties` flag, however, in the JSON
> /// Schema, this is not supported, so here we simply assume additional properties to be allowed.
> -#[derive(Debug)]
> +#[derive(Debug, Serialize)]
> #[cfg_attr(feature = "test-harness", derive(Eq, PartialEq))]
> #[non_exhaustive]
> pub struct AllOfSchema {
> @@ -864,7 +980,7 @@ impl AllOfSchema {
> /// In serde-language, we use an internally tagged enum representation.
> ///
> /// Note that these are limited to object schemas. Other schemas will produce errors.
> -#[derive(Debug)]
> +#[derive(Debug, Serialize)]
> #[cfg_attr(feature = "test-harness", derive(Eq, PartialEq))]
> #[non_exhaustive]
> pub struct OneOfSchema {
> @@ -880,6 +996,30 @@ pub struct OneOfSchema {
> pub list: &'static [(&'static str, &'static Schema)],
> }
>
> +fn serialize_oneof_schema<S>(one_of: &OneOfSchema, serializer: S) -> Result<S::Ok, S::Error>
> +where
> + S: Serializer,
> +{
I'll first have to refresh my memory on the perl-side oneOf weirdness
for this.
Maybe @Dominik can take a look as well.
If we need rust->perl support here then the perl oneOf may very well
need some changes...
> + use serde::ser::SerializeMap;
> +
> + let mut map = serializer.serialize_map(Some(3))?;
> +
> + map.serialize_entry("description", &one_of.description)?;
> +
> + let variants = one_of
> + .list
> + .iter()
> + .map(|(_, schema)| schema)
> + .collect::<Vec<_>>();
> +
> + map.serialize_entry("oneOf", &variants)?;
> +
> + // The schema gets inserted into the parent properties
> + map.serialize_entry("type-property", &one_of.type_property_entry.0)?;
> +
> + map.end()
> +}
> +
> const fn assert_one_of_list_is_sorted(list: &[(&str, &Schema)]) {
> use std::cmp::Ordering;
>
> @@ -1360,7 +1500,8 @@ impl Iterator for OneOfPropertyIterator {
> /// ],
> /// ).schema();
> /// ```
> -#[derive(Debug)]
> +#[derive(Debug, Serialize)]
> +#[serde(tag = "type", rename_all = "lowercase")]
> #[cfg_attr(feature = "test-harness", derive(Eq, PartialEq))]
> pub enum Schema {
> Null,
> @@ -1370,10 +1511,81 @@ pub enum Schema {
> String(StringSchema),
> Object(ObjectSchema),
> Array(ArraySchema),
> + #[serde(serialize_with = "serialize_allof_schema")]
^ This should use `#[serde(rename = "object")]`. We don't have explicit
AllOfs in perl, because in perl we can easily just merge properties. In
rust we were unable to do that, since our schemas are all
compile-time-const.
> AllOf(AllOfSchema),
> + #[serde(untagged)]
> + #[serde(serialize_with = "serialize_oneof_schema", rename = "oneOf")]
> OneOf(OneOfSchema),
> }
>
> +/// Serialize the AllOf Schema
> +///
> +/// This will create one ObjectSchema and merge the properties of all the children.
> +fn serialize_allof_schema<S>(all_of: &AllOfSchema, serializer: S) -> Result<S::Ok, S::Error>
drop this and call the above mentioned `serialize_object_schema()`
instead.
> +where
> + S: Serializer,
> +{
> + use serde::ser::SerializeMap;
> +
> + let mut map = serializer.serialize_map(Some(4))?;
> +
> + // Add the top-level description
> + map.serialize_entry("description", &all_of.description)?;
> +
> + // The type is always object
> + map.serialize_entry("type", "object")?;
> +
> + let mut all_properties = HashMap::new();
> + let mut additional_properties = false;
> +
> + for &schema in all_of.list {
> + if let Some(object_schema) = schema.object() {
> + // If any schema allows additional properties, the merged schema will too
> + if object_schema.additional_properties {
> + additional_properties = true;
> + }
> +
> + // Add all properties from this schema
> + for (name, optional, prop_schema) in object_schema.properties {
> + all_properties.insert(*name, (*optional, *prop_schema));
> + }
> + } else if let Some(nested_all_of) = schema.all_of() {
> + // For nested AllOf schemas go through recursively
> + for &nested_schema in nested_all_of.list {
> + if let Some(object_schema) = nested_schema.object() {
> + if object_schema.additional_properties {
> + additional_properties = true;
> + }
> +
> + for (name, optional, prop_schema) in object_schema.properties {
> + all_properties.insert(*name, (*optional, *prop_schema));
> + }
> + }
> + }
> + }
> + }
> +
> + // Add the merged properties
> + let properties_entry = all_properties
> + .iter()
> + .map(|(name, (optional, schema))| {
> + (
> + *name,
> + OptionalSchema {
> + optional: *optional,
> + schema,
> + },
> + )
> + })
> + .collect::<HashMap<_, _>>();
> +
> + map.serialize_entry("properties", &properties_entry)?;
> +
> + map.serialize_entry("additional_properties", &additional_properties)?;
> +
> + map.end()
> +}
> +
> impl Schema {
> /// Verify JSON value with `schema`.
> pub fn verify_json(&self, data: &Value) -> Result<(), Error> {
> @@ -1694,10 +1906,12 @@ impl Schema {
> }
>
> /// A string enum entry. An enum entry must have a value and a description.
> -#[derive(Clone, Debug)]
> +#[derive(Clone, Debug, Serialize)]
> +#[serde(transparent)]
> #[cfg_attr(feature = "test-harness", derive(Eq, PartialEq))]
> pub struct EnumEntry {
> pub value: &'static str,
> + #[serde(skip)]
> pub description: &'static str,
> }
>
> @@ -1776,14 +1990,20 @@ impl EnumEntry {
> /// let data = PRODUCT_LIST_SCHEMA.parse_property_string("1,2"); // parse as Array
> /// assert!(data.is_ok());
> /// ```
> +#[derive(Serialize)]
> pub enum ApiStringFormat {
> /// Enumerate all valid strings
> + #[serde(rename = "enum")]
> Enum(&'static [EnumEntry]),
> /// Use a regular expression to describe valid strings.
> + #[serde(rename = "pattern")]
> Pattern(&'static ConstRegexPattern),
> /// Use a schema to describe complex types encoded as string.
> + #[serde(rename = "format")]
> PropertyString(&'static Schema),
> /// Use a verification function.
> + /// Note: we can't serialize this, panic if we encounter this.
> + #[serde(skip)]
^ as mentioned, see my old-perl-serializing branch...
> VerifyFn(ApiStringVerifyFn),
> }
>
> --
> 2.39.5
More information about the pbs-devel
mailing list