[pbs-devel] [RFC PATCH] schema: allow serializing rust Schema to perl JsonSchema

Wolfgang Bumiller w.bumiller at proxmox.com
Thu May 8 10:24:12 CEST 2025


On Wed, May 07, 2025 at 06:31:14PM +0200, Gabriel Goller wrote:
> Implement serde::Serialize on the rust Schema, so that we can serialize
> it and use it as a JsonSchema in perl. This allows us to write a single
> Schema in rust and reuse it in perl for the api properties.
> 
> The interesting bits (custom impls) are:
>  * Recursive oneOf type-property resolver
>  * oneOf and allOf implementation
>  * ApiStringFormat skip of ApiStringVerifyFn (which won't work obviously)

I'd like the commit to explain where we actually want to use/need this.
Long ago (before we had the `OneOf` schema I already started this, but
figured we didn't really need it anyway at the time.

> 
> Signed-off-by: Gabriel Goller <g.goller at proxmox.com>
> ---
> 
> This is kinda hard to test, because nothing actually fails when the properties
> are wrong and the whole allOf, oneOf and ApiStringFormat is a bit
> untransparent. So some properties could be wrongly serialized, but I
> think I got everything right. Looking over all the properties would be
> appreciated!

allOf and regular object-schema can use the same startegy, you serialize
a map and you can use the iterator you get from the `.properties()`
method to iterate over all properties.

One thing you can take out of my `staff/old-perl-serializing` commit is
the `ApiStringFormat::VerifyFn` handling, it uses perlmod to serialize
the function as an xsub with some magic to call the rust verifier code.

We *may* also consider using such a wrapper for regex patterns in case
we run into any significant differences between the perl & regex-crate
engines...

Also, technically the perl `JSONSchema` should be able to verify a
schema... (At least partially...)

> 
>  Cargo.toml                        |   1 +
>  proxmox-schema/Cargo.toml         |   4 +-
>  proxmox-schema/src/const_regex.rs |  12 ++
>  proxmox-schema/src/schema.rs      | 242 ++++++++++++++++++++++++++++--
>  4 files changed, 246 insertions(+), 13 deletions(-)
> 
> diff --git a/Cargo.toml b/Cargo.toml
> index 2ca0ea618707..a0d760ae8fc9 100644
> --- a/Cargo.toml
> +++ b/Cargo.toml
> @@ -104,6 +104,7 @@ regex = "1.5"
>  serde = "1.0"
>  serde_cbor = "0.11.1"
>  serde_json = "1.0"
> +serde_with = "3.8.1"

^ Is it really worth introducing this for `None` handling :S

>  serde_plain = "1.0"
>  syn = { version = "2", features = [ "full", "visit-mut" ] }
>  tar = "0.4"
> diff --git a/proxmox-schema/Cargo.toml b/proxmox-schema/Cargo.toml
> index c8028aa52bd0..48ebf3a9005e 100644
> --- a/proxmox-schema/Cargo.toml
> +++ b/proxmox-schema/Cargo.toml
> @@ -15,8 +15,9 @@ rust-version.workspace = true
>  anyhow.workspace = true
>  const_format = { workspace = true, optional = true }
>  regex.workspace = true
> -serde.workspace = true
> +serde = { workspace = true, features = ["derive"] }
>  serde_json.workspace = true
> +serde_with.workspace = true
>  textwrap = "0.16"
>  
>  # the upid type needs this for 'getpid'
> @@ -27,7 +28,6 @@ proxmox-api-macro = { workspace = true, optional = true }
>  
>  [dev-dependencies]
>  url.workspace = true
> -serde = { workspace = true, features = [ "derive" ] }
>  proxmox-api-macro.workspace = true
>  
>  [features]
> diff --git a/proxmox-schema/src/const_regex.rs b/proxmox-schema/src/const_regex.rs
> index 8ddc41abedeb..56f6c27fa1de 100644
> --- a/proxmox-schema/src/const_regex.rs
> +++ b/proxmox-schema/src/const_regex.rs
> @@ -1,5 +1,7 @@
>  use std::fmt;
>  
> +use serde::Serialize;
> +
>  /// Helper to represent const regular expressions
>  ///
>  /// The current Regex::new() function is not `const_fn`. Unless that
> @@ -13,6 +15,16 @@ pub struct ConstRegexPattern {
>      pub regex_obj: fn() -> &'static regex::Regex,
>  }
>  
> +impl Serialize for ConstRegexPattern {

I'm not a huge fan of adding this trait to `ConstRegexPattern`...

> +    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
> +    where
> +        S: serde::Serializer,
> +    {
> +        // Get the compiled regex and serialize its pattern as a string
> +        serializer.serialize_str((self.regex_obj)().as_str())
> +    }
> +}
> +
>  impl fmt::Debug for ConstRegexPattern {
>      fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
>          write!(f, "{:?}", self.regex_string)
> diff --git a/proxmox-schema/src/schema.rs b/proxmox-schema/src/schema.rs
> index ddbbacd462a4..11461eaf6ace 100644
> --- a/proxmox-schema/src/schema.rs
> +++ b/proxmox-schema/src/schema.rs
> @@ -4,10 +4,12 @@
>  //! completely static API definitions that can be included within the programs read-only text
>  //! segment.
>  
> -use std::collections::HashSet;
> +use std::collections::{HashMap, HashSet};
>  use std::fmt;
>  
>  use anyhow::{bail, format_err, Error};
> +use serde::ser::{SerializeMap, SerializeStruct};
> +use serde::{Serialize, Serializer};
>  use serde_json::{json, Value};
>  
>  use crate::ConstRegexPattern;
> @@ -181,7 +183,8 @@ impl<'a> FromIterator<(&'a str, Error)> for ParameterError {
>  }
>  
>  /// Data type to describe boolean values
> -#[derive(Debug)]
> +#[serde_with::skip_serializing_none]
> +#[derive(Debug, Serialize)]
>  #[cfg_attr(feature = "test-harness", derive(Eq, PartialEq))]
>  #[non_exhaustive]
>  pub struct BooleanSchema {
> @@ -222,7 +225,8 @@ impl BooleanSchema {
>  }
>  
>  /// Data type to describe integer values.
> -#[derive(Debug)]
> +#[serde_with::skip_serializing_none]
> +#[derive(Debug, Serialize)]
>  #[cfg_attr(feature = "test-harness", derive(Eq, PartialEq))]
>  #[non_exhaustive]
>  pub struct IntegerSchema {
> @@ -304,7 +308,8 @@ impl IntegerSchema {
>  }
>  
>  /// Data type to describe (JSON like) number value
> -#[derive(Debug)]
> +#[serde_with::skip_serializing_none]
> +#[derive(Debug, Serialize)]
>  #[non_exhaustive]
>  pub struct NumberSchema {
>      pub description: &'static str,
> @@ -406,7 +411,8 @@ impl PartialEq for NumberSchema {
>  }
>  
>  /// Data type to describe string values.
> -#[derive(Debug)]
> +#[serde_with::skip_serializing_none]
> +#[derive(Debug, Serialize)]
>  #[cfg_attr(feature = "test-harness", derive(Eq, PartialEq))]
>  #[non_exhaustive]
>  pub struct StringSchema {

^ needs a `#[serde(rename_all = "camelCase")]`

> @@ -418,6 +424,7 @@ pub struct StringSchema {
>      /// Optional maximal length.
>      pub max_length: Option<usize>,
>      /// Optional microformat.
> +    #[serde(flatten)]
>      pub format: Option<&'static ApiStringFormat>,
>      /// A text representation of the format/type (used to generate documentation).
>      pub type_text: Option<&'static str>,

^ while this needs a `rename = "typetext"`

But also, if the type text is None, property strings get the type text
generated via `format::get_property_string_type_text`.

You can take this from my staff branch.

> @@ -534,7 +541,8 @@ impl StringSchema {
>  ///
>  /// All array elements are of the same type, as defined in the `items`
>  /// schema.
> -#[derive(Debug)]
> +#[serde_with::skip_serializing_none]
> +#[derive(Debug, Serialize)]
>  #[cfg_attr(feature = "test-harness", derive(Eq, PartialEq))]
>  #[non_exhaustive]
>  pub struct ArraySchema {

^ needs a `#[serde(rename_all = "camelCase")]`

> @@ -634,6 +642,43 @@ pub type SchemaPropertyEntry = (&'static str, bool, &'static Schema);
>  /// This is a workaround unless RUST can const_fn `Hash::new()`
>  pub type SchemaPropertyMap = &'static [SchemaPropertyEntry];
>  
> +/// A wrapper struct to hold the [`SchemaPropertyMap`] and serialize it nicely.
> +///
> +/// [`SchemaPropertyMap`] holds [`SchemaPropertyEntry`]s which are tuples. Tuples are serialized to
> +/// arrays, but we need a Map with the name (first item in the tuple) as a key and the optional
> +/// (second item in the tuple) as a property of the value.
> +pub struct SerializableSchemaProperties<'a>(&'a [SchemaPropertyEntry]);
> +
> +impl Serialize for SerializableSchemaProperties<'_> {
> +    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
> +    where
> +        S: Serializer,
> +    {
> +        let mut seq = serializer.serialize_map(Some(self.0.len()))?;
> +
> +        for (name, optional, schema) in self.0 {
> +            let schema_with_metadata = OptionalSchema {
> +                optional: *optional,
> +                schema,
> +            };
> +
> +            seq.serialize_entry(&name, &schema_with_metadata)?;
> +        }
> +
> +        seq.end()
> +    }
> +}
> +
> +/// A schema with a optional bool property.
> +///
> +/// The schema gets flattened, so it looks just like a normal Schema but with a optional property.
> +#[derive(Serialize)]
> +struct OptionalSchema<'a> {
> +    optional: bool,
> +    #[serde(flatten)]
> +    schema: &'a Schema,
> +}
> +
>  const fn assert_properties_sorted(properties: SchemaPropertyMap) {
>      use std::cmp::Ordering;
>  
> @@ -656,7 +701,7 @@ const fn assert_properties_sorted(properties: SchemaPropertyMap) {
>  /// Legacy property strings may contain shortcuts where the *value* of a specific key is used as a
>  /// *key* for yet another option. Most notably, PVE's `netX` properties use `<model>=<macaddr>`
>  /// instead of `model=<model>,macaddr=<macaddr>`.
> -#[derive(Clone, Copy, Debug)]
> +#[derive(Clone, Copy, Debug, Serialize)]
>  #[cfg_attr(feature = "test-harness", derive(Eq, PartialEq))]
>  pub struct KeyAliasInfo {
>      pub key_alias: &'static str,
> @@ -700,6 +745,77 @@ pub struct ObjectSchema {
>      pub key_alias_info: Option<KeyAliasInfo>,
>  }
>  
> +impl Serialize for ObjectSchema {
> +    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
> +    where
> +        S: Serializer,

Move the contents of this into a:

    fn serialize_object_schema<S, T: ObjectSchemaType>(schema: &T, serializer: S) -> Result<S::Ok, S::Error>

> +    {
> +        let mut s = serializer.serialize_struct("ObjectSchema", 5)?;

(^ May as well just use `_map()` instead of `_struct()`, not sure
there's much of a benefit here.)

> +
> +        s.serialize_field("description", self.description)?;
> +        s.serialize_field("additional_properties", &self.additional_properties)?;
> +
> +        // Collect all OneOf type properties recursively

The `ObjectSchemaType` trait gives you a `.properties()` Iterator.

> +        let mut oneofs: Vec<SchemaPropertyEntry> = Vec::new();
> +        for (_, _, schema) in self.properties {
> +            collect_oneof_type_properties(schema, &mut oneofs);
> +        }
> +
> +        if !oneofs.is_empty() {
> +            // Extend the oneOf type-properties with the actual properties
> +            oneofs.extend_from_slice(self.properties);
> +            s.serialize_field("properties", &SerializableSchemaProperties(&oneofs))?;
> +        } else {
> +            s.serialize_field("properties", &SerializableSchemaProperties(self.properties))?;
> +        }
> +
> +        if let Some(default_key) = self.default_key {
> +            s.serialize_field("default_key", default_key)?;

^ Like `optional`, in perl `default_key` is part of the *property*, not
part of the `Object` schema.

> +        } else {
> +            s.skip_field("default_key")?;
> +        }
> +        if let Some(key_alias_info) = self.key_alias_info {

This does not exist in perl.

The key alias info works like this:
- All keys listed in the `KeyAliasInfo::values` alias to
  `KeyAliasInfo::alias`, and *which* key has been used is stored in the
  property named `KeyAliasInfo::key_alias`.

Assume we have

    key_alias_info = {
        keyAlias = "model",
        alias = "macaddr",
        values = [ "virtio", "e1000", ... ]
    }

This transforms:

    virtio=aa:aa:aa:aa:aa:aa,bridge=vmbr0

into

    model=virtio,macaddr=aa:aa:aa:aa:aa:aa,bridge=vmbr0

So you need to pass the key alias info (KAI from here on out) along to a
helper which serializes the property list which then:
- first finds the schema for the `KAI.alias`
- whenever a property is serialized which is listed in the `KAI.values`
  array it merges: `keyAlias = KAI.keyAlias, alias = KAI.alias` *into* the
  property
- iterate through all the `KAI.values` and serialize them with the
  schema found for the `KAI.alias` property
  

> +            s.serialize_field("key_alias_info", &key_alias_info)?;
> +        } else {
> +            s.skip_field("key_alias_info")?;
> +        }
> +
> +        s.end()
> +    }
> +}
> +
> +// Recursive function to find all OneOf type properties in a schema
> +fn collect_oneof_type_properties(schema: &Schema, result: &mut Vec<SchemaPropertyEntry>) {

Why are you recursing into arrays, property strings etc. here? The contents of arrays aren't
part of the containing object's property list?

> +    match schema {
> +        Schema::OneOf(oneof) => {
> +            result.push(*oneof.type_property_entry);
> +        }
> +        Schema::Array(array) => {
> +            // Recursively check the array schema
> +            collect_oneof_type_properties(array.items, result);
> +        }
> +        Schema::String(string) => {
> +            // Check the PropertyString Schema
> +            if let Some(ApiStringFormat::PropertyString(schema)) = string.format {
> +                collect_oneof_type_properties(schema, result);
> +            }
> +        }
> +        Schema::Object(obj) => {
> +            // Check all properties in the object
> +            for (_, _, prop_schema) in obj.properties {
> +                collect_oneof_type_properties(prop_schema, result);
> +            }
> +        }
> +        Schema::AllOf(all_of) => {
> +            // Check all schemas in the allOf list
> +            for &schema in all_of.list {
> +                collect_oneof_type_properties(schema, result);
> +            }
> +        }
> +        _ => {}
> +    }
> +}
> +
>  impl ObjectSchema {
>      /// Create a new `object` schema.
>      ///
> @@ -811,7 +927,7 @@ impl ObjectSchema {
>  ///
>  /// Technically this could also contain an `additional_properties` flag, however, in the JSON
>  /// Schema, this is not supported, so here we simply assume additional properties to be allowed.
> -#[derive(Debug)]
> +#[derive(Debug, Serialize)]
>  #[cfg_attr(feature = "test-harness", derive(Eq, PartialEq))]
>  #[non_exhaustive]
>  pub struct AllOfSchema {
> @@ -864,7 +980,7 @@ impl AllOfSchema {
>  /// In serde-language, we use an internally tagged enum representation.
>  ///
>  /// Note that these are limited to object schemas. Other schemas will produce errors.
> -#[derive(Debug)]
> +#[derive(Debug, Serialize)]
>  #[cfg_attr(feature = "test-harness", derive(Eq, PartialEq))]
>  #[non_exhaustive]
>  pub struct OneOfSchema {
> @@ -880,6 +996,30 @@ pub struct OneOfSchema {
>      pub list: &'static [(&'static str, &'static Schema)],
>  }
>  
> +fn serialize_oneof_schema<S>(one_of: &OneOfSchema, serializer: S) -> Result<S::Ok, S::Error>
> +where
> +    S: Serializer,
> +{

I'll first have to refresh my memory on the perl-side oneOf weirdness
for this.
Maybe @Dominik can take a look as well.
If we need rust->perl support here then the perl oneOf may very well
need some changes...

> +    use serde::ser::SerializeMap;
> +
> +    let mut map = serializer.serialize_map(Some(3))?;
> +
> +    map.serialize_entry("description", &one_of.description)?;
> +
> +    let variants = one_of
> +        .list
> +        .iter()
> +        .map(|(_, schema)| schema)
> +        .collect::<Vec<_>>();
> +
> +    map.serialize_entry("oneOf", &variants)?;
> +
> +    // The schema gets inserted into the parent properties
> +    map.serialize_entry("type-property", &one_of.type_property_entry.0)?;
> +
> +    map.end()
> +}
> +
>  const fn assert_one_of_list_is_sorted(list: &[(&str, &Schema)]) {
>      use std::cmp::Ordering;
>  
> @@ -1360,7 +1500,8 @@ impl Iterator for OneOfPropertyIterator {
>  ///     ],
>  /// ).schema();
>  /// ```
> -#[derive(Debug)]
> +#[derive(Debug, Serialize)]
> +#[serde(tag = "type", rename_all = "lowercase")]
>  #[cfg_attr(feature = "test-harness", derive(Eq, PartialEq))]
>  pub enum Schema {
>      Null,
> @@ -1370,10 +1511,81 @@ pub enum Schema {
>      String(StringSchema),
>      Object(ObjectSchema),
>      Array(ArraySchema),
> +    #[serde(serialize_with = "serialize_allof_schema")]

^ This should use `#[serde(rename = "object")]`. We don't have explicit
AllOfs in perl, because in perl we can easily just merge properties. In
rust we were unable to do that, since our schemas are all
compile-time-const.

>      AllOf(AllOfSchema),
> +    #[serde(untagged)]
> +    #[serde(serialize_with = "serialize_oneof_schema", rename = "oneOf")]
>      OneOf(OneOfSchema),
>  }
>  
> +/// Serialize the AllOf Schema
> +///
> +/// This will create one ObjectSchema and merge the properties of all the children.
> +fn serialize_allof_schema<S>(all_of: &AllOfSchema, serializer: S) -> Result<S::Ok, S::Error>

drop this and call the above mentioned `serialize_object_schema()`
instead.

> +where
> +    S: Serializer,
> +{
> +    use serde::ser::SerializeMap;
> +
> +    let mut map = serializer.serialize_map(Some(4))?;
> +
> +    // Add the top-level description
> +    map.serialize_entry("description", &all_of.description)?;
> +
> +    // The type is always object
> +    map.serialize_entry("type", "object")?;
> +
> +    let mut all_properties = HashMap::new();
> +    let mut additional_properties = false;
> +
> +    for &schema in all_of.list {
> +        if let Some(object_schema) = schema.object() {
> +            // If any schema allows additional properties, the merged schema will too
> +            if object_schema.additional_properties {
> +                additional_properties = true;
> +            }
> +
> +            // Add all properties from this schema
> +            for (name, optional, prop_schema) in object_schema.properties {
> +                all_properties.insert(*name, (*optional, *prop_schema));
> +            }
> +        } else if let Some(nested_all_of) = schema.all_of() {
> +            // For nested AllOf schemas go through recursively
> +            for &nested_schema in nested_all_of.list {
> +                if let Some(object_schema) = nested_schema.object() {
> +                    if object_schema.additional_properties {
> +                        additional_properties = true;
> +                    }
> +
> +                    for (name, optional, prop_schema) in object_schema.properties {
> +                        all_properties.insert(*name, (*optional, *prop_schema));
> +                    }
> +                }
> +            }
> +        }
> +    }
> +
> +    // Add the merged properties
> +    let properties_entry = all_properties
> +        .iter()
> +        .map(|(name, (optional, schema))| {
> +            (
> +                *name,
> +                OptionalSchema {
> +                    optional: *optional,
> +                    schema,
> +                },
> +            )
> +        })
> +        .collect::<HashMap<_, _>>();
> +
> +    map.serialize_entry("properties", &properties_entry)?;
> +
> +    map.serialize_entry("additional_properties", &additional_properties)?;
> +
> +    map.end()
> +}
> +
>  impl Schema {
>      /// Verify JSON value with `schema`.
>      pub fn verify_json(&self, data: &Value) -> Result<(), Error> {
> @@ -1694,10 +1906,12 @@ impl Schema {
>  }
>  
>  /// A string enum entry. An enum entry must have a value and a description.
> -#[derive(Clone, Debug)]
> +#[derive(Clone, Debug, Serialize)]
> +#[serde(transparent)]
>  #[cfg_attr(feature = "test-harness", derive(Eq, PartialEq))]
>  pub struct EnumEntry {
>      pub value: &'static str,
> +    #[serde(skip)]
>      pub description: &'static str,
>  }
>  
> @@ -1776,14 +1990,20 @@ impl EnumEntry {
>  /// let data = PRODUCT_LIST_SCHEMA.parse_property_string("1,2"); // parse as Array
>  /// assert!(data.is_ok());
>  /// ```
> +#[derive(Serialize)]
>  pub enum ApiStringFormat {
>      /// Enumerate all valid strings
> +    #[serde(rename = "enum")]
>      Enum(&'static [EnumEntry]),
>      /// Use a regular expression to describe valid strings.
> +    #[serde(rename = "pattern")]
>      Pattern(&'static ConstRegexPattern),
>      /// Use a schema to describe complex types encoded as string.
> +    #[serde(rename = "format")]
>      PropertyString(&'static Schema),
>      /// Use a verification function.
> +    /// Note: we can't serialize this, panic if we encounter this.
> +    #[serde(skip)]

^ as mentioned, see my old-perl-serializing branch...

>      VerifyFn(ApiStringVerifyFn),
>  }
>  
> -- 
> 2.39.5




More information about the pbs-devel mailing list