[pve-devel] [PATCH proxmox v3 02/13] add proxmox-oci crate
Wolfgang Bumiller
w.bumiller at proxmox.com
Thu Jul 10 10:46:51 CEST 2025
On Wed, Jul 09, 2025 at 02:34:19PM +0200, Filip Schauer wrote:
> This crate can parse an OCI image tarball and extract its rootfs. Layers
> are applied in sequence, but an overlay filesystem is currently not
> used.
>
> Signed-off-by: Filip Schauer <f.schauer at proxmox.com>
> ---
> Changed since v2:
> * remove reachable unwraps & refactor code
> * increase hasher buffer size from 4096 to 32768 (matching internal
> sha2::Digest buffering)
> * preserve permissions and xattrs during rootfs extraction
> * handle whiteouts & opaque whiteouts
>
> Cargo.toml | 1 +
> proxmox-oci/Cargo.toml | 22 +++
> proxmox-oci/debian/changelog | 5 +
> proxmox-oci/debian/control | 45 +++++
> proxmox-oci/debian/debcargo.toml | 7 +
> proxmox-oci/src/lib.rs | 283 +++++++++++++++++++++++++++++++
> proxmox-oci/src/oci_tar_image.rs | 145 ++++++++++++++++
> 7 files changed, 508 insertions(+)
> create mode 100644 proxmox-oci/Cargo.toml
> create mode 100644 proxmox-oci/debian/changelog
> create mode 100644 proxmox-oci/debian/control
> create mode 100644 proxmox-oci/debian/debcargo.toml
> create mode 100644 proxmox-oci/src/lib.rs
> create mode 100644 proxmox-oci/src/oci_tar_image.rs
>
> diff --git a/Cargo.toml b/Cargo.toml
> index 020e7497..4606fc19 100644
> --- a/Cargo.toml
> +++ b/Cargo.toml
> @@ -26,6 +26,7 @@ members = [
> "proxmox-metrics",
> "proxmox-network-api",
> "proxmox-notify",
> + "proxmox-oci",
> "proxmox-openid",
> "proxmox-product-config",
> "proxmox-resource-scheduling",
> diff --git a/proxmox-oci/Cargo.toml b/proxmox-oci/Cargo.toml
> new file mode 100644
> index 00000000..4daff6ab
> --- /dev/null
> +++ b/proxmox-oci/Cargo.toml
> @@ -0,0 +1,22 @@
> +[package]
> +name = "proxmox-oci"
> +description = "OCI image parsing and extraction"
> +version = "0.1.0"
> +
> +authors.workspace = true
> +edition.workspace = true
> +exclude.workspace = true
> +homepage.workspace = true
> +license.workspace = true
> +repository.workspace = true
> +rust-version.workspace = true
> +
> +[dependencies]
> +flate2.workspace = true
> +oci-spec = "0.8.1"
> +sha2 = "0.10"
> +tar.workspace = true
> +thiserror = "1"
> +zstd.workspace = true
> +
> +proxmox-io.workspace = true
> diff --git a/proxmox-oci/debian/changelog b/proxmox-oci/debian/changelog
> new file mode 100644
> index 00000000..754d06c1
> --- /dev/null
> +++ b/proxmox-oci/debian/changelog
> @@ -0,0 +1,5 @@
> +rust-proxmox-oci (0.1.0-1) bookworm; urgency=medium
> +
> + * Initial release.
> +
> + -- Proxmox Support Team <support at proxmox.com> Mon, 28 Apr 2025 12:34:56 +0200
> diff --git a/proxmox-oci/debian/control b/proxmox-oci/debian/control
> new file mode 100644
> index 00000000..f33331c5
> --- /dev/null
> +++ b/proxmox-oci/debian/control
> @@ -0,0 +1,45 @@
> +Source: rust-proxmox-oci
> +Section: rust
> +Priority: optional
> +Build-Depends: debhelper-compat (= 13),
> + dh-sequence-cargo
> +Build-Depends-Arch: cargo:native <!nocheck>,
> + rustc:native (>= 1.82) <!nocheck>,
> + libstd-rust-dev <!nocheck>,
> + librust-flate2-1+default-dev <!nocheck>,
> + librust-oci-spec-0.8+default-dev (>= 0.8.1-~~) <!nocheck>,
> + librust-proxmox-io-1+default-dev (>= 1.2.0-~~) <!nocheck>,
> + librust-sha2-0.10+default-dev <!nocheck>,
> + librust-tar-0.4+default-dev <!nocheck>,
> + librust-thiserror-1+default-dev <!nocheck>,
> + librust-zstd-0.13+default-dev <!nocheck>
> +Maintainer: Proxmox Support Team <support at proxmox.com>
> +Standards-Version: 4.7.0
> +Vcs-Git: git://git.proxmox.com/git/proxmox.git
> +Vcs-Browser: https://git.proxmox.com/?p=proxmox.git
> +Homepage: https://proxmox.com
> +X-Cargo-Crate: proxmox-oci
> +Rules-Requires-Root: no
> +
> +Package: librust-proxmox-oci-dev
> +Architecture: any
> +Multi-Arch: same
> +Depends:
> + ${misc:Depends},
> + librust-flate2-1+default-dev,
> + librust-oci-spec-0.8+default-dev (>= 0.8.1-~~),
> + librust-proxmox-io-1+default-dev (>= 1.2.0-~~),
> + librust-sha2-0.10+default-dev,
> + librust-tar-0.4+default-dev,
> + librust-thiserror-1+default-dev,
> + librust-zstd-0.13+default-dev
> +Provides:
> + librust-proxmox-oci+default-dev (= ${binary:Version}),
> + librust-proxmox-oci-0-dev (= ${binary:Version}),
> + librust-proxmox-oci-0+default-dev (= ${binary:Version}),
> + librust-proxmox-oci-0.1-dev (= ${binary:Version}),
> + librust-proxmox-oci-0.1+default-dev (= ${binary:Version}),
> + librust-proxmox-oci-0.1.0-dev (= ${binary:Version}),
> + librust-proxmox-oci-0.1.0+default-dev (= ${binary:Version})
> +Description: OCI image parsing and extraction - Rust source code
> + Source code for Debianized Rust crate "proxmox-oci"
> diff --git a/proxmox-oci/debian/debcargo.toml b/proxmox-oci/debian/debcargo.toml
> new file mode 100644
> index 00000000..b7864cdb
> --- /dev/null
> +++ b/proxmox-oci/debian/debcargo.toml
> @@ -0,0 +1,7 @@
> +overlay = "."
> +crate_src_path = ".."
> +maintainer = "Proxmox Support Team <support at proxmox.com>"
> +
> +[source]
> +vcs_git = "git://git.proxmox.com/git/proxmox.git"
> +vcs_browser = "https://git.proxmox.com/?p=proxmox.git"
> diff --git a/proxmox-oci/src/lib.rs b/proxmox-oci/src/lib.rs
> new file mode 100644
> index 00000000..b54d06e1
> --- /dev/null
> +++ b/proxmox-oci/src/lib.rs
> @@ -0,0 +1,283 @@
> +use std::collections::HashMap;
> +use std::fs::{read_dir, remove_dir_all, remove_file, File};
> +use std::io::{Read, Seek};
> +use std::path::{Path, PathBuf};
> +use std::str::FromStr;
> +
> +use flate2::read::GzDecoder;
> +pub use oci_spec::image::Config;
> +use oci_spec::image::{Arch, ImageConfiguration, ImageManifest, MediaType};
> +use oci_spec::OciSpecError;
> +use sha2::digest::generic_array::GenericArray;
> +use sha2::{Digest, Sha256};
> +use tar::{Archive, EntryType};
> +use thiserror::Error;
> +
> +mod oci_tar_image;
> +use oci_tar_image::OciTarImage;
> +
> +fn compute_digest<R: Read, H: Digest>(
> + mut reader: R,
> + mut hasher: H,
> +) -> std::io::Result<GenericArray<u8, H::OutputSize>> {
> + let mut buf = proxmox_io::boxed::zeroed(32768);
> +
> + loop {
> + let bytes_read = reader.read(&mut buf)?;
> + if bytes_read == 0 {
> + break Ok(hasher.finalize());
> + }
> +
> + hasher.update(&buf[..bytes_read]);
> + }
> +}
> +
> +fn compute_sha256<R: Read>(reader: R) -> std::io::Result<oci_spec::image::Sha256Digest> {
> + let digest = compute_digest(reader, Sha256::new())?;
> + Ok(oci_spec::image::Sha256Digest::from_str(&format!("{digest:x}")).unwrap())
> +}
> +
> +/// Build a mapping from uncompressed layer digests (as found in the image config's `rootfs.diff_ids`)
> +/// to their corresponding compressed-layer digests (i.e. the filenames under `blobs/<algorithm>/<digest>`)
> +fn build_layer_map<R: Read + Seek>(
> + mut oci_tar_image: OciTarImage<R>,
> + image_manifest: &ImageManifest,
> +) -> Result<
> + (
> + OciTarImage<R>,
> + HashMap<oci_spec::image::Digest, oci_spec::image::Descriptor>,
> + ),
> + ExtractError,
> +> {
> + let mut layer_mapping = HashMap::new();
> +
> + for layer in image_manifest.layers() {
> + let digest = match layer.media_type() {
> + MediaType::ImageLayer | MediaType::ImageLayerNonDistributable => layer.digest().clone(),
> + MediaType::ImageLayerGzip | MediaType::ImageLayerNonDistributableGzip => {
> + let mut compressed_blob = oci_tar_image
> + .open_blob(layer.digest())
> + .ok_or(ExtractError::MissingLayerFile(layer.digest().clone()))?;
> + let decoder = GzDecoder::new(&mut compressed_blob);
> + let hash = compute_sha256(decoder)?.into();
> + oci_tar_image = compressed_blob.into_oci_tar_image();
> + hash
> + }
> + MediaType::ImageLayerZstd | MediaType::ImageLayerNonDistributableZstd => {
> + let mut compressed_blob = oci_tar_image
> + .open_blob(layer.digest())
> + .ok_or(ExtractError::MissingLayerFile(layer.digest().clone()))?;
> + let decoder = zstd::Decoder::new(&mut compressed_blob)?;
> + let hash = compute_sha256(decoder)?.into();
> + oci_tar_image = compressed_blob.into_oci_tar_image();
> + hash
> + }
> + // Skip any other non-ImageLayer related media types.
> + // Match explicitly to avoid missing new image layer types when oci-spec updates.
> + MediaType::Descriptor
> + | MediaType::LayoutHeader
> + | MediaType::ImageManifest
> + | MediaType::ImageIndex
> + | MediaType::ImageConfig
> + | MediaType::ArtifactManifest
> + | MediaType::EmptyJSON
> + | MediaType::Other(_) => continue,
> + };
> +
> + layer_mapping.insert(digest, layer.clone());
> + }
> +
> + Ok((oci_tar_image, layer_mapping))
> +}
> +
> +#[derive(Debug, Error)]
> +pub enum ProxmoxOciError {
> + #[error("Error while parsing OCI image: {0}")]
> + ParseError(#[from] ParseError),
> + #[error("Error while extracting OCI image: {0}")]
> + ExtractError(#[from] ExtractError),
> +}
> +
> +pub fn parse_and_extract_image<P: AsRef<Path>>(
> + oci_tar_path: P,
> + rootfs_path: P,
> +) -> Result<Option<Config>, ProxmoxOciError> {
> + let (oci_tar_image, image_manifest, image_config) = parse_image(oci_tar_path)?;
> +
> + extract_image_rootfs(oci_tar_image, &image_manifest, &image_config, rootfs_path)?;
> +
> + Ok(image_config.config().clone())
> +}
> +
> +#[derive(Debug, Error)]
> +pub enum ParseError {
> + #[error("OCI spec error: {0}")]
> + OciSpec(#[from] OciSpecError),
> + #[error("Wrong media type")]
> + WrongMediaType,
> + #[error("IO error: {0}")]
> + Io(#[from] std::io::Error),
> + #[error("Unsupported CPU architecture")]
> + UnsupportedArchitecture,
> + #[error("Missing image config")]
> + MissingImageConfig,
> +}
> +
> +fn parse_image<P: AsRef<Path>>(
> + oci_tar_path: P,
> +) -> Result<(OciTarImage<File>, ImageManifest, ImageConfiguration), ParseError> {
> + let oci_tar_file = File::open(oci_tar_path)?;
> + let mut oci_tar_image = OciTarImage::new(oci_tar_file)?;
> +
> + let image_manifest = oci_tar_image
> + .image_manifest(&Arch::Amd64)
> + .ok_or(ParseError::UnsupportedArchitecture)??;
> +
> + let image_config_descriptor = image_manifest.config();
> +
> + if image_config_descriptor.media_type() != &MediaType::ImageConfig {
> + return Err(ParseError::WrongMediaType);
> + }
> +
> + let mut image_config_file = oci_tar_image
> + .open_blob(image_config_descriptor.digest())
> + .ok_or(ParseError::MissingImageConfig)?;
> + let image_config = ImageConfiguration::from_reader(&mut image_config_file)?;
> +
> + Ok((
> + image_config_file.into_oci_tar_image(),
> + image_manifest,
> + image_config,
> + ))
> +}
> +
> +#[derive(Debug, Error)]
> +pub enum ExtractError {
> + #[error("Incorrectly formatted digest: \"{0}\"")]
> + InvalidDigest(String),
> + #[error("Unknown layer digest {0} found in rootfs.diff_ids")]
> + UnknownLayerDigest(oci_spec::image::Digest),
> + #[error("Layer file {0} mentioned in image manifest is missing")]
> + MissingLayerFile(oci_spec::image::Digest),
> + #[error("IO error: {0}")]
> + Io(#[from] std::io::Error),
> + #[error("Layer has wrong media type: {0}")]
> + WrongMediaType(String),
> +}
> +
> +fn extract_image_rootfs<R: Read + Seek, P: AsRef<Path>>(
> + oci_tar_image: OciTarImage<R>,
> + image_manifest: &ImageManifest,
> + image_config: &ImageConfiguration,
> + target_path: P,
> +) -> Result<(), ExtractError> {
> + let (mut oci_tar_image, layer_map) = build_layer_map(oci_tar_image, image_manifest)?;
> +
> + for layer in image_config.rootfs().diff_ids() {
> + let layer_digest = oci_spec::image::Digest::from_str(layer)
> + .map_err(|_| ExtractError::InvalidDigest(layer.to_string()))?;
> + let layer_descriptor = layer_map
> + .get(&layer_digest)
> + .ok_or(ExtractError::UnknownLayerDigest(layer_digest.clone()))?;
> + let mut layer_file = oci_tar_image
> + .open_blob(layer_descriptor.digest())
> + .ok_or(ExtractError::MissingLayerFile(layer_digest))?;
> +
> + let (whiteouts, opaque_whiteouts) = match layer_descriptor.media_type() {
> + MediaType::ImageLayer | MediaType::ImageLayerNonDistributable => {
> + extract_archive(&mut layer_file, &target_path)?
> + }
> + MediaType::ImageLayerGzip | MediaType::ImageLayerNonDistributableGzip => {
> + let mut gz_decoder = GzDecoder::new(&mut layer_file);
> + extract_archive(&mut gz_decoder, &target_path)?
> + }
> + MediaType::ImageLayerZstd | MediaType::ImageLayerNonDistributableZstd => {
> + let mut zstd_decoder = zstd::Decoder::new(&mut layer_file)?;
> + extract_archive(&mut zstd_decoder, &target_path)?
> + }
> + // Error on any other non-ImageLayer related media types.
> + // Match explicitly to avoid missing new image layer types when oci-spec updates.
> + media_type @ (MediaType::Descriptor
> + | MediaType::LayoutHeader
> + | MediaType::ImageManifest
> + | MediaType::ImageIndex
> + | MediaType::ImageConfig
> + | MediaType::ArtifactManifest
> + | MediaType::EmptyJSON
> + | MediaType::Other(_)) => {
> + return Err(ExtractError::WrongMediaType(media_type.to_string()))
> + }
> + };
> +
> + oci_tar_image = layer_file.into_oci_tar_image();
> +
> + for whiteout in whiteouts {
> + let wh_abs_path = target_path.as_ref().join(&whiteout);
> + remove_path(wh_abs_path)?;
> + }
> +
> + for opaque_whiteout in opaque_whiteouts {
> + let wh_abs_path = target_path.as_ref().join(&opaque_whiteout);
> + for direntry in read_dir(wh_abs_path)? {
> + remove_path(direntry?.path())?;
> + }
> + }
> + }
> +
> + Ok(())
> +}
> +
> +fn extract_archive<R: Read, P: AsRef<Path>>(
> + reader: &mut R,
> + target_path: P,
> +) -> std::io::Result<(Vec<PathBuf>, Vec<PathBuf>)> {
> + const WHITEOUT_PREFIX: &str = ".wh.";
> + const OPAQUE_WHITEOUT_NAME: &str = ".wh..wh..opq";
> +
> + let mut archive = Archive::new(reader);
> + archive.set_preserve_ownerships(true);
> + archive.set_preserve_permissions(true);
> + archive.set_unpack_xattrs(true);
> + let mut directories = Vec::new();
> + let mut whiteouts = Vec::new();
> + let mut opaque_whiteouts = Vec::new();
> +
> + for entry in archive.entries()? {
> + let mut file = entry?;
> + if file.header().entry_type() == EntryType::Directory {
> + directories.push(file);
> + } else {
> + let filepath = file.path()?.into_owned();
> + if let Some(filename) = filepath.file_name() {
> + if filename == OPAQUE_WHITEOUT_NAME {
> + if let Some(parent) = filepath.parent() {
> + opaque_whiteouts.push(parent.to_path_buf());
A path can *technically* appear in the same layer both as a whiteout and
as a new entry, so delaying the removal until after extraction may
remove files which should still exist.
*Technically* the spec allows whiteouts to be ordered "wrong"...
The spec states:
- Whiteout files MUST only apply to resources in lower/parent layers.
- Files that are present in the same layer as a whiteout file can only be hidden by whiteout files in subsequent layers.
So in order to be "truly" correct, we'd have to go through the archive
twice: once to apply all the whiteouts, and then a second time to
extract all the non-whiteouts...
I'm not sure this happens in the real world, but I think code-wise it's
easy enough to handle.
> + }
> + continue;
> + } else if let Some(filename) = filename.to_str() {
> + if let Some(filename_stripped) = filename.strip_prefix(WHITEOUT_PREFIX) {
> + whiteouts.push(filepath.with_file_name(filename_stripped));
> + continue;
> + }
> + }
> + }
> +
At this point we also have to remove an already existing destination -
potentially recursively - before unpacking. When a layer replaces a
directory with a file, it simply contains the file, without any
preceding whiteout for the directory.
> + file.unpack_in(&target_path)?;
> + }
> + }
> +
> + directories.sort_by(|a, b| b.path_bytes().cmp(&a.path_bytes()));
> + for mut dir in directories {
> + dir.unpack_in(&target_path)?;
> + }
> +
> + Ok((whiteouts, opaque_whiteouts))
> +}
> +
> +fn remove_path(path: PathBuf) -> std::io::Result<()> {
> + if path.metadata()?.is_dir() {
> + remove_dir_all(path)
> + } else {
> + remove_file(path)
> + }
> +}
> diff --git a/proxmox-oci/src/oci_tar_image.rs b/proxmox-oci/src/oci_tar_image.rs
> new file mode 100644
> index 00000000..212f6b53
> --- /dev/null
> +++ b/proxmox-oci/src/oci_tar_image.rs
> @@ -0,0 +1,145 @@
> +use std::collections::HashMap;
> +use std::io::{Read, Seek, SeekFrom};
> +use std::ops::Range;
> +use std::path::{Path, PathBuf};
> +
> +use oci_spec::image::{Arch, Digest, ImageIndex, ImageManifest, MediaType};
> +use oci_spec::OciSpecError;
> +use tar::Archive;
> +
> +use proxmox_io::RangeReader;
> +
> +#[derive(Clone)]
> +struct TarEntry {
> + range: Range<u64>,
> +}
> +
> +impl TarEntry {
> + fn new(range: Range<u64>) -> Self {
> + Self { range }
> + }
> +}
> +
> +pub struct OciTarImage<R: Read + Seek> {
> + reader: R,
> + entries: HashMap<PathBuf, TarEntry>,
> + image_index: ImageIndex,
> +}
> +
> +impl<R: Read + Seek> OciTarImage<R> {
> + pub fn new(reader: R) -> oci_spec::Result<Self> {
> + let mut archive = Archive::new(reader);
> + let entries = archive.entries_with_seek()?;
> + let mut entries_index = HashMap::new();
> + let mut image_index = None;
> +
> + for entry in entries {
> + let mut entry = entry?;
> + let offset = entry.raw_file_position();
> + let size = entry.size();
> + let path = entry.path()?.into_owned();
> +
> + if path.as_path() == Path::new("index.json") {
> + image_index = Some(ImageIndex::from_reader(&mut entry)?);
> + }
> +
> + let tar_entry = TarEntry::new(offset..(offset + size));
> + entries_index.insert(path, tar_entry);
> + }
> +
> + if let Some(image_index) = image_index {
> + Ok(Self {
> + reader: archive.into_inner(),
> + entries: entries_index,
> + image_index,
> + })
> + } else {
> + Err(OciSpecError::Other("Missing index.json file".into()))
> + }
> + }
> +
> + pub fn image_index(&self) -> &ImageIndex {
> + &self.image_index
> + }
> +
> + fn get_blob_entry(&self, digest: &Digest) -> Option<TarEntry> {
> + let path = get_blob_path(digest);
> + self.entries.get(&path).cloned()
> + }
> +
> + pub fn open_blob(self, digest: &Digest) -> Option<OciTarImageBlob<R>> {
> + if let Some(entry) = self.get_blob_entry(digest) {
> + Some(OciTarImageBlob::new(self, entry.range))
> + } else {
> + None
> + }
> + }
> +
> + pub fn image_manifest(
> + &mut self,
> + architecture: &Arch,
> + ) -> Option<oci_spec::Result<ImageManifest>> {
> + let digest = match self.image_index.manifests().iter().find(|&x| {
> + x.media_type() == &MediaType::ImageManifest
> + && x.platform()
> + .as_ref()
> + .is_none_or(|platform| platform.architecture() == architecture)
> + }) {
> + Some(descriptor) => descriptor.digest(),
> + None => return None,
> + };
> +
> + if let Some(entry) = self.get_blob_entry(digest) {
> + let mut range_reader = RangeReader::new(&mut self.reader, entry.range);
> + Some(ImageManifest::from_reader(&mut range_reader))
> + } else {
> + Some(Err(OciSpecError::Other(format!(
> + "Image manifest with digest {digest} mentioned in image index is missing"
> + ))))
> + }
> + }
> +}
> +
> +fn get_blob_path(digest: &Digest) -> PathBuf {
> + let algorithm = digest.algorithm();
> + let digest = digest.digest();
> + format!("blobs/{algorithm}/{digest}").into()
> +}
> +
> +pub struct OciTarImageBlob<R: Read + Seek> {
> + range_reader: RangeReader<R>,
> + entries: HashMap<PathBuf, TarEntry>,
> + image_index: ImageIndex,
> +}
> +
> +impl<R: Read + Seek> OciTarImageBlob<R> {
> + fn new(archive: OciTarImage<R>, range: Range<u64>) -> Self {
> + let range_reader = RangeReader::new(archive.reader, range);
> +
> + Self {
> + range_reader,
> + entries: archive.entries,
> + image_index: archive.image_index,
> + }
> + }
> +
> + pub fn into_oci_tar_image(self) -> OciTarImage<R> {
> + OciTarImage {
> + reader: self.range_reader.into_inner(),
> + entries: self.entries,
> + image_index: self.image_index,
> + }
> + }
> +}
> +
> +impl<R: Read + Seek> Read for OciTarImageBlob<R> {
> + fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
> + self.range_reader.read(buf)
> + }
> +}
> +
> +impl<R: Read + Seek> Seek for OciTarImageBlob<R> {
> + fn seek(&mut self, pos: SeekFrom) -> std::io::Result<u64> {
> + self.range_reader.seek(pos)
> + }
> +}
> --
> 2.47.2
More information about the pve-devel
mailing list