[pve-devel] [PATCH proxmox v3 02/13] add proxmox-oci crate
Filip Schauer
f.schauer at proxmox.com
Wed Jul 9 14:34:19 CEST 2025
This crate can parse an OCI image tarball and extract its rootfs. Layers
are applied in sequence, but an overlay filesystem is currently not
used.
Signed-off-by: Filip Schauer <f.schauer at proxmox.com>
---
Changed since v2:
* remove reachable unwraps & refactor code
* increase hasher buffer size from 4096 to 32768 (matching internal
sha2::Digest buffering)
* preserve permissions and xattrs during rootfs extraction
* handle whiteouts & opaque whiteouts
Cargo.toml | 1 +
proxmox-oci/Cargo.toml | 22 +++
proxmox-oci/debian/changelog | 5 +
proxmox-oci/debian/control | 45 +++++
proxmox-oci/debian/debcargo.toml | 7 +
proxmox-oci/src/lib.rs | 283 +++++++++++++++++++++++++++++++
proxmox-oci/src/oci_tar_image.rs | 145 ++++++++++++++++
7 files changed, 508 insertions(+)
create mode 100644 proxmox-oci/Cargo.toml
create mode 100644 proxmox-oci/debian/changelog
create mode 100644 proxmox-oci/debian/control
create mode 100644 proxmox-oci/debian/debcargo.toml
create mode 100644 proxmox-oci/src/lib.rs
create mode 100644 proxmox-oci/src/oci_tar_image.rs
diff --git a/Cargo.toml b/Cargo.toml
index 020e7497..4606fc19 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -26,6 +26,7 @@ members = [
"proxmox-metrics",
"proxmox-network-api",
"proxmox-notify",
+ "proxmox-oci",
"proxmox-openid",
"proxmox-product-config",
"proxmox-resource-scheduling",
diff --git a/proxmox-oci/Cargo.toml b/proxmox-oci/Cargo.toml
new file mode 100644
index 00000000..4daff6ab
--- /dev/null
+++ b/proxmox-oci/Cargo.toml
@@ -0,0 +1,22 @@
+[package]
+name = "proxmox-oci"
+description = "OCI image parsing and extraction"
+version = "0.1.0"
+
+authors.workspace = true
+edition.workspace = true
+exclude.workspace = true
+homepage.workspace = true
+license.workspace = true
+repository.workspace = true
+rust-version.workspace = true
+
+[dependencies]
+flate2.workspace = true
+oci-spec = "0.8.1"
+sha2 = "0.10"
+tar.workspace = true
+thiserror = "1"
+zstd.workspace = true
+
+proxmox-io.workspace = true
diff --git a/proxmox-oci/debian/changelog b/proxmox-oci/debian/changelog
new file mode 100644
index 00000000..754d06c1
--- /dev/null
+++ b/proxmox-oci/debian/changelog
@@ -0,0 +1,5 @@
+rust-proxmox-oci (0.1.0-1) bookworm; urgency=medium
+
+ * Initial release.
+
+ -- Proxmox Support Team <support at proxmox.com> Mon, 28 Apr 2025 12:34:56 +0200
diff --git a/proxmox-oci/debian/control b/proxmox-oci/debian/control
new file mode 100644
index 00000000..f33331c5
--- /dev/null
+++ b/proxmox-oci/debian/control
@@ -0,0 +1,45 @@
+Source: rust-proxmox-oci
+Section: rust
+Priority: optional
+Build-Depends: debhelper-compat (= 13),
+ dh-sequence-cargo
+Build-Depends-Arch: cargo:native <!nocheck>,
+ rustc:native (>= 1.82) <!nocheck>,
+ libstd-rust-dev <!nocheck>,
+ librust-flate2-1+default-dev <!nocheck>,
+ librust-oci-spec-0.8+default-dev (>= 0.8.1-~~) <!nocheck>,
+ librust-proxmox-io-1+default-dev (>= 1.2.0-~~) <!nocheck>,
+ librust-sha2-0.10+default-dev <!nocheck>,
+ librust-tar-0.4+default-dev <!nocheck>,
+ librust-thiserror-1+default-dev <!nocheck>,
+ librust-zstd-0.13+default-dev <!nocheck>
+Maintainer: Proxmox Support Team <support at proxmox.com>
+Standards-Version: 4.7.0
+Vcs-Git: git://git.proxmox.com/git/proxmox.git
+Vcs-Browser: https://git.proxmox.com/?p=proxmox.git
+Homepage: https://proxmox.com
+X-Cargo-Crate: proxmox-oci
+Rules-Requires-Root: no
+
+Package: librust-proxmox-oci-dev
+Architecture: any
+Multi-Arch: same
+Depends:
+ ${misc:Depends},
+ librust-flate2-1+default-dev,
+ librust-oci-spec-0.8+default-dev (>= 0.8.1-~~),
+ librust-proxmox-io-1+default-dev (>= 1.2.0-~~),
+ librust-sha2-0.10+default-dev,
+ librust-tar-0.4+default-dev,
+ librust-thiserror-1+default-dev,
+ librust-zstd-0.13+default-dev
+Provides:
+ librust-proxmox-oci+default-dev (= ${binary:Version}),
+ librust-proxmox-oci-0-dev (= ${binary:Version}),
+ librust-proxmox-oci-0+default-dev (= ${binary:Version}),
+ librust-proxmox-oci-0.1-dev (= ${binary:Version}),
+ librust-proxmox-oci-0.1+default-dev (= ${binary:Version}),
+ librust-proxmox-oci-0.1.0-dev (= ${binary:Version}),
+ librust-proxmox-oci-0.1.0+default-dev (= ${binary:Version})
+Description: OCI image parsing and extraction - Rust source code
+ Source code for Debianized Rust crate "proxmox-oci"
diff --git a/proxmox-oci/debian/debcargo.toml b/proxmox-oci/debian/debcargo.toml
new file mode 100644
index 00000000..b7864cdb
--- /dev/null
+++ b/proxmox-oci/debian/debcargo.toml
@@ -0,0 +1,7 @@
+overlay = "."
+crate_src_path = ".."
+maintainer = "Proxmox Support Team <support at proxmox.com>"
+
+[source]
+vcs_git = "git://git.proxmox.com/git/proxmox.git"
+vcs_browser = "https://git.proxmox.com/?p=proxmox.git"
diff --git a/proxmox-oci/src/lib.rs b/proxmox-oci/src/lib.rs
new file mode 100644
index 00000000..b54d06e1
--- /dev/null
+++ b/proxmox-oci/src/lib.rs
@@ -0,0 +1,283 @@
+use std::collections::HashMap;
+use std::fs::{read_dir, remove_dir_all, remove_file, File};
+use std::io::{Read, Seek};
+use std::path::{Path, PathBuf};
+use std::str::FromStr;
+
+use flate2::read::GzDecoder;
+pub use oci_spec::image::Config;
+use oci_spec::image::{Arch, ImageConfiguration, ImageManifest, MediaType};
+use oci_spec::OciSpecError;
+use sha2::digest::generic_array::GenericArray;
+use sha2::{Digest, Sha256};
+use tar::{Archive, EntryType};
+use thiserror::Error;
+
+mod oci_tar_image;
+use oci_tar_image::OciTarImage;
+
+/// Feeds everything `reader` yields into `hasher` and returns the finalized digest.
+///
+/// The input is streamed through a 32768-byte buffer, so it never has to be held in memory as a
+/// whole.
+///
+/// # Errors
+///
+/// Returns the first I/O error reported by `reader`. Reads that fail with
+/// [`std::io::ErrorKind::Interrupted`] are retried instead of being reported.
+fn compute_digest<R: Read, H: Digest>(
+    mut reader: R,
+    mut hasher: H,
+) -> std::io::Result<GenericArray<u8, H::OutputSize>> {
+    let mut buf = proxmox_io::boxed::zeroed(32768);
+
+    loop {
+        let bytes_read = match reader.read(&mut buf) {
+            // A zero-length read signals the end of the input.
+            Ok(0) => break Ok(hasher.finalize()),
+            Ok(bytes_read) => bytes_read,
+            // An interrupted read is not a failure; the `Read` contract asks callers to retry.
+            Err(err) if err.kind() == std::io::ErrorKind::Interrupted => continue,
+            Err(err) => break Err(err),
+        };
+
+        hasher.update(&buf[..bytes_read]);
+    }
+}
+
+/// Computes the SHA-256 digest of everything `reader` yields.
+///
+/// # Errors
+///
+/// Propagates I/O errors encountered while reading from `reader`.
+fn compute_sha256<R: Read>(reader: R) -> std::io::Result<oci_spec::image::Sha256Digest> {
+    let digest = compute_digest(reader, Sha256::new())?;
+    // The input is the lowercase hex rendering of a SHA-256 output, so a parse failure here
+    // would be a programming error rather than a condition callers can handle.
+    let digest = oci_spec::image::Sha256Digest::from_str(&format!("{digest:x}"))
+        .expect("hex-encoded SHA-256 output must parse as a Sha256Digest");
+    Ok(digest)
+}
+
+/// Associates the uncompressed digest of every layer (the form listed in the image config's
+/// `rootfs.diff_ids`) with the layer's manifest descriptor, whose digest names the blob stored
+/// under `blobs/<algorithm>/<digest>`.
+///
+/// Uncompressed layers are keyed by the digest from the manifest. Gzip and zstd layers are
+/// decompressed and hashed to obtain their key. Descriptors with any other media type are
+/// skipped.
+///
+/// Opening a blob consumes the image handle, so the handle is taken by value and handed back
+/// together with the finished map.
+fn build_layer_map<R: Read + Seek>(
+    mut oci_tar_image: OciTarImage<R>,
+    image_manifest: &ImageManifest,
+) -> Result<
+    (
+        OciTarImage<R>,
+        HashMap<oci_spec::image::Digest, oci_spec::image::Descriptor>,
+    ),
+    ExtractError,
+> {
+    let mut diff_id_to_layer = HashMap::new();
+
+    for descriptor in image_manifest.layers() {
+        let diff_id = match descriptor.media_type() {
+            MediaType::ImageLayer | MediaType::ImageLayerNonDistributable => {
+                descriptor.digest().clone()
+            }
+            MediaType::ImageLayerGzip | MediaType::ImageLayerNonDistributableGzip => {
+                let mut blob = oci_tar_image
+                    .open_blob(descriptor.digest())
+                    .ok_or_else(|| ExtractError::MissingLayerFile(descriptor.digest().clone()))?;
+                let uncompressed = compute_sha256(GzDecoder::new(&mut blob))?;
+                // Take the image handle back out of the blob for the next iteration.
+                oci_tar_image = blob.into_oci_tar_image();
+                uncompressed.into()
+            }
+            MediaType::ImageLayerZstd | MediaType::ImageLayerNonDistributableZstd => {
+                let mut blob = oci_tar_image
+                    .open_blob(descriptor.digest())
+                    .ok_or_else(|| ExtractError::MissingLayerFile(descriptor.digest().clone()))?;
+                let uncompressed = compute_sha256(zstd::Decoder::new(&mut blob)?)?;
+                // Take the image handle back out of the blob for the next iteration.
+                oci_tar_image = blob.into_oci_tar_image();
+                uncompressed.into()
+            }
+            // Skip any other non-ImageLayer related media types.
+            // Match explicitly to avoid missing new image layer types when oci-spec updates.
+            MediaType::Descriptor
+            | MediaType::LayoutHeader
+            | MediaType::ImageManifest
+            | MediaType::ImageIndex
+            | MediaType::ImageConfig
+            | MediaType::ArtifactManifest
+            | MediaType::EmptyJSON
+            | MediaType::Other(_) => continue,
+        };
+
+        diff_id_to_layer.insert(diff_id, descriptor.clone());
+    }
+
+    Ok((oci_tar_image, diff_id_to_layer))
+}
+
+/// Error returned by [`parse_and_extract_image`], wrapping the error of whichever phase failed.
+#[derive(Debug, Error)]
+pub enum ProxmoxOciError {
+ /// The image tarball could not be parsed.
+ #[error("Error while parsing OCI image: {0}")]
+ ParseError(#[from] ParseError),
+ /// The root filesystem could not be extracted.
+ #[error("Error while extracting OCI image: {0}")]
+ ExtractError(#[from] ExtractError),
+}
+
+/// Parses the OCI image tarball at `oci_tar_path` and unpacks its root filesystem into
+/// `rootfs_path`.
+///
+/// Returns a copy of the `config` section of the image configuration, if the image has one.
+///
+/// # Errors
+///
+/// Returns [`ProxmoxOciError::ParseError`] when the tarball cannot be parsed and
+/// [`ProxmoxOciError::ExtractError`] when unpacking the layers fails.
+pub fn parse_and_extract_image<P: AsRef<Path>>(
+    oci_tar_path: P,
+    rootfs_path: P,
+) -> Result<Option<Config>, ProxmoxOciError> {
+    let (tar_image, manifest, configuration) = parse_image(oci_tar_path)?;
+    extract_image_rootfs(tar_image, &manifest, &configuration, rootfs_path)?;
+
+    let runtime_config = configuration.config().clone();
+    Ok(runtime_config)
+}
+
+/// Errors that can occur while reading the image index, manifest and configuration from an OCI
+/// image tarball.
+#[derive(Debug, Error)]
+pub enum ParseError {
+ /// An error reported by the `oci_spec` crate.
+ #[error("OCI spec error: {0}")]
+ OciSpec(#[from] OciSpecError),
+ /// The config descriptor of the image manifest does not have the image config media type.
+ #[error("Wrong media type")]
+ WrongMediaType,
+ /// An I/O error, e.g. while opening the tarball.
+ #[error("IO error: {0}")]
+ Io(#[from] std::io::Error),
+ /// The image index contains no image manifest usable for the requested architecture.
+ #[error("Unsupported CPU architecture")]
+ UnsupportedArchitecture,
+ /// The image configuration blob referenced by the manifest is not part of the tarball.
+ #[error("Missing image config")]
+ MissingImageConfig,
+}
+
+/// Opens the OCI image tarball at `oci_tar_path`, picks the image manifest for the amd64
+/// architecture and loads the image configuration that manifest refers to.
+///
+/// On success the image handle is returned together with the manifest and the configuration so
+/// that the caller can go on to read the layer blobs.
+fn parse_image<P: AsRef<Path>>(
+    oci_tar_path: P,
+) -> Result<(OciTarImage<File>, ImageManifest, ImageConfiguration), ParseError> {
+    let mut tar_image = OciTarImage::new(File::open(oci_tar_path)?)?;
+
+    // `None` means the index has no matching manifest; `Some(Err(_))` means reading it failed.
+    let manifest = match tar_image.image_manifest(&Arch::Amd64) {
+        Some(manifest) => manifest?,
+        None => return Err(ParseError::UnsupportedArchitecture),
+    };
+
+    let config_descriptor = manifest.config();
+    if config_descriptor.media_type() != &MediaType::ImageConfig {
+        return Err(ParseError::WrongMediaType);
+    }
+
+    let Some(mut config_blob) = tar_image.open_blob(config_descriptor.digest()) else {
+        return Err(ParseError::MissingImageConfig);
+    };
+    let configuration = ImageConfiguration::from_reader(&mut config_blob)?;
+
+    // Opening the blob consumed the image handle; take it back out before returning.
+    Ok((config_blob.into_oci_tar_image(), manifest, configuration))
+}
+
+/// Errors that can occur while unpacking the layers of an OCI image into a root filesystem.
+#[derive(Debug, Error)]
+pub enum ExtractError {
+ /// An entry of `rootfs.diff_ids` could not be parsed as a digest.
+ #[error("Incorrectly formatted digest: \"{0}\"")]
+ InvalidDigest(String),
+ /// An entry of `rootfs.diff_ids` does not correspond to any layer of the image manifest.
+ #[error("Unknown layer digest {0} found in rootfs.diff_ids")]
+ UnknownLayerDigest(oci_spec::image::Digest),
+ /// The blob of a layer could not be found in the tarball.
+ #[error("Layer file {0} mentioned in image manifest is missing")]
+ MissingLayerFile(oci_spec::image::Digest),
+ /// An I/O error while reading a layer or writing to the target directory.
+ #[error("IO error: {0}")]
+ Io(#[from] std::io::Error),
+ /// A layer descriptor carries a media type that is not an image layer type.
+ #[error("Layer has wrong media type: {0}")]
+ WrongMediaType(String),
+}
+
+/// Unpacks all layers of the image into `target_path`, in the order given by the image config's
+/// `rootfs.diff_ids`.
+///
+/// Every layer is applied directly on top of the files written by the previous ones; whiteout
+/// entries remove files of earlier layers from `target_path` (see [`extract_archive`]).
+///
+/// # Errors
+///
+/// * [`ExtractError::InvalidDigest`] if a diff ID is not a valid digest.
+/// * [`ExtractError::UnknownLayerDigest`] if a diff ID matches no layer of the manifest.
+/// * [`ExtractError::MissingLayerFile`] if a layer blob is not part of the tarball.
+/// * [`ExtractError::WrongMediaType`] if a layer descriptor has a non-layer media type.
+/// * [`ExtractError::Io`] for I/O failures and for whiteouts that point outside the rootfs.
+fn extract_image_rootfs<R: Read + Seek, P: AsRef<Path>>(
+    oci_tar_image: OciTarImage<R>,
+    image_manifest: &ImageManifest,
+    image_config: &ImageConfiguration,
+    target_path: P,
+) -> Result<(), ExtractError> {
+    let target_path = target_path.as_ref();
+    let (mut oci_tar_image, layer_map) = build_layer_map(oci_tar_image, image_manifest)?;
+
+    for layer in image_config.rootfs().diff_ids() {
+        let layer_digest = oci_spec::image::Digest::from_str(layer)
+            .map_err(|_| ExtractError::InvalidDigest(layer.to_string()))?;
+        let layer_descriptor = layer_map
+            .get(&layer_digest)
+            .ok_or_else(|| ExtractError::UnknownLayerDigest(layer_digest.clone()))?;
+        // Report the digest of the blob that was looked up, not the diff ID: for compressed
+        // layers the two differ and only the blob digest names a file in the tarball.
+        let mut layer_file = oci_tar_image
+            .open_blob(layer_descriptor.digest())
+            .ok_or_else(|| ExtractError::MissingLayerFile(layer_descriptor.digest().clone()))?;
+
+        match layer_descriptor.media_type() {
+            MediaType::ImageLayer | MediaType::ImageLayerNonDistributable => {
+                extract_archive(&mut layer_file, target_path)?;
+            }
+            MediaType::ImageLayerGzip | MediaType::ImageLayerNonDistributableGzip => {
+                let mut gz_decoder = GzDecoder::new(&mut layer_file);
+                extract_archive(&mut gz_decoder, target_path)?;
+            }
+            MediaType::ImageLayerZstd | MediaType::ImageLayerNonDistributableZstd => {
+                let mut zstd_decoder = zstd::Decoder::new(&mut layer_file)?;
+                extract_archive(&mut zstd_decoder, target_path)?;
+            }
+            // Error on any other non-ImageLayer related media types.
+            // Match explicitly to avoid missing new image layer types when oci-spec updates.
+            media_type @ (MediaType::Descriptor
+            | MediaType::LayoutHeader
+            | MediaType::ImageManifest
+            | MediaType::ImageIndex
+            | MediaType::ImageConfig
+            | MediaType::ArtifactManifest
+            | MediaType::EmptyJSON
+            | MediaType::Other(_)) => {
+                return Err(ExtractError::WrongMediaType(media_type.to_string()))
+            }
+        }
+
+        // Opening the blob consumed the image handle; take it back for the next layer.
+        oci_tar_image = layer_file.into_oci_tar_image();
+    }
+
+    Ok(())
+}
+
+/// Unpacks one layer archive read from `reader` into `target_path`.
+///
+/// Ownership, permissions and extended attributes of the entries are preserved.
+///
+/// Whiteout entries (`.wh.<name>`) and opaque whiteouts (`.wh..wh..opq`) are not unpacked.
+/// They are applied to `target_path` at the point where they appear in the archive, so they
+/// remove what earlier layers left behind without touching entries that follow them in the
+/// same layer. An entry of the same layer that precedes its whiteout is still removed; the
+/// OCI image spec recommends that whiteouts come before their siblings.
+///
+/// # Errors
+///
+/// Returns an error if the archive cannot be read, an entry cannot be unpacked, or a whiteout
+/// refers to a location outside of `target_path`.
+fn extract_archive<R: Read, P: AsRef<Path>>(
+    reader: &mut R,
+    target_path: P,
+) -> std::io::Result<()> {
+    const WHITEOUT_PREFIX: &str = ".wh.";
+    const OPAQUE_WHITEOUT_NAME: &str = ".wh..wh..opq";
+
+    let target_path = target_path.as_ref();
+    let mut archive = Archive::new(reader);
+    archive.set_preserve_ownerships(true);
+    archive.set_preserve_permissions(true);
+    archive.set_unpack_xattrs(true);
+    let mut directories = Vec::new();
+
+    for entry in archive.entries()? {
+        let mut file = entry?;
+        if file.header().entry_type() == EntryType::Directory {
+            // Directories are unpacked after everything else, see below.
+            directories.push(file);
+            continue;
+        }
+
+        let filepath = file.path()?.into_owned();
+        if let Some(filename) = filepath.file_name() {
+            if filename == OPAQUE_WHITEOUT_NAME {
+                if let Some(parent) = filepath.parent() {
+                    clear_opaque_directory(target_path, parent)?;
+                }
+                continue;
+            }
+
+            let hidden_name = filename
+                .to_str()
+                .and_then(|name| name.strip_prefix(WHITEOUT_PREFIX));
+            if let Some(hidden_name) = hidden_name {
+                apply_whiteout(target_path, &filepath, hidden_name)?;
+                continue;
+            }
+        }
+
+        file.unpack_in(target_path)?;
+    }
+
+    // Reverse lexicographic order puts nested directories before their parents.
+    // NOTE(review): deferring directories presumably keeps restrictive directory permissions
+    // from blocking the creation of their contents - confirm.
+    directories.sort_by(|a, b| b.path_bytes().cmp(&a.path_bytes()));
+    for mut dir in directories {
+        dir.unpack_in(target_path)?;
+    }
+
+    Ok(())
+}
+
+/// Removes the path hidden by the whiteout entry `marker`, whose file name without the
+/// whiteout prefix is `hidden_name`. A path that does not exist is not an error.
+fn apply_whiteout(root: &Path, marker: &Path, hidden_name: &str) -> std::io::Result<()> {
+    // With these names `with_file_name` would address the parent directory (or its parent)
+    // instead of a sibling of the marker.
+    if matches!(hidden_name, "" | "." | "..") {
+        return Err(invalid_whiteout(marker));
+    }
+
+    let Some(hidden) = resolve_in_rootfs(root, &marker.with_file_name(hidden_name))? else {
+        return Ok(());
+    };
+
+    match remove_path(hidden) {
+        Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(()),
+        result => result,
+    }
+}
+
+/// Removes everything inside the archive-relative directory `dir`, keeping the directory
+/// itself. Nothing happens if `dir` does not exist below `root` or is not a directory.
+fn clear_opaque_directory(root: &Path, dir: &Path) -> std::io::Result<()> {
+    let Some(dir) = resolve_in_rootfs(root, dir)? else {
+        return Ok(());
+    };
+
+    // Do not follow a symlink in the last component: its target may lie outside the rootfs.
+    match dir.symlink_metadata() {
+        Ok(metadata) if metadata.is_dir() => {}
+        Ok(_) => return Ok(()),
+        Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(()),
+        Err(err) => return Err(err),
+    }
+
+    for direntry in read_dir(&dir)? {
+        remove_path(direntry?.path())?;
+    }
+
+    Ok(())
+}
+
+/// Maps the archive-relative path `relative` to a location below `root`.
+///
+/// Leading `/` and `.` components are dropped. All components but the last are resolved
+/// through symlinks and must end up inside `root`; the last component is left unresolved so
+/// that callers act on a symlink itself rather than on its target.
+///
+/// Returns `Ok(None)` if the parent directory does not exist, and an error if the path
+/// contains `..` or resolves to a location outside of `root`.
+fn resolve_in_rootfs(root: &Path, relative: &Path) -> std::io::Result<Option<PathBuf>> {
+    use std::path::Component;
+
+    let mut sanitized = PathBuf::new();
+    for component in relative.components() {
+        match component {
+            Component::Prefix(_) | Component::RootDir | Component::CurDir => {}
+            Component::Normal(part) => sanitized.push(part),
+            Component::ParentDir => return Err(invalid_whiteout(relative)),
+        }
+    }
+
+    let root = root.canonicalize()?;
+    let Some(name) = sanitized.file_name() else {
+        // No components left: the path refers to the rootfs directory itself.
+        return Ok(Some(root));
+    };
+
+    let parent = match sanitized.parent() {
+        Some(parent) => root.join(parent),
+        None => root.clone(),
+    };
+    let parent = match parent.canonicalize() {
+        Ok(parent) => parent,
+        Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(None),
+        Err(err) => return Err(err),
+    };
+    if !parent.starts_with(&root) {
+        return Err(invalid_whiteout(relative));
+    }
+
+    Ok(Some(parent.join(name)))
+}
+
+/// Builds the error reported for whiteout entries that must not be applied.
+fn invalid_whiteout(path: &Path) -> std::io::Error {
+    std::io::Error::new(
+        std::io::ErrorKind::InvalidData,
+        format!(
+            "whiteout entry {} does not refer to a path inside the rootfs",
+            path.display()
+        ),
+    )
+}
+
+/// Removes `path`: recursively if it is a directory, as a single file otherwise.
+///
+/// Symlinks are never followed, so a symlink (dangling or not) is removed itself and its
+/// target is left alone.
+fn remove_path(path: PathBuf) -> std::io::Result<()> {
+    if path.symlink_metadata()?.is_dir() {
+        remove_dir_all(path)
+    } else {
+        remove_file(path)
+    }
+}
diff --git a/proxmox-oci/src/oci_tar_image.rs b/proxmox-oci/src/oci_tar_image.rs
new file mode 100644
index 00000000..212f6b53
--- /dev/null
+++ b/proxmox-oci/src/oci_tar_image.rs
@@ -0,0 +1,145 @@
+use std::collections::HashMap;
+use std::io::{Read, Seek, SeekFrom};
+use std::ops::Range;
+use std::path::{Path, PathBuf};
+
+use oci_spec::image::{Arch, Digest, ImageIndex, ImageManifest, MediaType};
+use oci_spec::OciSpecError;
+use tar::Archive;
+
+use proxmox_io::RangeReader;
+
+/// Location of one tar member inside the tarball, stored as a byte range.
+#[derive(Clone)]
+struct TarEntry {
+    range: Range<u64>,
+}
+
+impl TarEntry {
+    /// Creates an entry covering `range`.
+    fn new(range: Range<u64>) -> Self {
+        TarEntry { range }
+    }
+}
+
+/// An OCI image stored in a tar archive.
+///
+/// The archive is scanned once on construction; afterwards blobs are located through an index
+/// of member paths and read straight from the underlying reader.
+pub struct OciTarImage<R: Read + Seek> {
+    reader: R,
+    entries: HashMap<PathBuf, TarEntry>,
+    image_index: ImageIndex,
+}
+
+impl<R: Read + Seek> OciTarImage<R> {
+    /// Scans the tar archive behind `reader`, records where every member's data lives and
+    /// parses `index.json`.
+    ///
+    /// Member names may carry a leading `./`, as written by `tar -cf image.tar .`.
+    ///
+    /// # Errors
+    ///
+    /// Fails if the archive cannot be read, if `index.json` cannot be parsed or if the archive
+    /// contains no `index.json`.
+    pub fn new(reader: R) -> oci_spec::Result<Self> {
+        let mut archive = Archive::new(reader);
+        let mut entries_index = HashMap::new();
+        let mut image_index = None;
+
+        for entry in archive.entries_with_seek()? {
+            let mut entry = entry?;
+            let offset = entry.raw_file_position();
+            let size = entry.size();
+            let path = entry.path()?.into_owned();
+            // Drop a leading `./` so that lookups by `index.json` and `blobs/...` also match
+            // archives whose member names are relative to `.`.
+            let path = path
+                .strip_prefix(".")
+                .map(Path::to_path_buf)
+                .unwrap_or(path);
+
+            if path.as_path() == Path::new("index.json") {
+                image_index = Some(ImageIndex::from_reader(&mut entry)?);
+            }
+
+            // The size comes from the tar header; saturate rather than overflow on a bogus one.
+            let tar_entry = TarEntry::new(offset..offset.saturating_add(size));
+            entries_index.insert(path, tar_entry);
+        }
+
+        let Some(image_index) = image_index else {
+            return Err(OciSpecError::Other("Missing index.json file".into()));
+        };
+
+        Ok(Self {
+            reader: archive.into_inner(),
+            entries: entries_index,
+            image_index,
+        })
+    }
+
+    /// Returns the parsed `index.json` of the image.
+    pub fn image_index(&self) -> &ImageIndex {
+        &self.image_index
+    }
+
+    /// Looks up the tar member holding the blob with the given digest.
+    fn get_blob_entry(&self, digest: &Digest) -> Option<TarEntry> {
+        let path = get_blob_path(digest);
+        self.entries.get(&path).cloned()
+    }
+
+    /// Turns the image into a reader over the blob with the given digest.
+    ///
+    /// Use [`OciTarImageBlob::into_oci_tar_image`] to get the image back afterwards. Returns
+    /// `None` - dropping the image - if the tarball does not contain the blob.
+    pub fn open_blob(self, digest: &Digest) -> Option<OciTarImageBlob<R>> {
+        let entry = self.get_blob_entry(digest)?;
+        Some(OciTarImageBlob::new(self, entry.range))
+    }
+
+    /// Reads the first image manifest of the index that either has no platform set or whose
+    /// platform matches `architecture`.
+    ///
+    /// Returns `None` if the index lists no such manifest, and `Some(Err(_))` if the manifest
+    /// blob is missing from the tarball or cannot be parsed.
+    pub fn image_manifest(
+        &mut self,
+        architecture: &Arch,
+    ) -> Option<oci_spec::Result<ImageManifest>> {
+        let digest = self
+            .image_index
+            .manifests()
+            .iter()
+            .find(|descriptor| {
+                descriptor.media_type() == &MediaType::ImageManifest
+                    && descriptor
+                        .platform()
+                        .as_ref()
+                        .is_none_or(|platform| platform.architecture() == architecture)
+            })?
+            .digest();
+
+        let Some(entry) = self.get_blob_entry(digest) else {
+            return Some(Err(OciSpecError::Other(format!(
+                "Image manifest with digest {digest} mentioned in image index is missing"
+            ))));
+        };
+
+        let mut range_reader = RangeReader::new(&mut self.reader, entry.range);
+        Some(ImageManifest::from_reader(&mut range_reader))
+    }
+}
+
+/// Returns the path `blobs/<algorithm>/<digest>` under which the blob with the given digest
+/// is looked up in the tarball.
+fn get_blob_path(digest: &Digest) -> PathBuf {
+    PathBuf::from(format!(
+        "blobs/{}/{}",
+        digest.algorithm(),
+        digest.digest()
+    ))
+}
+
+/// A reader over a single blob of an [`OciTarImage`].
+///
+/// The blob takes over the image's reader and carries the rest of the image's state along, so
+/// that the image can be rebuilt with [`OciTarImageBlob::into_oci_tar_image`].
+pub struct OciTarImageBlob<R: Read + Seek> {
+    range_reader: RangeReader<R>,
+    entries: HashMap<PathBuf, TarEntry>,
+    image_index: ImageIndex,
+}
+
+impl<R: Read + Seek> OciTarImageBlob<R> {
+    /// Wraps the reader of `archive` in a range reader limited to `range`.
+    fn new(archive: OciTarImage<R>, range: Range<u64>) -> Self {
+        let OciTarImage {
+            reader,
+            entries,
+            image_index,
+        } = archive;
+
+        Self {
+            range_reader: RangeReader::new(reader, range),
+            entries,
+            image_index,
+        }
+    }
+
+    /// Gives up the blob and returns the image it was opened from.
+    pub fn into_oci_tar_image(self) -> OciTarImage<R> {
+        let Self {
+            range_reader,
+            entries,
+            image_index,
+        } = self;
+
+        OciTarImage {
+            reader: range_reader.into_inner(),
+            entries,
+            image_index,
+        }
+    }
+}
+
+/// Reading from the blob delegates to the wrapped range reader.
+impl<R: Read + Seek> Read for OciTarImageBlob<R> {
+ fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
+ self.range_reader.read(buf)
+ }
+}
+
+/// Seeking within the blob delegates to the wrapped range reader.
+impl<R: Read + Seek> Seek for OciTarImageBlob<R> {
+ fn seek(&mut self, pos: SeekFrom) -> std::io::Result<u64> {
+ self.range_reader.seek(pos)
+ }
+}
--
2.47.2
More information about the pve-devel
mailing list