[pve-devel] [PATCH container v2] setup: fix architecture detection for NixOS containers

Christoph Heiss c.heiss at proxmox.com
Mon Sep 25 13:38:49 CEST 2023


NixOS is special and deviates in many places from a "standard" Linux
system. In this case, /bin/sh does not exist in the filesystem before
the initial activation (a.k.a. first boot), which creates a symlink at
/bin/sh.

Due to the currently existing fallback code, only an error message is
logged and the architecture is defaulted to x86_64. Still, this is not
something users might expect.

Thus try a bit harder to detect the architecture for NixOS containers by
inspecting the init script, which contains a shebang-line with the full
path to the system shell.

This moves the architecture detection code to the end of the container
creation lifecycle, so that it can be implemented as a plugin
subroutine. Therefore this mechanism is now generic enough that it can
be adapted to other container OS's in the future if needed. AFAICS
`arch` is only used when writing the actual LXC config, so determining
it later during creation does not change anything.

detect_architecture() has been made a bit more generic; the LXC-specific
error was moved out of this function, as well as the chroot(). Ensuring
that it is executed from the correct rootdir/chroot should be handled by
the caller.

Tested by creating a NixOS and a Debian container (to verify that
nothing regressed) and checking if the warning "Architecture detection
failed: [..]" no longer appears for the NixOS CT and if `arch` in the
CT config is correct. Also tested restoring both containers from a local
and a PBS backup, as well as migrating both containers.

Signed-off-by: Christoph Heiss <c.heiss at proxmox.com>
---
v1: https://lists.proxmox.com/pipermail/pve-devel/2023-February/055949.html

Changes since v1:
  * Moved detect_architecture() to PVE::LXC::Tools to avoid a cyclic
    include
  * Properly log/report errors from detect_architecture()

 src/PVE/LXC/Create.pm       | 76 -------------------------------------
 src/PVE/LXC/Setup.pm        | 18 +++++++++
 src/PVE/LXC/Setup/Base.pm   |  9 +++++
 src/PVE/LXC/Setup/NixOS.pm  | 17 +++++++++
 src/PVE/LXC/Setup/Plugin.pm |  5 +++
 src/PVE/LXC/Tools.pm        | 50 ++++++++++++++++++++++++
 6 files changed, 99 insertions(+), 76 deletions(-)

diff --git a/src/PVE/LXC/Create.pm b/src/PVE/LXC/Create.pm
index f4c3220..277c6a9 100644
--- a/src/PVE/LXC/Create.pm
+++ b/src/PVE/LXC/Create.pm
@@ -16,72 +16,6 @@ use PVE::VZDump::ConvertOVZ;
 use PVE::Tools;
 use POSIX;

-sub detect_architecture {
-    my ($rootdir) = @_;
-
-    # see https://en.wikipedia.org/wiki/Executable_and_Linkable_Format
-
-    my $supported_elf_machine = {
-	0x03 => 'i386',
-	0x3e => 'amd64',
-	0x28 => 'armhf',
-	0xb7 => 'arm64',
-	0xf3 => 'riscv',
-    };
-
-    my $elf_fn = '/bin/sh'; # '/bin/sh' is POSIX mandatory
-    my $detect_arch = sub {
-	# chroot avoids a problem where we check the binary of the host system
-	# if $elf_fn is an absolut symlink (e.g. $rootdir/bin/sh -> /bin/bash)
-	chroot($rootdir) or die "chroot '$rootdir' failed: $!\n";
-	chdir('/') or die "failed to change to root directory\n";
-
-	open(my $fh, "<", $elf_fn) or die "open '$elf_fn' failed: $!\n";
-	binmode($fh);
-
-	my $length = read($fh, my $data, 20) or die "read failed: $!\n";
-
-	# 4 bytes ELF magic number and 1 byte ELF class, padding, machine
-	my ($magic, $class, undef, $machine) = unpack("A4CA12n", $data);
-
-	die "'$elf_fn' does not resolve to an ELF!\n"
-	    if (!defined($class) || !defined($magic) || $magic ne "\177ELF");
-
-	my $arch = $supported_elf_machine->{$machine};
-	die "'$elf_fn' has unknown ELF machine '$machine'!\n"
-	    if !defined($arch);
-
-	if ($arch eq 'riscv') {
-	    if ($class eq 1) {
-		$arch = 'riscv32';
-	    } elsif ($class eq 2) {
-		$arch = 'riscv64';
-	    } else {
-		die "'$elf_fn' has invalid class '$class'!\n";
-	    }
-	}
-
-	return $arch;
-    };
-
-    my $arch = eval { PVE::Tools::run_fork_with_timeout(10, $detect_arch); };
-    my $err = $@;
-
-    if (!defined($arch) && !defined($err)) {
-	# on timeout
-	die "Architecture detection failed: timeout\n";
-    } elsif ($err) {
-	# any other error
-	$arch = 'amd64';
-	print "Architecture detection failed: $err\nFalling back to $arch.\n" .
-	      "Use `pct set VMID --arch ARCH` to change.\n";
-    } else {
-	print "Detected container architecture: $arch\n";
-    }
-
-    return $arch;
-}
-
 sub restore_archive {
     my ($storage_cfg, $archive, $rootdir, $conf, $no_unpack_error, $bwlimit) = @_;

@@ -122,11 +56,6 @@ sub restore_proxmox_backup_archive {

     PVE::Storage::PBSPlugin::run_raw_client_cmd(
 	$scfg, $storeid, $cmd, $param, userns_cmd => $userns_cmd);
-
-    # if arch is set, we do not try to autodetect it
-    return if defined($conf->{arch});
-
-    $conf->{arch} = detect_architecture($rootdir);
 }

 sub restore_tar_archive {
@@ -187,11 +116,6 @@ sub restore_tar_archive {
     my $err = $@;
     close($archive_fh) if defined $archive_fh;
     die $err if $err && !$no_unpack_error;
-
-    # if arch is set, we do not try to autodetect it
-    return if defined($conf->{arch});
-
-    $conf->{arch} = detect_architecture($rootdir);
 }

 sub recover_config {
diff --git a/src/PVE/LXC/Setup.pm b/src/PVE/LXC/Setup.pm
index 891231f..c6a5fe9 100644
--- a/src/PVE/LXC/Setup.pm
+++ b/src/PVE/LXC/Setup.pm
@@ -131,6 +131,24 @@ sub new {
 	$plugin->{rootgid} = $rootgid;
     }

+    # if arch is unset, we try to autodetect it
+    if (!defined($conf->{arch})) {
+	my $arch = eval { $self->protected_call(sub { $plugin->detect_architecture() }) };
+
+	if (my $err = $@) {
+	    warn "Architecture detection failed: $err" if $err;
+	}
+
+	if (!defined($arch)) {
+	    $arch = 'amd64';
+	    print "Falling back to $arch.\nUse `pct set VMID --arch ARCH` to change.\n";
+	} else {
+	    print "Detected container architecture: $arch\n";
+	}
+
+	$conf->{arch} = $arch;
+    }
+
     return $self;
 }

diff --git a/src/PVE/LXC/Setup/Base.pm b/src/PVE/LXC/Setup/Base.pm
index b8f07ea..38f0d68 100644
--- a/src/PVE/LXC/Setup/Base.pm
+++ b/src/PVE/LXC/Setup/Base.pm
@@ -19,6 +19,8 @@ use PVE::Tools;
 use PVE::Network;

 use PVE::LXC::Setup::Plugin;
+use PVE::LXC::Tools;
+
 use base qw(PVE::LXC::Setup::Plugin);

 sub new {
@@ -608,6 +610,13 @@ sub ssh_host_key_types_to_generate {
     };
 }

+sub detect_architecture {
+    my ($self) = @_;
+
+    # '/bin/sh' is POSIX mandatory
+    return PVE::LXC::Tools::detect_elf_architecture('/bin/sh');
+}
+
 sub pre_start_hook {
     my ($self, $conf) = @_;

diff --git a/src/PVE/LXC/Setup/NixOS.pm b/src/PVE/LXC/Setup/NixOS.pm
index 845d2d5..c702f3d 100644
--- a/src/PVE/LXC/Setup/NixOS.pm
+++ b/src/PVE/LXC/Setup/NixOS.pm
@@ -6,6 +6,7 @@ use warnings;
 use File::Path 'make_path';

 use PVE::LXC::Setup::Base;
+use PVE::LXC::Tools;

 use base qw(PVE::LXC::Setup::Base);

@@ -37,4 +38,20 @@ sub setup_init {
     my ($self, $conf) = @_;
 }

+sub detect_architecture {
+    my ($self) = @_;
+
+    # /bin/sh only exists as a symlink after the initial system activation on first boot.
+    # To detect the actual architecture of the system, examine the shebang line of the /sbin/init
+    # script, which has the full path to the system shell.
+    my $init_path = '/sbin/init';
+    open(my $fh, '<', $init_path) or die "open '$init_path' failed: $!\n";
+
+    if (<$fh> =~ /^#! ?(\S*)/) {
+	return PVE::LXC::Tools::detect_elf_architecture($1);
+    }
+
+    die "could not find a shell\n";
+}
+
 1;
diff --git a/src/PVE/LXC/Setup/Plugin.pm b/src/PVE/LXC/Setup/Plugin.pm
index 3d968e7..b9d9c2d 100644
--- a/src/PVE/LXC/Setup/Plugin.pm
+++ b/src/PVE/LXC/Setup/Plugin.pm
@@ -62,6 +62,11 @@ sub ssh_host_key_types_to_generate {
     croak "implement me in sub-class\n";
 }

+sub detect_architecture {
+    my ($self) = @_;
+    croak "implement me in sub-class\n";
+}
+
 # hooks

 sub pre_start_hook {
diff --git a/src/PVE/LXC/Tools.pm b/src/PVE/LXC/Tools.pm
index 1d83768..fdda4e3 100644
--- a/src/PVE/LXC/Tools.pm
+++ b/src/PVE/LXC/Tools.pm
@@ -150,4 +150,54 @@ sub can_use_new_mount_api() {
     return $cached_can_use_new_mount_api;
 }

+# Tries to detect the architecture of an executable file based on its ELF header.
+sub detect_elf_architecture {
+    my ($elf_fn) = @_;
+
+    # see https://en.wikipedia.org/wiki/Executable_and_Linkable_Format
+
+    my $supported_elf_machine = {
+	0x03 => 'i386',
+	0x3e => 'amd64',
+	0x28 => 'armhf',
+	0xb7 => 'arm64',
+	0xf3 => 'riscv',
+    };
+
+    my $detect_arch = sub {
+	open(my $fh, "<", $elf_fn) or die "open '$elf_fn' failed: $!\n";
+	binmode($fh);
+
+	my $length = read($fh, my $data, 20) or die "read failed: $!\n";
+
+	# 4 bytes ELF magic number and 1 byte ELF class, padding, machine
+	my ($magic, $class, undef, $machine) = unpack("A4CA12n", $data);
+
+	die "'$elf_fn' does not resolve to an ELF!\n"
+	    if (!defined($class) || !defined($magic) || $magic ne "\177ELF");
+
+	my $arch = $supported_elf_machine->{$machine};
+	die "'$elf_fn' has unknown ELF machine '$machine'!\n"
+	    if !defined($arch);
+
+	if ($arch eq 'riscv') {
+	    if ($class eq 1) {
+		$arch = 'riscv32';
+	    } elsif ($class eq 2) {
+		$arch = 'riscv64';
+	    } else {
+		die "'$elf_fn' has invalid class '$class'!\n";
+	    }
+	}
+
+	return $arch;
+    };
+
+    my $arch = eval { PVE::Tools::run_fork_with_timeout(10, $detect_arch); };
+    my $err = $@ // "timeout\n";
+    die $err if !defined($arch);
+
+    return $arch;
+}
+
 1;
--
2.41.0






More information about the pve-devel mailing list