[pve-devel] [RFC v2 qemu-server 3/4] fix #3075: add TPM v1.2 and v2.0 support via swtpm

Stefan Reiter s.reiter at proxmox.com
Thu Sep 23 16:54:58 CEST 2021


Starts an instance of swtpm per VM in its systemd scope. It will
terminate by itself if the VM exits, or be terminated manually if
startup fails.

Before first use, a TPM state is created via swtpm_setup. State is
stored in a 'tpmstate0' volume, treated much the same way as an efidisk.

It is migrated 'offline'. The important part here is the creation of the
target volume; the actual data transfer happens via the QEMU device
state migration process.

Move-disk can only work offline, as the disk is not registered with
QEMU, so 'drive-mirror' wouldn't work. swtpm itself has no method of
moving a backing storage at runtime.

Signed-off-by: Stefan Reiter <s.reiter at proxmox.com>
---
 PVE/API2/Qemu.pm        |   5 ++
 PVE/QemuMigrate.pm      |  14 ++++-
 PVE/QemuServer.pm       | 135 +++++++++++++++++++++++++++++++++++++---
 PVE/QemuServer/Drive.pm |  63 +++++++++++++++----
 4 files changed, 198 insertions(+), 19 deletions(-)

diff --git a/PVE/API2/Qemu.pm b/PVE/API2/Qemu.pm
index ef0d877..dc089b9 100644
--- a/PVE/API2/Qemu.pm
+++ b/PVE/API2/Qemu.pm
@@ -184,6 +184,11 @@ my $create_disks = sub {
 	    my $volid;
 	    if ($ds eq 'efidisk0') {
 		($volid, $size) = PVE::QemuServer::create_efidisk($storecfg, $storeid, $vmid, $fmt, $arch);
+	    } elsif ($ds eq 'tpmstate0') {
+		# swtpm can only use raw volumes, and uses a fixed size
+		$size = PVE::Tools::convert_size(PVE::QemuServer::Drive::TPMSTATE_DISK_SIZE, 'b' => 'kb');
+		$volid = PVE::Storage::vdisk_alloc($storecfg, $storeid, $vmid,
+		    "raw", undef, $size);
 	    } else {
 		$volid = PVE::Storage::vdisk_alloc($storecfg, $storeid, $vmid, $fmt, undef, $size);
 	    }
diff --git a/PVE/QemuMigrate.pm b/PVE/QemuMigrate.pm
index 5ecc2a7..0df7c38 100644
--- a/PVE/QemuMigrate.pm
+++ b/PVE/QemuMigrate.pm
@@ -481,6 +481,7 @@ sub scan_local_volumes {
 
 	    $local_volumes->{$volid}->{ref} = $attr->{referenced_in_config} ? 'config' : 'snapshot';
 	    $local_volumes->{$volid}->{ref} = 'storage' if $attr->{is_unused};
+	    $local_volumes->{$volid}->{ref} = 'generated' if $attr->{is_tpmstate};
 
 	    $local_volumes->{$volid}->{is_vmstate} = $attr->{is_vmstate} ? 1 : 0;
 
@@ -580,6 +581,9 @@ sub scan_local_volumes {
 		$local_volumes->{$volid}->{migration_mode} = 'online';
 	    } elsif ($self->{running} && $ref eq 'generated') {
 		# offline migrate the cloud-init ISO and don't regenerate on VM start
+		#
+		# tpmstate will also be offline migrated first, and in case of
+		# live migration then updated by QEMU/swtpm if necessary
 		$local_volumes->{$volid}->{migration_mode} = 'offline';
 	    } else {
 		$local_volumes->{$volid}->{migration_mode} = 'offline';
@@ -641,7 +645,9 @@ sub config_update_local_disksizes {
 
     PVE::QemuConfig->foreach_volume($conf, sub {
 	my ($key, $drive) = @_;
-	return if $key eq 'efidisk0'; # skip efidisk, will be handled later
+	# skip special disks, will be handled later
+	return if $key eq 'efidisk0';
+	return if $key eq 'tpmstate0';
 
 	my $volid = $drive->{file};
 	return if !defined($local_volumes->{$volid}); # only update sizes for local volumes
@@ -658,6 +664,12 @@ sub config_update_local_disksizes {
     if (defined($conf->{efidisk0})) {
 	PVE::QemuServer::update_efidisk_size($conf);
     }
+
+    # TPM state might have an irregular filesize, to avoid problems on transfer
+    # we always assume the static size of 4M to allocate on the target
+    if (defined($conf->{tpmstate0})) {
+	PVE::QemuServer::update_tpmstate_size($conf);
+    }
 }
 
 sub filter_local_volumes {
diff --git a/PVE/QemuServer.pm b/PVE/QemuServer.pm
index cc73af8..89c248d 100644
--- a/PVE/QemuServer.pm
+++ b/PVE/QemuServer.pm
@@ -1143,7 +1143,8 @@ PVE::JSONSchema::register_format('pve-qm-bootdev', \&verify_bootdev);
 sub verify_bootdev {
     my ($dev, $noerr) = @_;
 
-    return $dev if PVE::QemuServer::Drive::is_valid_drivename($dev) && $dev !~ m/^efidisk/;
+    my $special = $dev =~ m/^efidisk/ || $dev =~ m/^tpmstate/;
+    return $dev if PVE::QemuServer::Drive::is_valid_drivename($dev) && !$special;
 
     my $check = sub {
 	my ($base) = @_;
@@ -2961,6 +2962,96 @@ sub audio_devs {
     return $devs;
 }
 
+sub get_tpm_paths { # returns a hash reference with the per-VM swtpm file paths
+    my ($vmid) = @_;
+    return {
+	socket => "/var/run/qemu-server/$vmid.swtpm", # swtpm control socket, also the QEMU chardev path
+	pid => "/var/run/qemu-server/$vmid.swtpm.pid", # PID file passed to swtpm via --pid
+    };
+}
+
+sub add_tpm_device { # appends the QEMU TPM arguments to @$devices if 'tpmstate0' is set
+    my ($vmid, $devices, $conf) = @_;
+
+    return if !$conf->{tpmstate0}; # no TPM state configured, nothing to add
+
+    my $paths = get_tpm_paths($vmid);
+
+    push @$devices, "-chardev", "socket,id=tpmchar,path=$paths->{socket}"; # chardev on the swtpm socket
+    push @$devices, "-tpmdev", "emulator,id=tpmdev,chardev=tpmchar"; # emulator backend using that chardev
+    push @$devices, "-device", "tpm-tis,tpmdev=tpmdev"; # TIS device bound to the backend
+}
+
+# Start the swtpm daemon for the raw 'tpmstate0' config string $tpmdrive (no-op
+# if unset) and return its untainted PID, or undef if none could be read.
+# A true $migration means the TPM state is received from the migration source.
+sub start_swtpm {
+    my ($storecfg, $vmid, $tpmdrive, $migration) = @_;
+
+    return if !$tpmdrive;
+
+    my $tpm = parse_drive("tpmstate0", $tpmdrive);
+    my ($storeid) = PVE::Storage::parse_volume_id($tpm->{file}, 1);
+    my $state = $storeid ? PVE::Storage::path($storecfg, $tpm->{file}) : $tpm->{file};
+    # honor the schema default in case the property string omits 'version'
+    my $is_v2 = ($tpm->{version} // 'v2.0') eq 'v2.0';
+
+    # FIXME (krbd should work fine)
+    die "TPM on RBD is not supported\n" if $state =~ m/^rbd:/;
+
+    my $paths = get_tpm_paths($vmid);
+
+    # during migration, we will get state from remote
+    #
+    # FIXME: Disabled for now, we use the default-generated TPM state, since
+    # swtpm_setup doesn't support the file:// backend yet!
+    if (!$migration && 0) {
+	# run swtpm_setup to create a new TPM state if it doesn't exist yet
+	my $setup_cmd = [
+	    "swtpm_setup",
+	    "--tpmstate",
+	    "backend-uri=file://$state",
+	    "--createek",
+	    "--create-ek-cert",
+	    "--create-platform-cert",
+	    "--lock-nvram",
+	    "--config",
+	    "/etc/swtpm_setup.conf", # do not use XDG configs
+	    "--runas",
+	    "0", # force creation as root, error if not possible
+	];
+
+	# TPM 2.0 supports ECC crypto, use if possible
+	push @$setup_cmd, "--tpm2", "--ecc" if $is_v2;
+
+	# produces a lot of verbose output, only show on error
+	my $tpmout = "";
+	eval { run_command($setup_cmd, outfunc => sub { $tpmout .= "$_[0]\n"; }) };
+	die "$tpmout$@" if $@;
+    }
+
+    my $emulator_cmd = [
+	"swtpm",
+	"socket",
+	"--tpmstate",
+	"backend-uri=file://$state,mode=0600",
+	"--ctrl",
+	"type=unixio,path=$paths->{socket},mode=0600",
+	"--pid",
+	"file=$paths->{pid}",
+	"--terminate", # terminate on QEMU disconnect
+	"--daemon",
+    ];
+    push @$emulator_cmd, "--tpm2" if $is_v2;
+    # use the callback argument for the output line, $1 is not set by this code
+    run_command($emulator_cmd, outfunc => sub { print $_[0]; });
+
+    # return untainted PID of swtpm daemon so it can be killed on error; use
+    # the list-context match so a failed match yields undef, not a stale $1
+    my ($pid) = (file_read_firstline($paths->{pid}) // '') =~ m/(\d+)/;
+    return $pid;
+}
+
 sub vga_conf_has_spice {
     my ($vga) = @_;
 
@@ -3462,6 +3553,8 @@ sub config_to_command {
 	push @$devices, @$audio_devs;
     }
 
+    add_tpm_device($vmid, $devices, $conf);
+
     my $sockets = 1;
     $sockets = $conf->{smp} if $conf->{smp}; # old style - no longer iused
     $sockets = $conf->{sockets} if  $conf->{sockets};
@@ -3658,6 +3751,8 @@ sub config_to_command {
 
 	# ignore efidisk here, already added in bios/fw handling code above
 	return if $drive->{interface} eq 'efidisk';
+	# similar for TPM
+	return if $drive->{interface} eq 'tpmstate';
 
 	$use_virtio = 1 if $ds =~ m/^virtio/;
 
@@ -4530,6 +4625,9 @@ sub foreach_volid {
 	$volhash->{$volid}->{is_vmstate} //= 0;
 	$volhash->{$volid}->{is_vmstate} = 1 if $key eq 'vmstate';
 
+	$volhash->{$volid}->{is_tpmstate} //= 0;
+	$volhash->{$volid}->{is_tpmstate} = 1 if $key eq 'tpmstate0';
+
 	$volhash->{$volid}->{is_unused} //= 0;
 	$volhash->{$volid}->{is_unused} = 1 if $key =~ /^unused\d+$/;
 
@@ -4727,7 +4825,7 @@ sub vmconfig_hotplug_pending {
 		vmconfig_update_net($storecfg, $conf, $hotplug_features->{network},
 				    $vmid, $opt, $value, $arch, $machine_type);
 	    } elsif (is_valid_drivename($opt)) {
-		die "skip\n" if $opt eq 'efidisk0';
+		die "skip\n" if $opt eq 'efidisk0' || $opt eq 'tpmstate0';
 		# some changes can be done without hotplug
 		my $drive = parse_drive($opt, $value);
 		if (drive_is_cloudinit($drive)) {
@@ -5347,8 +5445,17 @@ sub vm_start_nolock {
 	PVE::Tools::run_fork sub {
 	    PVE::Systemd::enter_systemd_scope($vmid, "Proxmox VE VM $vmid", %properties);
 
+	    my $tpmpid;
+	    if (my $tpm = $conf->{tpmstate0}) {
+		# start the TPM emulator so QEMU can connect on start
+		$tpmpid = start_swtpm($storecfg, $vmid, $tpm, $migratedfrom);
+	    }
+
 	    my $exitcode = run_command($cmd, %run_params);
-	    die "QEMU exited with code $exitcode\n" if $exitcode;
+	    if ($exitcode) {
+		kill 'TERM', $tpmpid if $tpmpid;
+		die "QEMU exited with code $exitcode\n";
+	    }
 	};
     };
 
@@ -6085,7 +6192,7 @@ sub restore_update_config_line {
 	$net->{macaddr} = PVE::Tools::random_ether_addr($dc->{mac_prefix}) if $net->{macaddr};
 	$netstr = print_net($net);
 	$res .= "$id: $netstr\n";
-    } elsif ($line =~ m/^((ide|scsi|virtio|sata|efidisk)\d+):\s*(\S+)\s*$/) {
+    } elsif ($line =~ m/^((ide|scsi|virtio|sata|efidisk|tpmstate)\d+):\s*(\S+)\s*$/) {
 	my $virtdev = $1;
 	my $value = $3;
 	my $di = parse_drive($virtdev, $value);
@@ -6403,8 +6510,8 @@ sub restore_proxmox_backup_archive {
 	    my $volid = $d->{volid};
 	    my $path = PVE::Storage::path($storecfg, $volid);
 
-	    # for live-restore we only want to preload the efidisk
-	    next if $options->{live} && $virtdev ne 'efidisk0';
+	    # for live-restore we only want to preload the efidisk and TPM state
+	    next if $options->{live} && $virtdev ne 'efidisk0' && $virtdev ne 'tpmstate0';
 
 	    my $pbs_restore_cmd = [
 		'/usr/bin/pbs-restore',
@@ -6479,7 +6586,9 @@ sub restore_proxmox_backup_archive {
 	my $conf = PVE::QemuConfig->load_config($vmid);
 	die "cannot do live-restore for template\n" if PVE::QemuConfig->is_template($conf);
 
-	delete $devinfo->{'drive-efidisk0'}; # this special drive is already restored before start
+	# these special drives are already restored before start
+	delete $devinfo->{'drive-efidisk0'};
+	delete $devinfo->{'drive-tpmstate0'};
 	pbs_live_restore($vmid, $conf, $storecfg, $devinfo, $repo, $keyfile, $pbs_backup_name);
 
 	PVE::QemuConfig->remove_lock($vmid, "create");
@@ -7313,6 +7422,8 @@ sub clone_disk {
 	    $size = PVE::QemuServer::Cloudinit::CLOUDINIT_DISK_SIZE;
 	} elsif ($drivename eq 'efidisk0') {
 	    $size = get_efivars_size($conf);
+	} elsif ($drivename eq 'tpmstate0') {
+	    $size = PVE::QemuServer::Drive::TPMSTATE_DISK_SIZE;
 	} else {
 	    ($size) = PVE::Storage::volume_size_info($storecfg, $drive->{file}, 10);
 	}
@@ -7353,6 +7464,8 @@ sub clone_disk {
 	    }
 	} else {
 
+	    die "cannot move TPM state while VM is running\n" if $drivename eq 'tpmstate0';
+
 	    my $kvmver = get_running_qemu_version ($vmid);
 	    if (!min_version($kvmver, 2, 7)) {
 		die "drive-mirror with iothread requires qemu version 2.7 or higher\n"
@@ -7423,6 +7536,14 @@ sub update_efidisk_size {
     return;
 }
 
+sub update_tpmstate_size { # rewrites $conf->{tpmstate0} in place with the fixed TPM state size
+    my ($conf) = @_;
+
+    my $disk = PVE::QemuServer::parse_drive('tpmstate0', $conf->{tpmstate0});
+    $disk->{size} = PVE::QemuServer::Drive::TPMSTATE_DISK_SIZE; # constant size, volume is not inspected
+    $conf->{tpmstate0} = print_drive($disk);
+}
+
 sub create_efidisk($$$$$) {
     my ($storecfg, $storeid, $vmid, $fmt, $arch) = @_;
 
diff --git a/PVE/QemuServer/Drive.pm b/PVE/QemuServer/Drive.pm
index 5110190..32c7377 100644
--- a/PVE/QemuServer/Drive.pm
+++ b/PVE/QemuServer/Drive.pm
@@ -306,16 +306,6 @@ my $virtiodesc = {
 };
 PVE::JSONSchema::register_standard_option("pve-qm-virtio", $virtiodesc);
 
-my $alldrive_fmt = {
-    %drivedesc_base,
-    %iothread_fmt,
-    %model_fmt,
-    %queues_fmt,
-    %scsiblock_fmt,
-    %ssd_fmt,
-    %wwn_fmt,
-};
-
 my $efidisk_fmt = {
     volume => { alias => 'file' },
     file => {
@@ -345,6 +335,55 @@ my $efidisk_desc = {
 
 PVE::JSONSchema::register_standard_option("pve-qm-efidisk", $efidisk_desc);
 
+my %tpmversion_fmt = ( # 'version' property, shared by $tpmstate_fmt and $alldrive_fmt
+    version => {
+	type => 'string',
+	enum => [qw(v1.2 v2.0)],
+	description => "The TPM interface version. v2.0 is newer and should be "
+		     . "preferred. Note that this cannot be changed later on.",
+	optional => 1,
+	default => 'v2.0', # NOTE(review): confirm consumers apply this default when 'version' is omitted
+    },
+);
+my $tpmstate_fmt = { # property-string format of the 'tpmstate0' option
+    volume => { alias => 'file' },
+    file => {
+	type => 'string',
+	format => 'pve-volume-id-or-qm-path',
+	default_key => 1, # a bare value without 'key=' is taken as 'file'
+	format_description => 'volume',
+	description => "The drive's backing volume.",
+    },
+    size => {
+	type => 'string',
+	format => 'disk-size',
+	format_description => 'DiskSize',
+	description => "Disk size. This is purely informational and has no effect.",
+	optional => 1,
+    },
+    %tpmversion_fmt, # adds the optional 'version' property
+};
+my $tpmstate_desc = { # schema entry registered for 'tpmstate0' in $drivedesc_hash
+    optional => 1,
+    type => 'string', format => $tpmstate_fmt,
+    description => "Configure a Disk for storing TPM state. " .
+	$ALLOCATION_SYNTAX_DESC . " Note that SIZE_IN_GiB is ignored here " .
+	"and that the default size of 4 MiB will always be used instead. The " .
+	"format is also fixed to 'raw'.",
+};
+use constant TPMSTATE_DISK_SIZE => 4 * 1024 * 1024; # fixed TPM state volume size in bytes (4 MiB)
+
+my $alldrive_fmt = { # merged format of the listed property sets; placed after %tpmversion_fmt, which it uses
+    %drivedesc_base,
+    %iothread_fmt,
+    %model_fmt,
+    %queues_fmt,
+    %scsiblock_fmt,
+    %ssd_fmt,
+    %wwn_fmt,
+    %tpmversion_fmt, # 'version' property of tpmstate0
+};
+
 my $unused_fmt = {
     volume => { alias => 'file' },
     file => {
@@ -379,6 +418,7 @@ for (my $i = 0; $i < $MAX_VIRTIO_DISKS; $i++)  {
 }
 
 $drivedesc_hash->{efidisk0} = $efidisk_desc;
+$drivedesc_hash->{tpmstate0} = $tpmstate_desc;
 
 for (my $i = 0; $i < $MAX_UNUSED_DISKS; $i++) {
     $drivedesc_hash->{"unused$i"} = $unuseddesc;
@@ -390,7 +430,8 @@ sub valid_drive_names {
             (map { "scsi$_" } (0 .. ($MAX_SCSI_DISKS - 1))),
             (map { "virtio$_" } (0 .. ($MAX_VIRTIO_DISKS - 1))),
             (map { "sata$_" } (0 .. ($MAX_SATA_DISKS - 1))),
-            'efidisk0');
+            'efidisk0',
+            'tpmstate0');
 }
 
 sub is_valid_drivename {
-- 
2.30.2






More information about the pve-devel mailing list