[pve-devel] [RFC manager 2/2] upgrade checklist

Fabian Grünbichler f.gruenbichler at proxmox.com
Mon Jun 24 13:56:40 CEST 2019


Signed-off-by: Fabian Grünbichler <f.gruenbichler at proxmox.com>
---
Notes:
    left out on purpose:
    - checking of sources.list (no parser, lots of false negatives, needs to happen after upgrade to corosync 3)
    
    still missing for PVE 6.x / post-upgrade version:
    - modification of checked versions
    - ceph-volume scan on managed nodes with OSDs
    
    still missing for PVE 6.x / post-reboot version:
    - check for running kernel
    
    suggestions for additional checks/adaptations very welcome!

    to actually install and test, the usual build cycle with pve-docs needs to
    be manually broken. alternatively, manual copying/execution works fine as
    well ;)

 PVE/CLI/Makefile   |   2 +-
 bin/Makefile       |   3 +-
 PVE/CLI/pve5to6.pm | 370 +++++++++++++++++++++++++++++++++++++++++++++
 bin/pve5to6        |  10 ++
 4 files changed, 383 insertions(+), 2 deletions(-)
 create mode 100644 PVE/CLI/pve5to6.pm
 create mode 100755 bin/pve5to6

diff --git a/PVE/CLI/Makefile b/PVE/CLI/Makefile
index 93b3f3c6..7e9ae0df 100644
--- a/PVE/CLI/Makefile
+++ b/PVE/CLI/Makefile
@@ -1,6 +1,6 @@
 include ../../defines.mk
 
-SOURCES=vzdump.pm pvesubscription.pm pveceph.pm pveam.pm pvesr.pm pvenode.pm pvesh.pm
+SOURCES=vzdump.pm pvesubscription.pm pveceph.pm pveam.pm pvesr.pm pvenode.pm pvesh.pm pve5to6.pm
 
 all:
 
diff --git a/bin/Makefile b/bin/Makefile
index 52044ca9..31229477 100644
--- a/bin/Makefile
+++ b/bin/Makefile
@@ -7,7 +7,7 @@ PERL_DOC_INC_DIRS=..
 include /usr/share/pve-doc-generator/pve-doc-generator.mk
 
 SERVICES = pvestatd pveproxy pvedaemon spiceproxy
-CLITOOLS = vzdump pvesubscription pveceph pveam pvesr pvenode pvesh
+CLITOOLS = vzdump pvesubscription pveceph pveam pvesr pvenode pvesh pve5to6
 
 SCRIPTS =  			\
 	${SERVICES}		\
@@ -48,6 +48,7 @@ all: ${SERVICE_MANS} ${CLI_MANS} pvemailforward
 	podselect $* > $@.tmp
 	mv $@.tmp $@
 
+pve5to6.1.pod: pve5to6
 pveversion.1.pod: pveversion
 pveupgrade.1.pod: pveupgrade
 pvereport.1.pod: pvereport
diff --git a/PVE/CLI/pve5to6.pm b/PVE/CLI/pve5to6.pm
new file mode 100644
index 00000000..98981299
--- /dev/null
+++ b/PVE/CLI/pve5to6.pm
@@ -0,0 +1,370 @@
+package PVE::CLI::pve5to6;
+
+use strict;
+use warnings;
+
+use PVE::API2::APT;
+use PVE::API2::Ceph;
+use PVE::API2::LXC;
+use PVE::API2::Qemu;
+
+use PVE::Ceph::Tools;
+use PVE::Cluster;
+use PVE::Corosync;
+use PVE::INotify;
+use PVE::JSONSchema;
+use PVE::RPCEnvironment;
+use PVE::Storage;
+use PVE::Tools;
+
+use PVE::CLIHandler;
+
+use base qw(PVE::CLIHandler);
+
+my $nodename = PVE::INotify::nodename(); # passed as the 'node' parameter to the API calls below
+
+sub setup_environment { # sets up the default CLI RPC environment; the caller is not visible in this file
+    PVE::RPCEnvironment->setup_default_cli_env();
+}
+
+my $min_pve_rel = '5.4'; # release part of the minimum proxmox-ve version (see check_pve_packages)
+my $min_pve_pkgrel = 2; # minimum package revision within that release
+
+my $counters = { # per-level tally, incremented by $log_line and printed in the final summary
+    pass => 0,
+    skip => 0,
+    warn => 0,
+    fail => 0,
+};
+
+# Print $line prefixed with the upper-cased $level; bump that level's counter if it has one.
+my $log_line = sub {
+    my ($level, $line) = @_;
+
+    $counters->{$level}++ if defined($level) && defined($counters->{$level});
+    my $prefix = defined($level) ? uc($level) . ': ' : '';
+    print "$prefix$line\n";
+};
+
+# Level-specific wrappers around $log_line; 'info' has no entry in $counters and is not tallied.
+sub log_pass {
+    return $log_line->('pass', @_);
+}
+sub log_info {
+    return $log_line->('info', @_);
+}
+sub log_skip {
+    return $log_line->('skip', @_);
+}
+sub log_warn {
+    return $log_line->('warn', @_);
+}
+sub log_fail {
+    return $log_line->('fail', @_);
+}
+
+# Return the entry for package $pkg from PVE::API2::APT->versions on this node.
+# Logs a failure and returns undef if the list cannot be retrieved or has no such entry.
+my $get_pkg = sub {
+    my ($pkg) = @_;
+    my $versions = eval { PVE::API2::APT->versions({ node => $nodename }); };
+
+    if (!defined($versions)) {
+	my $msg = "unable to retrieve package version information";
+	$msg .= " - $@" if $@;
+	log_fail($msg);
+	return undef;
+    }
+
+    # list assignment keeps the first match; $entry stays undef if nothing matched
+    my ($entry) = grep { $_->{Package} eq $pkg } @$versions;
+    if (!defined($entry)) {
+	log_fail("unable to determine installed $pkg version.");
+	return undef;
+    }
+    return $entry;
+};
+
+# Check for pending package updates and for a sufficiently new proxmox-ve package.
+sub check_pve_packages {
+    print "CHECKING VERSION INFORMATION FOR PVE PACKAGES\n";
+    print "\nChecking for package updates..\n";
+    my $updates = eval { PVE::API2::APT->list_updates({ node => $nodename }); };
+    if (!defined($updates)) {
+	log_warn("$@") if $@;
+	log_fail("unable to retrieve list of package updates!");
+    } elsif (@$updates > 0) {
+	my $pkgs = join(', ', map { $_->{Package} } @$updates);
+	log_warn("updates for the following packages are available: $pkgs");
+    } else {
+	log_pass("all packages uptodate");
+    }
+
+    print "\nChecking proxmox-ve package version..\n";
+    if (defined(my $proxmox_ve = $get_pkg->('proxmox-ve'))) {
+	my $min_pve_ver = "$min_pve_rel-$min_pve_pkgrel";
+	# \Q..\E: match the release literally, its '.' must not act as a regex wildcard
+	if ($proxmox_ve->{OldVersion} =~ m/^\Q$min_pve_rel\E-(\d+)/ && $1 >= $min_pve_pkgrel) {
+	    log_pass("proxmox-ve package has version >= $min_pve_ver");
+	} else {
+	    log_fail("proxmox-ve package is too old, please upgrade to >= $min_pve_ver!");
+	}
+    }
+}
+
+# Report every configured storage as active (pass), inactive (warn) or disabled (skip).
+sub check_storage_health {
+    print "\nCHECKING CONFIGURED STORAGES\n\n";
+
+    my $cfg = PVE::Storage::config();
+    my $info = PVE::Storage::storage_info($cfg);
+
+    # sorted so the report order is stable between runs
+    foreach my $storeid (sort keys %$info) {
+	my $d = $info->{$storeid};
+	if ($d->{enabled}) {
+	    if ($d->{active}) {
+		log_pass("storage '$storeid' enabled and active.");
+	    } else {
+		log_warn("storage '$storeid' enabled but not active!");
+	    }
+	} else {
+	    log_skip("storage '$storeid' disabled.");
+	}
+    }
+}
+
+# Cluster checks: quorum, corosync.conf vs. pmxcfs node lists, ring addresses, totem settings, corosync version.
+sub check_cluster_corosync {
+    print "\nCHECKING CLUSTER HEALTH/SETTINGS\n\n";
+
+    if (!PVE::Corosync::check_conf_exists(1)) {
+	log_skip("standalone node.");
+	return;
+    }
+
+    if (PVE::Cluster::check_cfs_quorum(1)) {
+	log_pass("Cluster is quorate.");
+    } else {
+	log_fail("Cluster lost quorum!");
+    }
+
+    my $conf = PVE::Cluster::cfs_read_file('corosync.conf');
+    my $conf_nodelist = PVE::Corosync::nodelist($conf);
+
+    if (!defined($conf_nodelist)) {
+	log_fail("unable to retrieve nodelist from corosync.conf");
+    } elsif (grep { $conf_nodelist->{$_}->{quorum_votes} != 1 } keys %$conf_nodelist) {
+	log_warn("non-default quorum_votes distribution detected!");
+    }
+
+    my $cfs_nodelist = PVE::Cluster::get_clinfo()->{nodelist};
+    my $offline_nodes = grep { $cfs_nodelist->{$_}->{online} != 1 } keys %$cfs_nodelist;
+    if ($offline_nodes > 0) {
+	log_fail("$offline_nodes nodes are offline!");
+    }
+
+    my $conf_nodelist_count = scalar(keys %$conf_nodelist);
+    my $cfs_nodelist_count = scalar(keys %$cfs_nodelist);
+    log_warn("cluster consists of less than three nodes!")
+	if $conf_nodelist_count < 3;
+
+    log_fail("corosync.conf ($conf_nodelist_count) and pmxcfs ($cfs_nodelist_count) don't agree about size of nodelist.")
+	if $conf_nodelist_count != $cfs_nodelist_count;
+
+    foreach my $cs_node (keys %$conf_nodelist) {
+	my $entry = $conf_nodelist->{$cs_node};
+	log_fail("No name entry for node '$cs_node' in corosync.conf.")
+	    if !defined($entry->{name});
+	log_fail("No nodeid configured for node '$cs_node' in corosync.conf.")
+	    if !defined($entry->{nodeid});
+	# a configured ring address that does not verify as an IP address is reported as failure
+	my $verify_ring_ip = sub {
+	    my $key = shift;
+	    my $ring = $entry->{$key};
+	    if (defined($ring) && !PVE::JSONSchema::pve_verify_ip($ring, 1)) {
+		log_fail("$key '$ring' of node '$cs_node' is not an IP address, consider replacing it with the currently resolved IP address.");
+	    }
+	};
+	$verify_ring_ip->('ring0_addr');
+	$verify_ring_ip->('ring1_addr');
+    }
+
+    my $totem = $conf->{main}->{totem};
+    my $transport = $totem->{transport};
+    if (defined($transport)) {
+	log_fail("Corosync transport explicitly set to '$transport' instead of implicit default!");
+    }
+
+    if ((!defined($totem->{secauth}) || $totem->{secauth} ne 'on') && (!defined($totem->{crypto_cipher}) || $totem->{crypto_cipher} eq 'none')) {
+	log_fail("Corosync authentication/encryption is not explicitly enabled (secauth / crypto_cipher / crypto_hash)!");
+    }
+
+    if (defined($totem->{crypto_cipher}) && $totem->{crypto_cipher} eq '3des') {
+	log_fail("Corosync encryption cipher set to '3des', no longer supported in Corosync 3.x!");
+    }
+    # quorumtool output is forwarded as INFO lines; an error raised inside the eval is ignored
+    my $prefix_info = sub { my $line = shift; log_info("$line"); };
+    eval {
+	print "\n";
+	log_info("Printing detailed cluster status..");
+	PVE::Tools::run_command(['corosync-quorumtool', '-siH'], outfunc => $prefix_info, errfunc => $prefix_info);
+    };
+
+    print "\nCHECKING INSTALLED COROSYNC VERSION\n\n";
+    if (defined(my $corosync = $get_pkg->('corosync'))) {
+	if ($corosync->{OldVersion} =~ m/^2\./) {
+	    log_fail("corosync 2.x installed, cluster-wide upgrade to 3.x needed!");
+	} elsif ($corosync->{OldVersion} =~ m/^3\./) {
+	    log_pass("corosync 3.x installed.");
+	} else {
+	    log_fail("unexpected corosync version installed: $corosync->{OldVersion}!");
+	}
+    }
+}
+
+# Hyper-converged Ceph checks: health status, OSD flags and running daemon versions.
+sub check_ceph {
+    print "\nCHECKING HYPER-CONVERGED CEPH STATUS\n\n";
+
+    if (PVE::Ceph::Tools::check_ceph_inited(1)) {
+	log_info("hyper-converged ceph setup detected!");
+    } else {
+	log_skip("no hyper-converged ceph setup detected!");
+	return;
+    }
+
+    log_info("getting Ceph status/health information..");
+    my $ceph_status = eval { PVE::API2::Ceph->status({ node => $nodename }); };
+    my $osd_flags = eval { PVE::API2::Ceph->get_flags({ node => $nodename }); };
+    # $osd_flags stays undef if the eval above failed; never match against undef
+    my $noout = defined($osd_flags) && $osd_flags =~ m/noout/;
+
+    if (!$ceph_status || !$ceph_status->{health}) {
+	log_fail("unable to determine Ceph status!");
+    } else {
+	my $ceph_health = $ceph_status->{health}->{status};
+	if (!$ceph_health) {
+	    log_fail("unable to determine Ceph health!");
+	} elsif ($ceph_health eq 'HEALTH_OK') {
+	    log_pass("Ceph health reported as 'HEALTH_OK'.");
+	} elsif ($ceph_health eq 'HEALTH_WARN' && $noout && (keys %{$ceph_status->{health}->{checks}} == 1)) {
+	    log_pass("Ceph health reported as 'HEALTH_WARN' with a single failing check and 'noout' flag set.");
+	} else {
+	    log_warn("Ceph health reported as '$ceph_health'");
+	}
+    }
+
+    log_info("getting Ceph OSD flags..");
+    if (!$osd_flags) {
+	log_fail("unable to get Ceph OSD flags!");
+    } else {
+	if ($osd_flags =~ m/recovery_deletes/ && $osd_flags =~ m/purged_snapdirs/) {
+	    log_pass("all PGs have been scrubbed at least once while running Ceph Luminous.");
+	} else {
+	    log_fail("missing 'recovery_deletes' and/or 'purged_snapdirs' flag, scrub of all PGs required before upgrading to Nautilus!");
+	}
+	if ($noout) {
+	    log_pass("noout flag set to prevent rebalancing during cluster-wide upgrades.");
+	} else {
+	    log_warn("noout flag not set - recommended to prevent rebalancing during upgrades.");
+	}
+    }
+
+    log_info("getting Ceph daemon versions..");
+    my $ceph_versions = eval { PVE::Ceph::Tools::get_cluster_versions(undef, 1); };
+    if (!$ceph_versions) {
+	log_fail("unable to determine Ceph daemon versions!");
+    } else {
+	my $services = [
+	    { 'key' => 'mon', 'name' => 'monitor' },
+	    { 'key' => 'mgr', 'name' => 'manager' },
+	    { 'key' => 'mds', 'name' => 'MDS' },
+	    { 'key' => 'osd', 'name' => 'OSD' },
+	];
+
+	foreach my $service (@$services) {
+	    my $name = $service->{name};
+	    if (my $service_versions = $ceph_versions->{$service->{key}}) {
+		if (keys %$service_versions == 0) {
+		    log_skip("no running instances detected for daemon type $name.");
+		} elsif (keys %$service_versions == 1) {
+		    log_pass("single running version detected for daemon type $name.");
+		} else {
+		    log_warn("multiple running versions detected for daemon type $name!");
+		}
+	    } else {
+		log_skip("unable to determine versions of running Ceph $name instances.");
+	    }
+	}
+
+	my $overall_versions = $ceph_versions->{overall};
+	if (!$overall_versions) {
+	    log_warn("unable to determine overall Ceph daemon versions!");
+	} elsif (keys %$overall_versions == 1) {
+	    log_pass("single running overall version detected for all Ceph daemon types.");
+	} else {
+	    log_warn("overall version mismatch detected, check 'ceph versions' output for details!");
+	}
+    }
+}
+
+# Miscellaneous checks: root's SSH cipher config, free space on '/', running guests.
+sub check_misc {
+    print "\nMISCELLANEOUS CHECKS\n\n";
+    # $ssh_config is undef when reading the file dies inside the eval, so guard the match
+    my $ssh_config = eval { PVE::Tools::file_get_contents('/root/.ssh/config') };
+    log_fail("Unsupported SSH Cipher configured for root in /root/.ssh/config: $1")
+	if defined($ssh_config) && $ssh_config =~ /^Ciphers .*(blowfish|arcfour|3des).*$/m;
+
+    my $root_free = PVE::Tools::df('/', 10);
+    log_warn("Less than 2G free space on root file system.")
+	if defined($root_free) && $root_free->{avail} < 2*1024*1024*1024;
+
+    my $running_guests = 0;
+    my $vms = eval { PVE::API2::Qemu->vmlist({ node => $nodename }) };
+    log_warn("Failed to retrieve information about this node's VMs - $@") if $@;
+    $running_guests += grep { $_->{status} eq 'running' } @$vms if defined($vms);
+    my $cts = eval { PVE::API2::LXC->vmlist({ node => $nodename }) };
+    log_warn("Failed to retrieve information about this node's CTs - $@") if $@;
+    $running_guests += grep { $_->{status} eq 'running' } @$cts if defined($cts);
+    log_warn("$running_guests running guests detected - consider migrating/stopping them.")
+	if $running_guests > 0;
+}
+
+__PACKAGE__->register_method ({
+    name => 'checklist',
+    path => 'checklist',
+    method => 'GET',
+    description => 'Check (pre-/post-)upgrade conditions.',
+    parameters => {
+	additionalProperties => 0,
+	properties => {
+	},
+    },
+    returns => { type => 'null' },
+    code => sub {
+	my ($param) = @_;
+	# run every check; findings are printed and tallied in $counters as they occur
+	check_pve_packages();
+	check_cluster_corosync();
+	check_ceph();
+	check_storage_health();
+	check_misc();
+	# print the per-level tally collected while the checks ran
+	print "\n\nSUMMARY:\n";
+	print "PASSED: $counters->{pass}\n";
+	print "SKIPPED: $counters->{skip}\n";
+	print "WARNINGS: $counters->{warn}\n";
+	print "FAILURES: $counters->{fail}\n";
+
+	print "\nATTENTION: Please check the output for detailed information!\n"
+	    if ($counters->{warn} > 0 || $counters->{fail} > 0);
+
+	return undef;
+    }});
+
+our $cmddef = { # maps the 'checklist' command to the 'checklist' method of this package
+    checklist => [ __PACKAGE__, 'checklist', [], {}], # NOTE(review): meaning of the trailing [] and {} is defined by PVE::CLIHandler - confirm
+};
+
+1;
diff --git a/bin/pve5to6 b/bin/pve5to6
new file mode 100755
index 00000000..4802e185
--- /dev/null
+++ b/bin/pve5to6
@@ -0,0 +1,10 @@
+#!/usr/bin/perl -T
+# CLI entry point: loads PVE::CLI::pve5to6 and hands control to its run_cli_handler().
+use strict;
+use warnings;
+
+use lib qw(.);
+
+use PVE::CLI::pve5to6;
+
+PVE::CLI::pve5to6->run_cli_handler();
-- 
2.20.1





More information about the pve-devel mailing list