[pve-devel] [PATCH access-control v2 2/2] add realmsync plugin for jobs and CRUD api for domainsync-jobs

Thomas Lamprecht t.lamprecht at proxmox.com
Tue Dec 13 14:43:16 CET 2022


Some mostly API and slightly higher level or cosmetical comments, but also some issue with
how you use the cfs domain locks at the end.

Also, a patch-wide s/realmsync/realm-sync/ would be great.

Am 06/12/2022 um 12:06 schrieb Dominik Csapak:
> to be able to define automated jobs that sync ldap/ad
> 
> The jobs plugin contains special handling when no node is given, since
> we only want it to run on a single node when that triggers. For that,
> we save a statefile in /etc/pve/priv/jobs/ which contains the
> node/time/upid of the node that runs the job. The first node that
> is able to lock the realm (via cfs_lock_domain) "wins" and may
> sync from the ldap.
> 
> in case a specific node was selected, this is omitted and the Jobs
> handling will not let it run on other nodes anyway
> 
> the API part is our usual sectionconfig CRUD api, but specialized
> for the specific type of job.
> 
> Signed-off-by: Dominik Csapak <d.csapak at proxmox.com>
> ---
>  src/PVE/API2/AccessControl.pm               |   6 +
>  src/PVE/API2/AccessControl/Job.pm           |  47 +++
>  src/PVE/API2/AccessControl/Job/Makefile     |   6 +
>  src/PVE/API2/AccessControl/Job/RealmSync.pm | 324 ++++++++++++++++++++
>  src/PVE/API2/AccessControl/Makefile         |   9 +
>  src/PVE/API2/Makefile                       |   4 +
>  src/PVE/Jobs/Makefile                       |   6 +
>  src/PVE/Jobs/RealmSync.pm                   | 193 ++++++++++++
>  src/PVE/Makefile                            |   1 +
>  9 files changed, 596 insertions(+)
>  create mode 100644 src/PVE/API2/AccessControl/Job.pm
>  create mode 100644 src/PVE/API2/AccessControl/Job/Makefile
>  create mode 100644 src/PVE/API2/AccessControl/Job/RealmSync.pm
>  create mode 100644 src/PVE/API2/AccessControl/Makefile
>  create mode 100644 src/PVE/Jobs/Makefile
>  create mode 100644 src/PVE/Jobs/RealmSync.pm
> 
> diff --git a/src/PVE/API2/AccessControl.pm b/src/PVE/API2/AccessControl.pm
> index 104e7e3..221ecb2 100644
> --- a/src/PVE/API2/AccessControl.pm
> +++ b/src/PVE/API2/AccessControl.pm
> @@ -14,6 +14,7 @@ use PVE::DataCenterConfig;
>  use PVE::RESTHandler;
>  use PVE::AccessControl;
>  use PVE::JSONSchema qw(get_standard_option);
> +use PVE::API2::AccessControl::Job;
>  use PVE::API2::Domains;
>  use PVE::API2::User;
>  use PVE::API2::Group;
> @@ -57,6 +58,11 @@ __PACKAGE__->register_method ({
>      path => 'domains',
>  });
>  
> +__PACKAGE__->register_method ({
> +    subclass => "PVE::API2::AccessControl::Job",
> +    path => 'jobs',
> +});
> +
>  __PACKAGE__->register_method ({
>      subclass => "PVE::API2::OpenId",
>      path => 'openid',
> diff --git a/src/PVE/API2/AccessControl/Job.pm b/src/PVE/API2/AccessControl/Job.pm
> new file mode 100644
> index 0000000..cf2d0a9
> --- /dev/null
> +++ b/src/PVE/API2/AccessControl/Job.pm
> @@ -0,0 +1,47 @@
> +package PVE::API2::AccessControl::Job;
> +
> +use strict;
> +use warnings;
> +
> +use PVE::RESTHandler;
> +
> +use PVE::API2::AccessControl::Job::RealmSync;
> +
> +use base qw(PVE::RESTHandler);
> +
> +__PACKAGE__->register_method ({
> +    subclass => "PVE::API2::AccessControl::Job::RealmSync",
> +    path => 'realm-sync',
> +});
> +
> +__PACKAGE__->register_method ({
> +    name => 'index',
> +    path => '',
> +    method => 'GET',
> +    description => "Directory index.",
> +    permissions => {
> +	user => 'all',
> +    },
> +    parameters => {
> +	additionalProperties => 0,
> +	properties => {},
> +    },
> +    returns => {
> +	type => 'array',
> +	items => {
> +	    type => "object",
> +	    properties => {
> +		subdir => { type => 'string' },
> +	    },
> +	},
> +	links => [ { rel => 'child', href => "{subdir}" } ],
> +    },
> +    code => sub {
> +	my ($param) = @_;
> +
> +	return {
> +	    subdir => 'realm-sync',
> +	};

as from the return type schema this should be an array:

return [
    { subdir => 'realm-sync' },
];

> +    }});
> +
> +1;
> diff --git a/src/PVE/API2/AccessControl/Job/Makefile b/src/PVE/API2/AccessControl/Job/Makefile
> new file mode 100644
> index 0000000..5feae4b
> --- /dev/null
> +++ b/src/PVE/API2/AccessControl/Job/Makefile
> @@ -0,0 +1,6 @@
> +API2_SOURCES= 		\
> +	RealmSync.pm	\
> +
> +.PHONY: install
> +install:
> +	for i in ${API2_SOURCES}; do install -D -m 0644 $$i ${DESTDIR}${PERLDIR}/PVE/API2/AccessControl/Job/$$i; done
> diff --git a/src/PVE/API2/AccessControl/Job/RealmSync.pm b/src/PVE/API2/AccessControl/Job/RealmSync.pm
> new file mode 100644
> index 0000000..f14bb2b
> --- /dev/null
> +++ b/src/PVE/API2/AccessControl/Job/RealmSync.pm
> @@ -0,0 +1,324 @@
> +package PVE::API2::AccessControl::Job::RealmSync;
> +
> +use strict;
> +use warnings;
> +
> +use JSON;
> +
> +use PVE::Cluster qw (cfs_read_file cfs_write_file cfs_lock_file);
> +use PVE::Exception qw(raise_param_exc);
> +use PVE::JSONSchema qw(get_standard_option);
> +use PVE::Job::Registry;
> +use PVE::RESTHandler;
> +use PVE::SafeSyslog;
> +use PVE::Tools qw(extract_param);
> +
> +use PVE::AccessControl;
> +use PVE::Auth::Plugin;
> +use PVE::Jobs::RealmSync;
> +
> +
> +use base qw(PVE::RESTHandler);
> +
> +my $get_cluster_last_run = sub {
> +    my ($jobid) = @_;
> +
> +    my $fn = "/etc/pve/priv/jobs/realmsync-$jobid.json";
> +    my $raw = PVE::Tools::file_get_contents($fn);
> +    my $state = eval { decode_json($raw) };
> +    die "json decode error on $fn: $@\n" if $@;
> +
> +    if (my $upid = $state->{upid}) {
> +	if (my $decoded = PVE::Tools::upid_decode($upid)) {
> +	    return $decoded->{starttime};
> +	}
> +    } else {
> +	return $state->{time};
> +    }
> +
> +    return undef;
> +};
> +
> +__PACKAGE__->register_method ({
> +    name => 'syncjob_index',
> +    path => '',
> +    method => 'GET',
> +    description => "List configured realm-sync-jobs.",
> +    permissions => {
> +	check => ['perm', '/', ['Sys.Audit']],
> +    },
> +    parameters => {
> +	additionalProperties => 0,
> +	properties => {},
> +    },
> +    returns => {
> +	type => 'array',
> +	items => {
> +	    type => "object",
> +	    properties => {
> +		id => {
> +		    description => "The ID of the entry.",
> +		    type => 'string'
> +		},
> +		enabled => {
> +		    description => "If the job is enalbed or not.",

typo: enabled

> +		    type => 'boolean',
> +		},
> +		comment => {
> +		    description => "A comment for the job.",
> +		    type => 'string',
> +		    optional => 1,
> +		},
> +		schedule => {
> +		    description => "The configured sync schedule.",
> +		    type => 'string',
> +		    optional => 1,

do we want to allow a empty schedule if there's a enable boolean switch anyway?

Or is this for making the "only used to run manually" use case slightly less confusing
to configure (as enabled=false + bogus schedule might be weird for some)?

> +		},
> +		realm => {
> +		    description => "The realm to sync.",
> +		    type => 'string',
> +		    optional => 1,

is this really optional?

> +		},
> +		node => {
> +		    description => "The node to run the job on, if any.",

what if this is empty? might want to communicate that over the description, either with
extra sentence or something like "The node the job is limited too run on"

Albeit it's IMO a bit of a odd thing in general, as especially realm syncs will always
have effects on the whole cluster, so why expose that and not just auto select one
(e.h., jitter sleep + node lock + check if recently run); we can always add such things
if there's an actual need for it.

> +		    type => 'string',
> +		    optional => 1,
> +		},
> +		scope => {
> +		    description => "The Selection of what to sync.",

might want to define the enum here? or better said, should this be a (registered) format
we can reuse here?

> +		    type => 'string',
> +		    optional => 1,
> +		},
> +		'remove-vanished' => {
> +		    description => "A list of things to remove when the entity vanished during a sync.",

same here

> +		    type => 'string',
> +		    optional => 1,
> +		},
> +		'last-run' => {
> +		    description => "UNIX epoch when the job last ran.",

"Last execution time of the job in seconds since the beginning of the UNIX epoch."

> +		    type => 'integer',
> +		    optional => 1,
> +		},
> +		'next-run' => {
> +		    description => "UNIX epoch when the job is planned to run next.",

"Next planned execution time of the job in seconds since the beginning of the UNIX epoch."

> +		    type => 'integer',
> +		    optional => 1,
> +		},
> +	    },
> +	},
> +	links => [ { rel => 'child', href => "{id}" } ],
> +    },
> +    code => sub {
> +	my ($param) = @_;
> +
> +	my $rpcenv = PVE::RPCEnvironment::get();
> +	my $user = $rpcenv->get_user();
> +
> +	my $jobs_data = cfs_read_file('jobs.cfg');
> +	my $order = $jobs_data->{order};
> +	my $jobs = $jobs_data->{ids};
> +
> +	my $res = [];
> +	for my $jobid (sort { $order->{$a} <=> $order->{$b} } keys %$jobs) {
> +	    my $job = $jobs->{$jobid};
> +	    next if $job->{type} ne 'realmsync';

Should the "type" be "realm-sync" for consistency?

> +
> +	    $job->{id} = $jobid;
> +	    if (my $schedule = $job->{schedule}) {
> +		$job->{'last-run'} = eval { $get_cluster_last_run->($jobid) };
> +		my $last_run = $job->{'last-run'} // time(); # current time as fallback
> +		my $calendar_event = Proxmox::RS::CalendarEvent->new($schedule);
> +		my $next_run = $calendar_event->compute_next_event($last_run);
> +		$job->{'next-run'} = $next_run if defined($next_run);
> +	    }
> +
> +	    push @$res, $job;
> +	}
> +
> +	return $res;
> +    }});
> +
> +__PACKAGE__->register_method({
> +    name => 'read_job',
> +    path => '{id}',
> +    method => 'GET',
> +    description => "Read realm-sync job definition.",
> +    permissions => {
> +	check => ['perm', '/', ['Sys.Audit']],
> +    },
> +    parameters => {
> +	additionalProperties => 0,
> +	properties => {
> +	    id => {
> +		type => 'string',
> +		format => 'pve-configid',
> +	    },
> +	},
> +    },
> +    returns => {
> +	type => 'object',
> +    },
> +    code => sub {
> +	my ($param) = @_;
> +
> +	my $jobs = cfs_read_file('jobs.cfg');
> +	my $id = $param->{id};
> +	my $job = $jobs->{ids}->{$id};
> +	return $job if $job && $job->{type} eq 'realmsync';
> +
> +	raise_param_exc({ id => "No such job '$id'" });
> +
> +    }});
> +
> +__PACKAGE__->register_method({
> +    name => 'create_job',
> +    path => '{id}',
> +    method => 'POST',
> +    protected => 1,
> +    description => "Create new realm-sync job.",
> +    permissions => {
> +	description => "'Realm.AllocateUser' on '/access/realm/<realm>' and "
> +	    ."'User.Modify' permissions to '/access/groups/'.",
> +	check => [ 'and',
> +	    ['perm', '/access/realm/{realm}', ['Realm.AllocateUser']],
> +	    ['perm', '/access/groups', ['User.Modify']],
> +	],
> +    },
> +    parameters => PVE::Jobs::RealmSync->createSchema(),
> +    returns => { type => 'null' },
> +    code => sub {
> +	my ($param) = @_;
> +
> +	my $id = extract_param($param, 'id');
> +
> +	cfs_lock_file('jobs.cfg', undef, sub {
> +	    my $data = cfs_read_file('jobs.cfg');
> +
> +	    die "Job '$id' already exists\n"
> +		if $data->{ids}->{$id};
> +
> +	    my $plugin = PVE::Job::Registry->lookup('realmsync');
> +	    my $opts = $plugin->check_config($id, $param, 1, 1);
> +
> +	    my $realm = $opts->{realm};
> +	    my $cfg = cfs_read_file('domains.cfg');
> +
> +	    raise_param_exc({ realm => "No such realm '$realm'" })
> +		if !defined($cfg->{ids}->{$realm});
> +
> +	    my $realm_type = $cfg->{ids}->{$realm}->{type};
> +	    raise_param_exc({ realm => "Only LDAP/AD realms can be synced." })
> +		if $realm_type ne 'ldap' && $realm_type ne 'ad';
> +
> +	    $data->{ids}->{$id} = $opts;
> +
> +	    PVE::Jobs::create_job($id, 'realmsync');
> +
> +	    cfs_write_file('jobs.cfg', $data);
> +	});
> +	die "$@" if ($@);
> +
> +	return undef;
> +    }});
> +
> +__PACKAGE__->register_method({
> +    name => 'update_job',
> +    path => '{id}',
> +    method => 'PUT',
> +    protected => 1,
> +    description => "Update realm-sync job definition.",
> +    permissions => {
> +	description => "'Realm.AllocateUser' on '/access/realm/<realm>' and "
> +	    ." 'User.Modify' permissions to '/access/groups/'.",
> +	check => [ 'and',
> +	    ['perm', '/access/realm/{realm}', ['Realm.AllocateUser']],
> +	    ['perm', '/access/groups', ['User.Modify']],
> +	],
> +    },
> +    parameters => PVE::Jobs::RealmSync->updateSchema(),
> +    returns => { type => 'null' },
> +    code => sub {
> +	my ($param) = @_;
> +
> +	my $id = extract_param($param, 'id');
> +	my $delete = extract_param($param, 'delete');
> +	if ($delete) {
> +	    $delete = [PVE::Tools::split_list($delete)];
> +	}
> +
> +	cfs_lock_file('jobs.cfg', undef, sub {
> +	    my $jobs = cfs_read_file('jobs.cfg');
> +
> +	    die "no options specified\n" if !scalar(keys %$param);
> +
> +	    my $plugin = PVE::Job::Registry->lookup('realmsync');
> +	    my $opts = $plugin->check_config($id, $param, 0, 1);
> +
> +	    my $job = $jobs->{ids}->{$id};
> +	    die "no such realmsync job\n" if !$job || $job->{type} ne 'realmsync';
> +
> +	    my $options = $plugin->options();
> +	    for my $k (@$delete) {
> +		my $d = $options->{$k} || die "no such option '$k'\n";
> +		die "unable to delete required option '$k'\n" if !$d->{optional};
> +		die "unable to delete fixed option '$k'\n" if $d->{fixed};
> +		die "cannot set and delete property '$k' at the same time!\n"
> +			if defined($opts->{$k});
> +		delete $job->{$k};
> +	    }

this seems something that we have a few times already in some form, might want to add this
as helper somehere, possibly to section config.

> +
> +	    $job->{$_} = $param->{$_} for keys $param->%*;
> +
> +	    cfs_write_file('jobs.cfg', $jobs);
> +
> +	    PVE::Jobs::detect_changed_runtime_props($id, 'realmsync', $job);
> +
> +	    return;
> +	});
> +	die "$@" if ($@);
> +    }});
> +
> +
> +__PACKAGE__->register_method({
> +    name => 'delete_job',
> +    path => '{id}',
> +    method => 'DELETE',
> +    description => "Delete realm-sync job definition.",
> +    permissions => {
> +	check => ['perm', '/', ['Sys.Modify']],
> +    },
> +    protected => 1,
> +    parameters => {
> +	additionalProperties => 0,
> +	properties => {
> +	    id => {
> +		type => 'string',
> +		format => 'pve-configid',
> +	    },
> +	},
> +    },
> +    returns => { type => 'null' },
> +    code => sub {
> +	my ($param) = @_;
> +
> +	my $id = $param->{id};
> +
> +	cfs_lock_file('jobs.cfg', undef, sub {
> +	    my $jobs = cfs_read_file('jobs.cfg');
> +
> +	    if (!defined($jobs->{ids}->{$id}) || $jobs->{ids}->{$id}->{type} ne 'realmsync') {
> +		raise_param_exc({ id => "No such job '$id'" });
> +	    }
> +	    delete $jobs->{ids}->{$id};
> +
> +	    PVE::Jobs::remove_job($id, 'realmsync');> +
> +	    cfs_write_file('jobs.cfg', $jobs);
> +	    unlink "/etc/pve/priv/jobs/realmsync-$id.json";

should be contained in a sub, either save_state($id, undef) or explicit delete_state one (names
are just examples)

> +	});
> +	die "$@" if $@;
> +
> +	return undef;
> +    }});
> +
> +1;
> diff --git a/src/PVE/API2/AccessControl/Makefile b/src/PVE/API2/AccessControl/Makefile
> new file mode 100644
> index 0000000..8d3fb58
> --- /dev/null
> +++ b/src/PVE/API2/AccessControl/Makefile
> @@ -0,0 +1,9 @@
> +API2_SOURCES= 	\
> +	Job.pm	\
> +
> +SUBDIRS=Job
> +
> +.PHONY: install
> +install:
> +	for i in ${API2_SOURCES}; do install -D -m 0644 $$i ${DESTDIR}${PERLDIR}/PVE/API2/AccessControl/$$i; done
> +	set -e && for i in ${SUBDIRS}; do ${MAKE} -C $$i $@; done
> diff --git a/src/PVE/API2/Makefile b/src/PVE/API2/Makefile
> index 2817f48..7d2be93 100644
> --- a/src/PVE/API2/Makefile
> +++ b/src/PVE/API2/Makefile
> @@ -2,6 +2,7 @@
>  API2_SOURCES= 		 	\
>  	AccessControl.pm 	\
>  	Domains.pm	 	\
> +	RealmSync.pm	 	\
>  	ACL.pm		 	\
>  	Role.pm		 	\
>  	Group.pm	 	\
> @@ -9,6 +10,9 @@ API2_SOURCES= 		 	\
>  	TFA.pm			\
>  	OpenId.pm
>  
> +SUBDIRS=AccessControl
> +
>  .PHONY: install
>  install:
>  	for i in ${API2_SOURCES}; do install -D -m 0644 $$i ${DESTDIR}${PERLDIR}/PVE/API2/$$i; done
> +	set -e && for i in ${SUBDIRS}; do ${MAKE} -C $$i $@; done
> diff --git a/src/PVE/Jobs/Makefile b/src/PVE/Jobs/Makefile
> new file mode 100644
> index 0000000..9eed1b2
> --- /dev/null
> +++ b/src/PVE/Jobs/Makefile
> @@ -0,0 +1,6 @@
> +SOURCES=RealmSync.pm
> +
> +.PHONY: install
> +install: ${SOURCES}
> +	install -d -m 0755 ${DESTDIR}${PERLDIR}/PVE/Jobs
> +	for i in ${SOURCES}; do install -D -m 0644 $$i ${DESTDIR}${PERLDIR}/PVE/Jobs/$$i; done
> diff --git a/src/PVE/Jobs/RealmSync.pm b/src/PVE/Jobs/RealmSync.pm
> new file mode 100644
> index 0000000..e8a86e3
> --- /dev/null
> +++ b/src/PVE/Jobs/RealmSync.pm
> @@ -0,0 +1,193 @@
> +package PVE::Jobs::RealmSync;
> +
> +use strict;
> +use warnings;
> +
> +use JSON;
> +
> +use PVE::JSONSchema qw(get_standard_option);
> +use PVE::Cluster;
> +use PVE::CalendarEvent;
> +use PVE::Tools;
> +
> +use PVE::API2::Domains;
> +
> +use base qw(PVE::Job::Registry);
> +
> +sub type {
> +    return 'realmsync';
> +}
> +
> +my $props = get_standard_option('realm-sync-options', {
> +    realm => get_standard_option('realm'),
> +});
> +
> +sub properties {
> +    return $props;
> +}
> +
> +sub options {
> +    my $options = {
> +	enabled => { optional => 1 },
> +	schedule => {},
> +	comment => { optional => 1 },
> +	node => { optional => 1 },

might want to drop that for now (as commented above in the api return schema) 

> +	scope => {},
> +    };
> +    for my $opt (keys %$props) {
> +	next if defined($options->{$opt});
> +	if ($props->{$opt}->{optional}) {
> +	    $options->{$opt} = { optional => 1 };
> +	} else {
> +	    $options->{$opt} = {};
> +	}
> +    }
> +    $options->{realm}->{fixed} = 1;
> +
> +    return $options;
> +}
> +
> +sub decode_value {
> +    my ($class, $type, $key, $value) = @_;
> +    return $value;
> +}
> +
> +sub encode_value {
> +    my ($class, $type, $key, $value) = @_;
> +    return $value;
> +}
> +
> +sub createSchema {
> +    my ($class, $skip_type) = @_;

add extra newline here

> +    my $schema = $class->SUPER::createSchema($skip_type);
> +
> +    my $opts = $class->options();
> +    for my $opt (keys $schema->{properties}->%*) {
> +	next if defined($opts->{$opt}) || $opt eq 'id';
> +	delete $schema->{properties}->{$opt};
> +    }
> +
> +    # remove legacy props

bit confused, we never had sync jobs, so why should it matter?

> +    delete $schema->{properties}->{full};
> +    delete $schema->{properties}->{purge};
> +
> +    return $schema;
> +}
> +
> +sub updateSchema {
> +    my ($class, $skip_type) = @_;
> +    my $schema = $class->SUPER::updateSchema($skip_type);
> +
> +    my $opts = $class->options();
> +    for my $opt (keys $schema->{properties}->%*) {
> +	next if defined($opts->{$opt});
> +	next if $opt eq 'id' || $opt eq 'delete';
> +	delete $schema->{properties}->{$opt};
> +    }
> +
> +    # remove legacy props

same here

> +    delete $schema->{properties}->{full};
> +    delete $schema->{properties}->{purge};
> +
> +    return $schema;
> +}
> +
> +sub run {
> +    my ($class, $conf, $id, $schedule) = @_;
> +
> +    my $node = $conf->{node};
> +    for my $opt (keys %$conf) {
> +	delete $conf->{$opt} if !defined($props->{$opt});
> +    }
> +
> +    my $realm = $conf->{realm};
> +    my $statedir = "/etc/pve/priv/jobs";
> +    my $statefile = "$statedir/realmsync-$id.json";

s/realmsync/realm-sync/

> +
> +    if (!defined($node)) {
> +	# cluster synced
> +	my $now = time();
> +	my $nodename = PVE::INotify::nodename();
> +
> +	# check statefile in pmxcfs if we should start
> +	my $shouldrun = PVE::Cluster::cfs_lock_domain($realm, undef, sub {

erm, using just the $realm as lock ID can go quite bad, e.g., if a realm is named
"ha" it would interfere with the HA domain locks. Can we prefix this, or just use
the general, non-realm specific "realm-sync"? Not much downside in practice and
adding things like groups might be better, even if just for auditability, if not done
in parallel anyway?

In anyway, a domain lock must never be (just) an $id.


> +	    mkdir $statedir;
> +	    my $raw = eval { PVE::Tools::file_get_contents($statefile) } // '';


a non-node specific kv-store might be really nice, would avoid some DB/disk write
churn (note that this specific one is adding that much extra churn, but in sum
we start to have now quite a few places where it would help, but can me  moved to
such a thing later on too)

> +	    my $members = PVE::Cluster::get_members();
> +
> +	    my $state = ($raw =~ m/^(\{.*\})$/) ? decode_json($1) : {};
> +	    my $lastnode = $state->{node} // $nodename;
> +	    my $lastupid = $state->{upid};
> +	    my $lasttime = $state->{time};

nit: please stay consistent with variable name style (snake_case vs noneatall - prefer the
former, at least if not for things where we used nostyle since pretty much always, e.g.,
nodename, albeit just $node is often fitting for that anyway)

> +
> +	    my $node_online = 0;

should this be $last_node_online?

> +	    if ($lastnode ne $nodename) {
> +		if (defined(my $member = $members->{$lastnode})) {
> +		    $node_online = $member->{online};
> +		}
> +	    } else {
> +		$node_online = 1;
> +	    }

could be way shorter (untested though):
 
my $node_online = $last_node eq $nodename || ($members->{$last_node} // {})->{online};

> +
> +	    if (defined($lastupid)) {
> +		# first check if the next run is scheduled
> +		if (my $parsed = PVE::Tools::upid_decode($lastupid, 1)) {
> +		    my $calspec = PVE::CalendarEvent::parse_calendar_event($schedule);

same here w.r.t. variable naming style

> +		    my $next_sync = PVE::CalendarEvent::compute_next_event($calspec, $parsed->{starttime});
> +		    return 0 if !defined($next_sync) || $now < $next_sync; # not yet its (next) turn
> +		}
> +		# check if still running and node is online
> +		my $tasks = PVE::Cluster::get_tasklist();
> +		for my $task (@$tasks) {
> +		    next if $task->{upid} ne $lastupid;
> +		    last if defined($task->{endtime}); # it's already finished
> +		    last if !$node_online; # it's not finished but the node is offline

s/but/and/ ?

> +		    return 0; # not finished and online
> +		}
> +	    } elsif (defined($lasttime) && ($lasttime+60) > $now && $node_online) {
> +		# another node started in the last 60 seconds and is online

"another node started this job in the last 60 seconds and is still online"

> +		return 0;
> +	    }
> +
> +	    # any of the following conditions should be true here:
> +	    # * it was started on another node but that node is offline now
> +	    # * it was started but either too long ago, or with an error
> +	    # * the started task finished
> +
> +	    PVE::Tools::file_set_contents($statefile, encode_json({
> +		node => $nodename,
> +		time => $now,
> +	    }));

I would slightly prefer to move this into a (just an example name) save_state method,
even if really small it's IMO better to contain it there, same for getting the state above.

> +	    return 1;
> +	});
> +	die $@ if $@;
> +
> +	if ($shouldrun) {
> +	    my $upid = eval { PVE::API2::Domains->sync($conf) };
> +	    my $err = $@;
> +	    PVE::Cluster::cfs_lock_domain($realm, undef, sub {

same here w.r.t. wrongly used domain locking

> +		if ($err && !$upid) {
> +		    PVE::Tools::file_set_contents($statefile, encode_json({
> +			node => $nodename,
> +			time => $now,
> +			error => $err,
> +		    }));
> +		    die "$err\n";
> +		}
> +
> +		PVE::Tools::file_set_contents($statefile, encode_json({
> +		    node => $nodename,
> +		    upid => $upid,
> +		}));

same here with state saving

> +	    });
> +	    die $@ if $@;
> +	    return $upid;
> +	}
> +
> +	return "OK"; # all other cases should not run the sync on this node

possibly stupid question, but why return a string here?

> +    }
> +
> +    return PVE::API2::Domains->sync($conf);
> +}
> +
> +1;
> diff --git a/src/PVE/Makefile b/src/PVE/Makefile
> index c839d8f..dfc5314 100644
> --- a/src/PVE/Makefile
> +++ b/src/PVE/Makefile
> @@ -8,3 +8,4 @@ install:
>  	install -D -m 0644 TokenConfig.pm ${DESTDIR}${PERLDIR}/PVE/TokenConfig.pm
>  	make -C API2 install
>  	make -C CLI install
> +	make -C Jobs install






More information about the pve-devel mailing list