[pve-devel] [PATCH manager 4/7] add PVE/Jobs to handle VZDump jobs
Dominik Csapak
d.csapak at proxmox.com
Tue Nov 2 15:33:40 CET 2021
On 11/2/21 14:52, Fabian Ebner wrote:
[snip]
>> +
>> +sub lock_job_state {
>> + my ($jobid, $type, $sub) = @_;
>> +
>> + my $filename = "$lock_dir/$type-$jobid.lck";
>
> Should new locks use .lock?
yes, thanks, most of such things are copied...
>
>> +
>> + my $res = PVE::Tools::lock_file($filename, 10, $sub);
>> + die $@ if $@;
>> +
>> + return $res;
>> +}
>> +
>> +# returns the state and checks if the job was 'started' and is now
>> finished
>> +sub get_job_state {
>
> It feels a bit weird that this function does two things. Maybe it's
> worth separating the "check if finished" part?
yeah probably, but i'd still want to do them under a single lock
so get_and_check_job_state would be better?
>
>> + my ($jobid, $type) = @_;
>> +
>> + # first check unlocked to save time,
>> + my $state = read_job_state($jobid, $type);
>> + if ($state->{state} ne 'started') {
>> + return $state; # nothing to check
>> + }
>> +
>> + return lock_job_state($jobid, $type, sub {
>> + my $state = read_job_state($jobid, $type);
>> +
>> + if ($state->{state} ne 'started') {
>> + return $state; # nothing to check
>> + }
>> +
>> + my ($task, $filename) = PVE::Tools::upid_decode($state->{upid}, 1);
>> + die "unable to parse worker upid\n" if !$task;
>> + die "no such task\n" if ! -f $filename;
>> +
>> + my $pstart = PVE::ProcFSTools::read_proc_starttime($task->{pid});
>> + if ($pstart && $pstart == $task->{pstart}) {
>> + return $state; # still running
>> + }
>> +
>> + my $new_state = {
>> + state => 'stopped',
>> + msg => PVE::Tools::upid_read_status($state->{upid}),
>> + upid => $state->{upid}
>> + };
>> +
>> + my $path = $get_state_file->($jobid, $type);
>> + PVE::Tools::file_set_contents($path, encode_json($new_state));
>> + return $new_state;
>> + });
>> +}
>> +
>> +# must be called when the job is first created
>> +sub create_job {
>> + my ($jobid, $type) = @_;
>> +
>> + lock_job_state($jobid, $type, sub {
>> + my $state = read_job_state($jobid, $type);
>> +
>> + if ($state->{state} ne 'created') {
>> + die "job state already exists\n";
>> + }
>> +
>> + $state->{time} = time();
>> +
>> + my $path = $get_state_file->($jobid, $type);
>> + PVE::Tools::file_set_contents($path, encode_json($state));
>> + });
>> +}
>> +
>> +# to be called when the job is removed
>> +sub remove_job {
>> + my ($jobid, $type) = @_;
>> + my $path = $get_state_file->($jobid, $type);
>> + unlink $path;
>> +}
>
> From what I can tell, this can be called while the job might be active
> and then suddenly the state file is gone. Is that a problem?
you're right, the finished job would be writing to the file again
the solution would imho be to check if the statefile still exists
on finishing. if not, do not write (because the jobs does not exist anymore)
or we could forbid removing a job while its still running..
>
>> +
>> +# will be called when the job was started according to schedule
>> +sub started_job {
>> + my ($jobid, $type, $upid) = @_;
>> + lock_job_state($jobid, $type, sub {
>> + my $state = {
>> + state => 'started',
>> + upid => $upid,
>> + };
>> +
>> + my $path = $get_state_file->($jobid, $type);
>> + PVE::Tools::file_set_contents($path, encode_json($state));
>> + });
>> +}
>> +
>> +# will be called when the job schedule is updated
>> +sub updated_job_schedule {
>> + my ($jobid, $type) = @_;
>> + lock_job_state($jobid, $type, sub {
>> + my $old_state = read_job_state($jobid, $type);
>> +
>> + if ($old_state->{state} eq 'started') {
>> + return; # do not update timestamp on running jobs
>> + }
>> +
>> + $old_state->{updated} = time();
>> +
>> + my $path = $get_state_file->($jobid, $type);
>> + PVE::Tools::file_set_contents($path, encode_json($old_state));
>> + });
>> +}
>> +
>> +sub get_last_runtime {
>> + my ($jobid, $type) = @_;
>> +
>> + my $state = read_job_state($jobid, $type);
>> +
>> + if (defined($state->{updated})) {
>> + return $state->{updated};
>> + }
>
> I'm confused. Why does 'updated' overwrite the last runtime? Should it
> return 0 like for a new job? Is there a problem returning the normal
> last runtime for an updated job?
for the next run we have to consider the updated time
(new jobs get the current time, not 0)
because if i change from e.g. 10:00 to 8:00 and it is
currently 9:00, it would be unexpected that the
job starts instantly, instead of the next time its 10:00
(we do the same in pbs for all kinds of jobs)
>
>> +
>> + if (my $upid = $state->{upid}) {
>> + my ($task) = PVE::Tools::upid_decode($upid, 1);
>> + die "unable to parse worker upid\n" if !$task;
>> + return $task->{starttime};
>> + }
>> +
>> + return $state->{time} // 0;
>> +}
>> +
>> +sub run_jobs {
>> + my $jobs_cfg = cfs_read_file('jobs.cfg');
>> + my $nodename = PVE::INotify::nodename();
>> +
>> + foreach my $id (sort keys %{$jobs_cfg->{ids}}) {
>> + my $cfg = $jobs_cfg->{ids}->{$id};
>> + my $type = $cfg->{type};
>> + my $schedule = delete $cfg->{schedule};
>
> Why delete?
good question, it's maybe leftover of some intermittent code
change. i'll check and change/comment on it in a v2
it's probably because the vzdump code does not understand the
'schedule' parameter and it's more of a 'meta' information
>
>> +
>> + # only schedule local jobs
>> + next if defined($cfg->{node}) && $cfg->{node} eq $nodename;
>
> Shouldn't this be 'ne'?
you are absolutely right ;)
>
>> +
>> + # only schedule enabled jobs
>> + next if defined($cfg->{enabled}) && !$cfg->{enabled};
>> +
>> + my $last_run = get_last_runtime($id, $type);
>> + my $calspec = PVE::CalendarEvent::parse_calendar_event($schedule);
>> + my $next_sync = PVE::CalendarEvent::compute_next_event($calspec,
>> $last_run) // 0;
>> +
>> + if (time() >= $next_sync) {
>> + # only warn on errors, so that all jobs can run
>> + my $state = get_job_state($id, $type); # to update the state
>> +
>> + next if $state->{state} eq 'started'; # still running
>> +
>> + my $plugin = PVE::Jobs::Plugin->lookup($type);
>> +
>> + my $upid = eval { $plugin->run($cfg) };
>> + warn $@ if $@;
>> + if ($upid) {
>> + started_job($id, $type, $upid);
>> + }
>> + }
>> + }
>> +}
>> +
>> +sub setup_dirs {
>> + mkdir $state_dir;
>> + mkdir $lock_dir;
>> +}
>> +
>> +1;
>> diff --git a/PVE/Jobs/Makefile b/PVE/Jobs/Makefile
>> new file mode 100644
>> index 00000000..6023c3ba
>> --- /dev/null
>> +++ b/PVE/Jobs/Makefile
>> @@ -0,0 +1,16 @@
>> +include ../../defines.mk
>> +
>> +PERLSOURCE = \
>> + Plugin.pm\
>> + VZDump.pm
>> +
>> +all:
>> +
>> +.PHONY: clean
>> +clean:
>> + rm -rf *~
>> +
>> +.PHONY: install
>> +install: ${PERLSOURCE}
>> + install -d ${PERLLIBDIR}/PVE/Jobs
>> + install -m 0644 ${PERLSOURCE} ${PERLLIBDIR}/PVE/Jobs
>> diff --git a/PVE/Jobs/Plugin.pm b/PVE/Jobs/Plugin.pm
>> new file mode 100644
>> index 00000000..69c31cf2
>> --- /dev/null
>> +++ b/PVE/Jobs/Plugin.pm
>> @@ -0,0 +1,61 @@
>> +package PVE::Jobs::Plugin;
>> +
>> +use strict;
>> +use warnings;
>> +
>> +use PVE::Cluster qw(cfs_register_file);
>> +
>> +use base qw(PVE::SectionConfig);
>> +
>> +cfs_register_file('jobs.cfg',
>> + sub { __PACKAGE__->parse_config(@_); },
>> + sub { __PACKAGE__->write_config(@_); });
>> +
>> +my $defaultData = {
>> + propertyList => {
>> + type => { description => "Section type." },
>> + id => {
>> + description => "The ID of the VZDump job.",
>> + type => 'string',
>> + format => 'pve-configid',
>> + },
>> + enabled => {
>> + description => "Determines if the job is enabled.",
>> + type => 'boolean',
>> + default => 1,
>> + optional => 1,
>> + },
>> + schedule => {
>> + description => "Backup schedule. The format is a subset of
>> `systemd` calendar events.",
>> + type => 'string', format => 'pve-calendar-event',
>> + maxLength => 128,
>> + },
>> + },
>> +};
>> +
>> +sub private {
>> + return $defaultData;
>> +}
>> +
>> +sub parse_config {
>> + my ($class, $filename, $raw) = @_;
>> +
>> + my $cfg = $class->SUPER::parse_config($filename, $raw);
>> +
>> + foreach my $id (sort keys %{$cfg->{ids}}) {
>> + my $data = $cfg->{ids}->{$id};
>> +
>> + $data->{id} = $id;
>> + $data->{enabled} //= 1;
>> + }
>> +
>> + return $cfg;
>> +}
>> +
>> +sub run {
>> + my ($class, $cfg) = @_;
>> + # implement in subclass
>> + die "not implemented";
>> +}
>> +
>> +1;
>> diff --git a/PVE/Jobs/VZDump.pm b/PVE/Jobs/VZDump.pm
>> new file mode 100644
>> index 00000000..043b7ace
>> --- /dev/null
>> +++ b/PVE/Jobs/VZDump.pm
>> @@ -0,0 +1,54 @@
>> +package PVE::Jobs::VZDump;
>> +
>> +use strict;
>> +use warnings;
>> +
>> +use PVE::INotify;
>> +use PVE::VZDump::Common;
>> +use PVE::API2::VZDump;
>> +use PVE::Cluster;
>> +
>> +use base qw(PVE::Jobs::Plugin);
>> +
>> +sub type {
>> + return 'vzdump';
>> +}
>> +
>> +my $props = PVE::VZDump::Common::json_config_properties();
>> +
>> +sub properties {
>> + return $props;
>> +}
>> +
>> +sub options {
>> + my $options = {
>> + enabled => { optional => 1 },
>> + schedule => {},
>> + };
>> + foreach my $opt (keys %$props) {
>> + if ($props->{$opt}->{optional}) {
>> + $options->{$opt} = { optional => 1 };
>> + } else {
>> + $options->{$opt} = {};
>> + }
>> + }
>> +
>> + return $options;
>> +}
>> +
>> +sub run {
>> + my ($class, $conf) = @_;
>> +
>> + # remove all non vzdump related options
>> + foreach my $opt (keys %$conf) {
>> + delete $conf->{$opt} if !defined($props->{$opt});
>> + }
>> +
>> + $conf->{quiet} = 1; # do not write to stdout/stderr
>> +
>> + PVE::Cluster::cfs_update(); # refresh vmlist
>> +
>> + return PVE::API2::VZDump->vzdump($conf);
>> +}
>> +
>> +1;
>> diff --git a/PVE/Makefile b/PVE/Makefile
>> index 0071fab1..48b85d33 100644
>> --- a/PVE/Makefile
>> +++ b/PVE/Makefile
>> @@ -1,6 +1,6 @@
>> include ../defines.mk
>> -SUBDIRS=API2 Status CLI Service Ceph
>> +SUBDIRS=API2 Status CLI Service Ceph Jobs
>> PERLSOURCE = \
>> API2.pm \
>> @@ -11,6 +11,7 @@ PERLSOURCE = \
>> CertHelpers.pm \
>> ExtMetric.pm \
>> HTTPServer.pm \
>> + Jobs.pm \
>> NodeConfig.pm \
>> Report.pm \
>> VZDump.pm
>>
More information about the pve-devel
mailing list