[pmg-devel] [PATCH pmg-api v3 6/8] quarantine: handle utf8 data

Dominik Csapak d.csapak at proxmox.com
Wed Nov 23 15:15:12 CET 2022


I'd like to have some rationale for the changes in the commit message,
at least for the more non-obvious ones (the regex changes, for example).

comments inline

On 11/23/22 10:23, Stoiko Ivanov wrote:
> Signed-off-by: Stoiko Ivanov <s.ivanov at proxmox.com>
> ---
>   src/PMG/API2/Quarantine.pm | 10 +++++-----
>   src/PMG/HTMLMail.pm        |  7 ++++---
>   src/PMG/Quarantine.pm      | 13 +++++++------
>   src/PMG/RuleDB/Spam.pm     | 12 ++++++------
>   4 files changed, 22 insertions(+), 20 deletions(-)
> 
> diff --git a/src/PMG/API2/Quarantine.pm b/src/PMG/API2/Quarantine.pm
> index ddf7c04..819c78c 100644
> --- a/src/PMG/API2/Quarantine.pm
> +++ b/src/PMG/API2/Quarantine.pm
> @@ -141,8 +141,8 @@ my $parse_header_info = sub {
>       my $sender = PMG::Utils::decode_rfc1522(PVE::Tools::trim($head->get('sender')));
>       $res->{sender} = $sender if $sender && ($sender ne $res->{from});
>   
> -    $res->{envelope_sender} = $ref->{sender};
> -    $res->{receiver} = $ref->{receiver} // $ref->{pmail};
> +    $res->{envelope_sender} = PMG::Utils::try_decode_utf8($ref->{sender});
> +    $res->{receiver} = PMG::Utils::try_decode_utf8($ref->{receiver} // $ref->{pmail});

Maybe we should note here in a comment that these are not headers
but part of the SMTP dialog, and so cannot be quoted-printable/base64 encoded?

>       $res->{id} = 'C' . $ref->{cid} . 'R' . $ref->{rid} . 'T' . $ref->{ticketid};
>       $res->{time} = $ref->{time};
>       $res->{bytes} = $ref->{bytes};
> @@ -437,7 +437,7 @@ __PACKAGE__->register_method ({
>   	$sth->execute();
>   
>   	while (my $ref = $sth->fetchrow_hashref()) {
> -	    push @$res, { mail => $ref->{pmail} };
> +	    push @$res, { mail => PMG::Utils::try_decode_utf8($ref->{pmail}) };
>   	}
>   
>   	return $res;
> @@ -532,7 +532,7 @@ __PACKAGE__->register_method ({
>   	}
>   
>   	while (my $ref = $sth->fetchrow_hashref()) {
> -	    push @$res, { mail => $ref->{pmail} };
> +	    push @$res, { mail => PMG::Utils::try_decode_utf8($ref->{pmail}) };
>   	}
>   
>   	return $res;
> @@ -569,7 +569,7 @@ my $quarantine_api = sub {
>       }
>   
>       if ($check_pmail || $role eq 'quser') {
> -	$sth->execute($pmail);
> +	$sth->execute(encode('UTF-8', $pmail));
>       } else {
>   	$sth->execute();
>       }
> diff --git a/src/PMG/HTMLMail.pm b/src/PMG/HTMLMail.pm
> index 87f5c40..207c52c 100644
> --- a/src/PMG/HTMLMail.pm
> +++ b/src/PMG/HTMLMail.pm
> @@ -192,9 +192,10 @@ sub read_raw_email {
>       # read header
>       my $header;
>       while (defined(my $line = <$fh>)) {
> -	$raw_header .= $line;
> -	chomp $line;
> -	push @$header, $line;
> +	my $decoded_line = PMG::Utils::try_decode_utf8($line);
> +	$raw_header .= $decoded_line;
> +	chomp $decoded_line;
> +	push @$header, $decoded_line;
>   	last if $line =~ m/^\s*$/;
>       }
>   
> diff --git a/src/PMG/Quarantine.pm b/src/PMG/Quarantine.pm
> index 77af8cc..aa6b948 100644
> --- a/src/PMG/Quarantine.pm
> +++ b/src/PMG/Quarantine.pm
> @@ -3,6 +3,7 @@ package PMG::Quarantine;
>   use strict;
>   use warnings;
>   use Net::SMTP;
> +use Encode qw(encode);
>   
>   use PVE::SafeSyslog;
>   use PVE::Tools;
> @@ -16,7 +17,7 @@ sub add_to_blackwhite {
>   
>       my $name = $listname eq 'BL' ? 'BL' : 'WL';
>       my $oname = $listname eq 'BL' ? 'WL' : 'BL';
> -    my $qu = $dbh->quote ($username);
> +    my $qu = $dbh->quote (encode('UTF-8', $username));
>   
>       my $sth = $dbh->prepare(
>   	"SELECT * FROM UserPrefs WHERE pmail = $qu AND (Name = 'BL' OR Name = 'WL')");
> @@ -25,13 +26,13 @@ sub add_to_blackwhite {
>       my $list = { 'WL' => {}, 'BL' => {} };
>   
>       while (my $ref = $sth->fetchrow_hashref()) {
> -	my $data = $ref->{data};
> +	my $data = PMG::Utils::try_decode_utf8($ref->{data});
>   	$data =~ s/[,;]/ /g;
>   	my @alist = split('\s+', $data);
>   
>   	my $tmp = {};
>   	foreach my $a (@alist) {
> -	    if ($a =~ m/^[[:ascii:]]+$/) {
> +	    if ($a =~ m/^[^\s\\\@]+(?:\@[^\s\/\\\@]+)?$/) {

That change seems a bit dangerous; maybe we should at least
filter out some control characters here?

>   		$tmp->{$a} = 1;
>   	    }
>   	}
> @@ -50,7 +51,7 @@ sub add_to_blackwhite {
>   	    if ($delete) {
>   		delete($list->{$name}->{$v});
>   	    } else {
> -		if ($v =~ m/[[:^ascii:]]/) {
> +		if ($v =~ m/[\s\\]/) {

Same here: going from 'non-ASCII is forbidden' to 'only whitespace and \ are forbidden'
is a bit broad IMHO.

>   		    die "email address '$v' contains invalid characters\n";
>   		}
>   		$list->{$name}->{$v} = 1;
> @@ -58,8 +59,8 @@ sub add_to_blackwhite {
>   	    }
>   	}
>   
> -	my $wlist = $dbh->quote(join (',', keys %{$list->{WL}}) || '');
> -	my $blist = $dbh->quote(join (',', keys %{$list->{BL}}) || '');
> +	my $wlist = $dbh->quote(encode('UTF-8', join (',', keys %{$list->{WL}})) || '');
> +	my $blist = $dbh->quote(encode('UTF-8', join (',', keys %{$list->{BL}})) || '');
>   
>   	if (!$delete) {
>   	    my $maxlen = 200000;
> diff --git a/src/PMG/RuleDB/Spam.pm b/src/PMG/RuleDB/Spam.pm
> index 99056a3..bc1d422 100644
> --- a/src/PMG/RuleDB/Spam.pm
> +++ b/src/PMG/RuleDB/Spam.pm
> @@ -94,7 +94,7 @@ sub parse_addrlist {
>   	my $regex = $addr;
>   	# SA like checks
>   	$regex =~ s/[\000\\\(]/_/gs;		# is this really necessasry ?
> -	$regex =~ s/([^\*\?_a-zA-Z0-9])/\\$1/g;	# escape possible metachars
> +	$regex =~ s/([^\*\?_\w])/\\$1/g;	# escape possible metachars

What does \w include here beyond a-zA-Z0-9?
(A short explanation in the commit message would be enough IMO.)

>   	$regex =~ tr/?/./;			# replace "?" with "."
>   	$regex =~ s/\*+/\.\*/g;			# replace "*" with  ".*"
>   
> @@ -149,13 +149,13 @@ sub get_blackwhite {
>   	$sth->execute();
>   
>   	while (my $ref = $sth->fetchrow_hashref()) {
> -	    my $pmail = lc ($ref->{pmail});
> +	    my $pmail = lc (PMG::Utils::try_decode_utf8($ref->{pmail}));
>   	    if ($ref->{name} eq 'WL') {
>   		$target_info->{$pmail}->{whitelist} =
> -		    parse_addrlist($ref->{data});
> +		    parse_addrlist(PMG::Utils::try_decode_utf8($ref->{data}));
>   	    } elsif ($ref->{name} eq 'BL') {
>   		$target_info->{$pmail}->{blacklist} =
> -		    parse_addrlist($ref->{data});
> +		    parse_addrlist(PMG::Utils::try_decode_utf8($ref->{data}));
>   	    }
>   	}
>   
> @@ -205,7 +205,7 @@ sub what_match_targets {
>   		($list = $queue->{blackwhite}->{$pmail}->{whitelist}) &&
>   		check_addrlist($list, $queue->{all_from_addrs})) {
>   		syslog('info', "%s: sender in user (%s) whitelist",
> -		       $queue->{logid}, $pmail);
> +		       $queue->{logid}, encode('UTF-8', $pmail));
>   	    } else {
>   		$target_info->{$t}->{marks} = []; # never add additional marks here
>   		$target_info->{$t}->{spaminfo} = $info;
> @@ -234,7 +234,7 @@ sub what_match_targets {
>   		$target_info->{$t}->{marks} = [];
>   		$target_info->{$t}->{spaminfo} = $info;
>   		syslog ('info', "%s: sender in user (%s) blacklist",
> -			$queue->{logid}, $pmail);
> +			$queue->{logid}, encode('UTF-8',$pmail));
>   	    }
>   	}
>       }





More information about the pmg-devel mailing list