[pmg-devel] [PATCH pmg-api 2/2] ruledb: content-type: add flags for source of matching

Stoiko Ivanov s.ivanov at proxmox.com
Wed Feb 12 16:12:39 CET 2025


Our current content-type matching is deliberately cautious: a rule
matches if any of the available sources indicates a potential match:
* mime-type detection based on file contents
* mime-type detection based on file suffix
* content-type header

Sometimes this can lead to surprises, e.g. when a MUA sets the
filetype of a pdf to application/octet-stream (the default type if no
information is available).

This change gives users the option to rely only on some of the sources
for matching.

This is a fix for the initial request in #2691 and addresses
Friedrich's suggestion in:
https://bugzilla.proxmox.com/show_bug.cgi?id=5618#c2

inspired by the changes for disclaimer released with PMG 8.1:
51d1507 ("fix #2430: ruledb disclaimer: make separator configurable")

Signed-off-by: Stoiko Ivanov <s.ivanov at proxmox.com>
---
 src/PMG/RuleDB/ContentTypeFilter.pm | 91 +++++++++++++++++++++++++++--
 1 file changed, 86 insertions(+), 5 deletions(-)

diff --git a/src/PMG/RuleDB/ContentTypeFilter.pm b/src/PMG/RuleDB/ContentTypeFilter.pm
index 0199311..0dafa64 100644
--- a/src/PMG/RuleDB/ContentTypeFilter.pm
+++ b/src/PMG/RuleDB/ContentTypeFilter.pm
@@ -26,7 +26,7 @@ sub otype_text {
 }
 
 sub new {
-    my ($type, $fvalue, $ogroup) = @_;
+    my ($type, $fvalue, $ogroup, $header, $magic, $glob) = @_;
 
     my $class = ref($type) || $type;
 
@@ -36,6 +36,9 @@ sub new {
     }
 
     my $self = $class->SUPER::new('content-type', $fvalue, $ogroup);
+    $self->{header} = $header;
+    $self->{magic} = $magic;
+    $self->{glob} = $glob;
 
     return $self;
 }
@@ -52,9 +55,53 @@ sub load_attr {
 	$obj->{field_value} = $nt;
     }
 
+    my $sth = $ruledb->{dbh}->prepare(
+	"SELECT * FROM Attribut WHERE Object_ID = ?");
+
+    $sth->execute($id);
+
+    $obj->{header} = $obj->{magic} = $obj->{glob} = 1;
+
+    while (my $ref = $sth->fetchrow_hashref()) {
+	if ($ref->{name} =~ /^(header|magic|glob)$/) {
+	    $obj->{$1} = $ref->{value};
+	}
+    }
+
+    $sth->finish();
+
+    $obj->{id} = $id;
+
+    $obj->{digest} = Digest::SHA::sha1_hex(
+	$id, $value, $ogroup, $obj->{header} // 1, $obj->{magic} //1 , $obj->{glob} // 1);
+
     return $obj;
 }
 
+sub save {
+    my ($self, $ruledb) = @_;
+
+    if (defined($self->{id})) {
+	#update - clean old attribut entries
+	$ruledb->{dbh}->do(
+	    "DELETE FROM Attribut WHERE Object_ID = ?",
+	    undef, $self->{id});
+    }
+
+    $self->{id} = $self->SUPER::save($ruledb);
+
+    for my $prop (qw(header magic glob)) {
+	if (defined($self->{$prop})) {
+	    $ruledb->{dbh}->do(
+		"INSERT INTO Attribut (Value, Name, Object_ID) VALUES (?, ?, ?) ".
+		"ON CONFLICT(Object_ID, Name) DO UPDATE SET Value = Excluded.Value ",
+		undef, $self->{$prop}, $prop,  $self->{id});
+	}
+    }
+
+    return $self->{id};
+}
+
 sub parse_entity {
     my ($self, $entity) = @_;
 
@@ -78,11 +125,14 @@ sub parse_entity {
 
 	my $glob_ct = $entity->{PMX_glob_ct};
 
-	if ($header_ct && $header_ct =~ m|$self->{field_value}|) {
+	my $check_header = !defined($self->{header}) || ${self}->{header};
+	my $check_magic = !defined($self->{magic}) || ${self}->{magic};
+	my $check_glob = !defined($self->{glob}) || ${self}->{glob};
+	if ($header_ct && $check_header && $header_ct =~ m|$self->{field_value}|) {
 	    push @$res, $id;
-	} elsif ($magic_ct && $magic_ct =~ m|$self->{field_value}|) {
+	} elsif ($magic_ct && $check_magic && $magic_ct =~ m|$self->{field_value}|) {
 	    push @$res, $id;
-	} elsif ($glob_ct && $glob_ct =~ m|$self->{field_value}|) {
+	} elsif ($glob_ct && $check_glob && $glob_ct =~ m|$self->{field_value}|) {
 	    push @$res, $id;
 	}
     }
@@ -112,19 +162,50 @@ sub properties {
 	    pattern => '[0-9a-zA-Z\/\\\[\]\+\-\.\*\_]+',
 	    maxLength => 1024,
 	},
+	header => {
+	    description => "use content-type from mail-header for matching",
+	    type => 'boolean',
+	    optional => 1,
+	    default => 1,
+	},
+	magic => {
+	    description => "use content-type from scanning the content for matching",
+	    type => 'boolean',
+	    optional => 1,
+	    default => 1,
+	},
+	glob => {
+	    description => "use content-type based on file-name for matching",
+	    type => 'boolean',
+	    optional => 1,
+	    default => 1,
+	},
     };
 }
 
 sub get {
     my ($self) = @_;
 
-    return { contenttype => $self->{field_value} };
+    return {
+	contenttype => $self->{field_value},
+	header => $self->{header},
+	magic => $self->{magic},
+	glob => $self->{glob},
+    };
 }
 
 sub update {
     my ($self, $param) = @_;
 
     $self->{field_value} = $param->{contenttype};
+
+    for my $prop (qw(header magic glob)) {
+	if (defined($param->{$prop}) && $param->{$prop} == 0) {
+	    $self->{$prop} = 0;
+	} else {
+	    delete $self->{$prop};
+	}
+    }
 }
 
 1;
-- 
2.39.5





More information about the pmg-devel mailing list