[pmg-devel] [PATCH pmg-api v5 2/4] utils: content-type: don't fall back to header information for magic

Stoiko Ivanov s.ivanov at proxmox.com
Fri Feb 21 17:48:16 CET 2025


file-type detection based on content/magic is the single piece of
information not determined by the headers of the e-mail, and thus not
directly controlled by the sender.

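this patch removes the fallback to the mime-type from the content-type
header in case magic_mime_type_for_file does not detect the type. the
header value is now kept separately in PMX_header_ct, so PMX_magic_ct
only ever holds the result of the magic detection.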

the one exception is unpacking archives, where we eagerly try to gain
information: if magic only yields application/octet-stream, but the
header says the part is an archive, we still try to unpack it.

Reported-by: Friedrich Weber <f.weber at proxmox.com>
Signed-off-by: Stoiko Ivanov <s.ivanov at proxmox.com>
---
 src/PMG/RuleDB/ArchiveFilter.pm     |  2 +-
 src/PMG/RuleDB/ContentTypeFilter.pm |  2 +-
 src/PMG/Utils.pm                    | 12 ++++--------
 src/bin/pmg-smtp-filter             |  8 +++++++-
 4 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/src/PMG/RuleDB/ArchiveFilter.pm b/src/PMG/RuleDB/ArchiveFilter.pm
index 3d9890c..d7f6399 100644
--- a/src/PMG/RuleDB/ArchiveFilter.pm
+++ b/src/PMG/RuleDB/ArchiveFilter.pm
@@ -59,7 +59,7 @@ sub parse_entity {
     if (my $id = $entity->head->mime_attr ('x-proxmox-tmp-aid')) {
 	chomp $id;
 
-	my $header_ct = $entity->head->mime_attr ('content-type');
+	my $header_ct = $entity->{PMX_header_ct};
 
 	my $magic_ct = $entity->{PMX_magic_ct};
 
diff --git a/src/PMG/RuleDB/ContentTypeFilter.pm b/src/PMG/RuleDB/ContentTypeFilter.pm
index 0199311..fb45e95 100644
--- a/src/PMG/RuleDB/ContentTypeFilter.pm
+++ b/src/PMG/RuleDB/ContentTypeFilter.pm
@@ -72,7 +72,7 @@ sub parse_entity {
     if (my $id = $entity->head->mime_attr ('x-proxmox-tmp-aid')) {
 	chomp $id;
 
-	my $header_ct = $entity->head->mime_attr ('content-type');
+	my $header_ct = $entity->{PMX_header_ct};
 
 	my $magic_ct = $entity->{PMX_magic_ct};
 
diff --git a/src/PMG/Utils.pm b/src/PMG/Utils.pm
index 0b8945f..b2a75fb 100644
--- a/src/PMG/Utils.pm
+++ b/src/PMG/Utils.pm
@@ -598,7 +598,7 @@ sub magic_mime_type_for_file {
     my $bufsize = Xdgmime::xdg_mime_get_max_buffer_extents();
     die "got strange value for max_buffer_extents" if $bufsize > 4096*10;
 
-    my $ct = "application/octet-stream";
+    my $ct;
 
     my $fh = IO::File->new("<$filename") ||
 	die "unable to open file '$filename' - $!";
@@ -611,6 +611,7 @@ sub magic_mime_type_for_file {
 
     die "unable to read file '$filename' - $!" if ($len < 0);
 
+    $ct ||= "application/octet-stream";
     return $ct;
 }
 
@@ -619,14 +620,9 @@ sub add_ct_marks {
 
     if (my $path = $entity->{PMX_decoded_path}) {
 
-	# set a reasonable default if magic does not give a result
-	$entity->{PMX_magic_ct} = $entity->head->mime_attr('content-type');
+	$entity->{PMX_header_ct} = $entity->head->mime_attr('content-type');
 
-	if (my $ct = magic_mime_type_for_file($path)) {
-	    if ($ct ne 'application/octet-stream' || !$entity->{PMX_magic_ct}) {
-		$entity->{PMX_magic_ct} = $ct;
-	    }
-	}
+	$entity->{PMX_magic_ct} = magic_mime_type_for_file($path);
 
 	my $filename = $entity->head->recommended_filename;
 	$filename = basename($path) if !defined($filename) || $filename eq '';
diff --git a/src/bin/pmg-smtp-filter b/src/bin/pmg-smtp-filter
index 6061459..60737ea 100755
--- a/src/bin/pmg-smtp-filter
+++ b/src/bin/pmg-smtp-filter
@@ -561,9 +561,15 @@ sub run_dequeue {
 sub unpack_entity {
     my ($self, $unpack, $entity, $msginfo, $queue) = @_;
 
-    my ($magic, $path) = $entity->@{'PMX_magic_ct', 'PMX_decoded_path'};
+    my ($magic, $headerct, $path) = $entity->@{'PMX_magic_ct', 'PMX_header_ct', 'PMX_decoded_path'};
 
     if ($magic && $path) {
+	# in order to not miss information from a misdetected archive use information provided in the
+	# header here as well
+	if ($headerct && ($magic && $magic eq 'application/octet-stream')) {
+	    $magic = $headerct;
+	}
+
 	my $filename = basename ($path);
 
 	if (PMG::Unpack::is_archive ($magic)) {
-- 
2.39.5





More information about the pmg-devel mailing list