[pmg-devel] [PATCH pmg-api v5 2/4] utils: content-type: don't fallback to header information for magic
Stoiko Ivanov
s.ivanov at proxmox.com
Fri Feb 21 17:48:16 CET 2025
File-type detection based on content/magic is the only piece of
information that is not determined by the headers of the e-mail, and
thus not directly controlled by the sender.
This patch removes the fallback to the MIME type from the Content-Type
header in case magic_mime_type_for_file does not detect the type.
The one exception is archive handling, where we eagerly try to gain
information: we still want to try to unpack an archive if the header
says it is an archive but the content is not detected as such.
Reported-by: Friedrich Weber <f.weber at proxmox.com>
Signed-off-by: Stoiko Ivanov <s.ivanov at proxmox.com>
---
src/PMG/RuleDB/ArchiveFilter.pm | 2 +-
src/PMG/RuleDB/ContentTypeFilter.pm | 2 +-
src/PMG/Utils.pm | 12 ++++--------
src/bin/pmg-smtp-filter | 8 +++++++-
4 files changed, 13 insertions(+), 11 deletions(-)
diff --git a/src/PMG/RuleDB/ArchiveFilter.pm b/src/PMG/RuleDB/ArchiveFilter.pm
index 3d9890c..d7f6399 100644
--- a/src/PMG/RuleDB/ArchiveFilter.pm
+++ b/src/PMG/RuleDB/ArchiveFilter.pm
@@ -59,7 +59,7 @@ sub parse_entity {
if (my $id = $entity->head->mime_attr ('x-proxmox-tmp-aid')) {
chomp $id;
- my $header_ct = $entity->head->mime_attr ('content-type');
+ my $header_ct = $entity->{PMX_header_ct};
my $magic_ct = $entity->{PMX_magic_ct};
diff --git a/src/PMG/RuleDB/ContentTypeFilter.pm b/src/PMG/RuleDB/ContentTypeFilter.pm
index 0199311..fb45e95 100644
--- a/src/PMG/RuleDB/ContentTypeFilter.pm
+++ b/src/PMG/RuleDB/ContentTypeFilter.pm
@@ -72,7 +72,7 @@ sub parse_entity {
if (my $id = $entity->head->mime_attr ('x-proxmox-tmp-aid')) {
chomp $id;
- my $header_ct = $entity->head->mime_attr ('content-type');
+ my $header_ct = $entity->{PMX_header_ct};
my $magic_ct = $entity->{PMX_magic_ct};
diff --git a/src/PMG/Utils.pm b/src/PMG/Utils.pm
index 0b8945f..b2a75fb 100644
--- a/src/PMG/Utils.pm
+++ b/src/PMG/Utils.pm
@@ -598,7 +598,7 @@ sub magic_mime_type_for_file {
my $bufsize = Xdgmime::xdg_mime_get_max_buffer_extents();
die "got strange value for max_buffer_extents" if $bufsize > 4096*10;
- my $ct = "application/octet-stream";
+ my $ct;
my $fh = IO::File->new("<$filename") ||
die "unable to open file '$filename' - $!";
@@ -611,6 +611,7 @@ sub magic_mime_type_for_file {
die "unable to read file '$filename' - $!" if ($len < 0);
+ $ct ||= "application/octet-stream";
return $ct;
}
@@ -619,14 +620,9 @@ sub add_ct_marks {
if (my $path = $entity->{PMX_decoded_path}) {
- # set a reasonable default if magic does not give a result
- $entity->{PMX_magic_ct} = $entity->head->mime_attr('content-type');
+ $entity->{PMX_header_ct} = $entity->head->mime_attr('content-type');
- if (my $ct = magic_mime_type_for_file($path)) {
- if ($ct ne 'application/octet-stream' || !$entity->{PMX_magic_ct}) {
- $entity->{PMX_magic_ct} = $ct;
- }
- }
+ $entity->{PMX_magic_ct} = magic_mime_type_for_file($path);
my $filename = $entity->head->recommended_filename;
$filename = basename($path) if !defined($filename) || $filename eq '';
diff --git a/src/bin/pmg-smtp-filter b/src/bin/pmg-smtp-filter
index 6061459..60737ea 100755
--- a/src/bin/pmg-smtp-filter
+++ b/src/bin/pmg-smtp-filter
@@ -561,9 +561,15 @@ sub run_dequeue {
sub unpack_entity {
my ($self, $unpack, $entity, $msginfo, $queue) = @_;
- my ($magic, $path) = $entity->@{'PMX_magic_ct', 'PMX_decoded_path'};
+ my ($magic, $headerct, $path) = $entity->@{'PMX_magic_ct', 'PMX_header_ct', 'PMX_decoded_path'};
if ($magic && $path) {
+ # in order to not miss information from a misdetected archive use information provided in the
+ # header here as well
+ if ($headerct && ($magic && $magic eq 'application/octet-stream')) {
+ $magic = $headerct;
+ }
+
my $filename = basename ($path);
if (PMG::Unpack::is_archive ($magic)) {
--
2.39.5
More information about the pmg-devel
mailing list