#-*-perl-*- # $Id: Mail-ARF.pm.txt,v 1.1.1.1 2006/03/03 20:16:11 steve Exp $ package Mail::ARF; use MIME::Parser; use Mail::Header; use Data::Dumper; use Mail::RFC822::Address; use DateTime::Format::Mail; use strict; my %arf_headers = ( 'feedback-type' => 'mandatory', 'user-agent' => 'mandatory', 'version' => 'mandatory', 'original-mail-from' => 'optional', 'original-rcpt-to' => 'optional', 'received-date' => 'optional', 'source-ip' => 'optional', 'authentication-results' => 'multiple', 'reported-domain' => 'multiple', 'reported-uri' => 'multiple', 'removal-recipient' => 'multiple' ); my %arf_types = ( 'abuse' => 1, 'fraud' => 1, 'opt-out' => 1, 'opt-out-list' => 1, 'other' => 1, 'virus' => 1 ); sub new { my $class = shift; my $self = {}; $self->{tempdir} = '/tmp'; $self->{parser} = new MIME::Parser; $self->{log} = (); bless($self, $class); return $self; } sub init_parser { my ($self) = @_; $self->{parser}->output_under($self->{tempdir}); $self->{parser}->ignore_errors(0); $self->{parser}->extract_uuencode(0); $self->{parser}->extract_nested_messages(0); $self->{parser}->decode_headers(1); } sub message { my ($self, $severe, $message) = @_; push(@{$self->{log}}, {severe => $severe, message => $message}); } sub fatal { my ($self, $msg) = @_; message($self, 'fatal', $msg); } sub parse_arf { # MIME errors my ($self) = @_; my $mimeerror = 0; foreach my $w ($self->{parser}->results->msgs) { $w =~ /^(debug|warning|error): (.*)$/ or die "Bad MIME::Parser message\n"; message($self, $1, $2); $mimeerror++ if $1 ne 'debug'; } # message($self, 'ok', 'Valid MIME') unless $mimeerror; # Go through and check individual elements # 1) RFC 2822 Structure my $head = $self->{entity}->head; if($head->count('Date') != 1) { message($self, 'must', 'There must be exactly one Date header'); } if($head->count('From') != 1) { message($self, 'must', 'There must be exactly one From header'); } if($head->count('Sender') > 1) { message($self, 'must', 'There may be no more than one Sender header'); } if($head->count('Reply-To') > 1) { message($self, 'must', 'There may be no more than one Reply-To header'); } if($head->count('To') > 1) { message($self, 'must', 'There may be no more than one To header'); } if($head->count('To') < 1) { message($self, 'error', 'A missing To header is legal, but will confuse some ticketing systems'); } if($head->count('message-id') > 1) { message($self, 'must', 'There may be no more than one Message-Id header'); } if($head->count('message-id') < 1) { message($self, 'should', 'No Message-Id header'); } if($head->count('in-reply-to') > 1) { message($self, 'must', 'There may be no more than one In-Reply-To header (and an ARF report should not have any)'); } if($head->count('in-reply-to') == 1) { message($self, 'warning', 'An ARF report is not a reply, so should not have an In-Reply-To header'); } if($head->count('references') > 1) { message($self, 'must', 'There may be no more than one References header (and an ARF report should not have any)'); } if($head->count('references') == 1) { message($self, 'warning', 'An ARF report is not a reply, so should not have a References header'); } if($head->count('subject') > 1) { message($self, 'must', 'There may be no more than one Subject header'); } if($head->count('subject') < 1) { message($self, 'error', 'The Subject header is technically optional, but many ticketing systems will be confused if it is not there.'); } # 2) MIME Structure if($head->count('MIME-Version') != 1) { message($self, 'must', 'An ARF report is a MIME document, so must have a MIME-Version header'); } else { if($head->get('MIME-Version') !~ /^1.0\s*$/s) { message($self, 'must', 'MIME-Version should be 1.0' . ' not "' . $head->get('MIME-Version') . '"'); } } my $entity = $self->{entity}; unless($entity->is_multipart) { message($self, 'critical', 'Message must be multipart MIME'); return; } if($head->mime_type ne 'multipart/report') { message($self, 'must', 'Content-Type must be multipart/report'); } if($head->mime_attr('content-type.report-type') ne 'feedback-report') { message($self, 'must', 'Content-type attribute report-type must be feedback-report'); } my @parts = $entity->parts; if(@parts != 3) { message($self, 'critical', 'The ARF report must consist of three MIME entities - description, metadata and forwarded message'); return; } # Check preamble, epilogue if(defined $entity->preamble && join('', @{$entity->preamble}) !~ /^\s*$/s) { message($self, 'warning', 'Message preamble is not empty - this may be discarded by processing systems, so should not contain important information'); print "preamble = '", join('', @{$entity->preamble}), "'\n"; } if(defined $entity->epilogue && join('', @{$entity->epilogue}) !~ /^\s*$/s) { message($self, 'warning', 'Message epilogue is not empty - this may be discarded by processing systems, so should not contain important information'); print "epilogue = '", join('', @{$entity->epilogue}), "'\n"; } # MIME::Entity parts of the message my($desc, $meta, $fwd) = @parts; if($desc->mime_type !~ /^text\//) { message($self, 'must', 'The first part of the report should be a human readable description - so needs to be a text mime type'); } elsif($desc->mime_type ne 'text/plain') { message($self, 'error' ,'The first part of the report should be a human readable description - only text/plain is handled well by some ticketing systems'); } if($desc->effective_type ne $desc->mime_type) { message($self, 'error', "The effective content type of the first part of the report is " . $desc->effective_type . " rather than " . $desc->mime_type . " - a content-encoding problem, perhaps?"); } if($meta->mime_type ne 'message/feedback-report') { message($self, 'must', 'The second MIME element of the report must be of type message/feedback-report, not ' . $meta->mime_type); } if($meta->head->mime_encoding ne '7bit') { message($self, 'must', 'The second MIME element of the report must be encoded as 7bit'); } if($fwd->mime_type ne 'message/rfc822' && $fwd->mime_type ne 'text/rfc822-headers') { message($self, 'must', 'The third MIME element must be of type message/rfc822 or text/rfc822-headers'); } if($meta->mime_type eq 'message/feedback-report') { checkmeta($self, $meta, $fwd); } } sub checkmeta { my ($self ,$meta, $fwd) = @_; my %fieldcount; my %fieldcontent; my @h = split /\r?\n/, $meta->bodyhandle->as_string; my $head = new Mail::Header \@h; # print "\n\n---- headers ----\n", $head->as_string(), "\n--------\n"; # 1) Run through and check that all the mandatories exist while(my ($tag ,$v) = each %arf_headers) { my $count = $head->count($tag); if($v eq 'mandatory' && $count == 0) { message($self, 'must', "Mandatory tag $tag is missing"); } if(($v eq 'mandatory' || $v eq 'optional') && $count > 1) { message($self, 'must', "Tag $tag appears multiple times"); } } # 2) Check that all the headers are allowed foreach my $h ($head->tags) { chomp($h); # print "TAG: $h\n"; unless(exists $arf_headers{lc($h)}) { message($self, 'must', "Tag $h is invalid"); } } # 3) Check that type is valid and type specific fields my $type; if($head->count('feedback-type') == 1) { $type = lc($head->get('feedback-type')); chomp($type); if(exists $arf_types{$type}) { if($type =~ /^opt-out/) { if($head->count('removal-recipient') == 0) { message($self, 'warning', "An opt-out request should include a Removal-Recipient field"); } } else { if($head->count('removal-recipient') > 0) { message($self, 'must', "A Removal-Recipient field is only valid in an opt-out report"); } } } else { message($self, 'must', "'$type' is not a valid Feedback-Type"); } } # 4) Check syntax of fields # User-Agent if($head->count('user-agent') == 1) { my $ua = $head->get('user-agent'); chomp($ua); # Do crude breakdown, we don't handle quoted strings correctly # discard quoted strings $ua =~ s/"[^"]*"//g; $ua =~ s/\([^()]*\)//g; foreach my $el (split /\s+/, $ua) { if($el !~ /^[^][()<>@,;:\\"\/?={}]+(?:\/[^]()<>@,;:\\"\/[?={}[:space:]]+)$/) { message($self, 'must', "'$el' is not a valid token within a User-Agent field"); } } } # Version if($head->count('version') == 1) { my $ver = $head->get('version'); chomp($ver); if($ver !~ /^\d+(\.\d+)+$/) { message($self, 'error', "'$ver' does not look like a valid version string"); } elsif($ver ne '0.1') { message($self, 'warning', "This validator only supports verision 0.1 of the draft"); } } # Original-Mail-From if($head->count('original-mail-from') == 1) { my $email = $head->get('original-mail-from'); chomp($email); unless(Mail::RFC822::Address::valid($email)) { message($self, 'must', "The content of the Original-Mail-From field does not look like a valid email address"); } } # Original-Rcpt-To if($head->count('original-rcpt-to') == 1) { my $email = $head->get('original-rcpt-to'); chomp($email); unless(Mail::RFC822::Address::valid($email)) { message($self, 'must', "The content of the Original-Rcpt-To field does not look like a valid email address"); } } # Received-Date if($head->count('received-date') == 1) { my $date = $head->get('received-date'); chomp($date); eval { my $parser = DateTime::Format::Mail->new; $parser->strict; eval { my $p = $parser->parse_datetime($date); }; if($@) { message($self, 'must', "Invalid Received-Date: $date"); } } } # Source-IP if($head->count('source-ip') == 1) { my $ip = $head->get('source-ip'); chomp($ip); if($ip =~ /^25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9]?[0-9](?:\.25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9]?[0-9]){3}$/) { message($self, 'debug', "ip4: $ip"); } elsif($ip =~ /^[0-9A-F]{1,4}(?::[0-9A-F]{1,4}){7}$/) { message($self, 'debug', "ip6: $ip"); } elsif($ip =~ /^[:0-9A-F]+$/ && $ip =~ /::/) { message($self, 'debug', "ip6: $ip"); } else { message($self, 'error', "'$ip' is not obviously a valid IP address (it may be in a format this validator doesn't handle - but in that case other code may not handle it either)"); } } # Authentication-Results if($head->count('authentication-results') > 0) { message($self, 'note', 'We do not verify the syntax of the Authentication-Results field'); } # Reported-Domain if($head->count('reported-domain') > 0) { foreach my $dom ($head->get('reported-domain')) { chomp($dom); if($dom =~ /(?:[0-9a-z-]+\.)+[a-z]+/i) { warn($self, 'must', "The content of the Reported-Domain field ($dom) does not look like a valid domain"); } } } # Reported-URI if($head->count('reported-uri') > 0) { foreach my $uri ($head->get('reported-uri')) { chomp($uri); if($uri =~ /^https?:(.*)/) { warn($self, 'debug', "Reported-URI is a webpage"); } elsif($uri =~ /^mailto:/) { warn($self, 'debug', "It's email"); } elsif($uri =~ /^[a-z0-9]:/) { warn($self, 'error', "The contents of the Reported-URI field ($uri) do not look like a URI"); } else { warn($self, 'warning', "The contents of the Reported-URI field ($uri) do not look like a common URI"); } } } # Removal Recipient if($head->count('removal-recipient') > 0) { foreach my $email ($head->get('removal-recipient')) { chomp($email); unless(Mail::RFC822::Address::valid($email)) { message($self, 'must', "The content of the Removal-Recipient field ($email) does not look like a valid email address"); } } } } sub parse_data { my ($self, $msg) = @_; init_parser($self); eval { $self->{entity} = $self->{parser}->parse_data($msg); parse_arf($self); }; if($@) { fatal($self, $@); } } sub parse { my ($self, $msg) = @_; init_parser($self); eval { $self->{entity} = $self->{parser}->parse($msg); parse_arf($self); }; if($@) { fatal($self, $@); } } sub parse_open { my ($self, $msg) = @_; init_parser($self); eval { $self->{entity} = $self->{parser}->parse_open($msg); parse_arf($self); }; if($@) { fatal($self, $@); } } sub purge { my ($self) = @_; $self->{parser}->purge if $self->{parser}; } sub DESTROY { } sub dump { my ($self) = @_; if($self->{entity}) { # $self->{entity}->dump_skeleton; } print "\n\nMessages\n--------\n"; foreach my $m (@{$self->{log}}) { print $m->{severe}, ": ", $m->{message}, "\n" unless $m->{severe} eq 'debug'; } } 1;