I was looking into this as incoming mail handling is completely broken in our downstream instance.
The external library mimemailparser was imported in May 2011 according to https://we.phorge.it/source/phorge/browse/master/externals/mimemailparser and since then has received some smaller custom downstream patches according to https://we.phorge.it/source/phorge/history/master/externals/mimemailparser
Rather than writing and maintaining more custom whack-a-mole Phorge-only changes like T15766, T15767, and T15769, it could make more sense to pull a more recent upstream version and avoid duplicating work.
This external codebase was migrated from the now-defunct code.google.com to GitHub at https://github.com/php-mime-mail-parser/php-mime-mail-parser/ on 2013-06-09.
The first commit on GitHub in https://github.com/php-mime-mail-parser/php-mime-mail-parser/tree/bf6e4365fc21f5983be7ada90d9eb94d6b5c39d9 already differs quite a bit from that ancient copy in Phorge.
Upstream releases are listed at https://github.com/php-mime-mail-parser/php-mime-mail-parser/releases
Phorge is basically using https://web.archive.org/web/20111009083009/http://code.google.com/p/php-mime-mail-parser/source/browse/#svn%2Ftrunk (with some trivial whitespace vs tab and indentation differences), plus the following (cleaned up) diff of MimeMailParser.class.php (attachment.class.php has no non-trivial differences at all):
[acko@ak ~]$ diff -pu ~/codegooglecom-MimeMailParser.class.php /var/www/html/phorge/phorge/externals/mimemailparser/MimeMailParser.class.php --- /home/acko/codegooglecom-MimeMailParser.class.php 2024-09-04 19:51:24.160627808 +0000 +++ /var/www/html/phorge/phorge/externals/mimemailparser/MimeMailParser.class.php 2024-08-23 12:19:53.138306742 +0000 @@ -32,6 +32,11 @@ class MimeMailParser { public $attachment_streams; /** + * Parts of an email + */ + private $parts = array(); + + /** * Inialize some stuff * @return */ @@ -111,6 +116,14 @@ class MimeMailParser { * @param $data String */ public function setText($data) { + // NOTE: This has been modified for Phabricator. If the input data does not + // end in a newline, Mailparse fails to include the last line in the mail + // body. This happens somewhere deep, deep inside the mailparse extension, + // so adding a newline here seems like the most straightforward fix. + if (!preg_match('/\n\z/', $data)) { + $data = $data."\n"; + } + $this->resource = mailparse_msg_create(); // does not parse incrementally, fast memory hog might explode mailparse_msg_parse($this->resource, $data); @@ -181,6 +194,13 @@ class MimeMailParser { * @param $type Object[optional] */ public function getMessageBody($type = 'text') { + + // NOTE: This function has been modified for Phabricator. The default + // implementation returns the last matching part, which throws away text + // for many emails. Instead, we concatenate all matching parts. See + // issue 22 for discussion: + // http://code.google.com/p/php-mime-mail-parser/issues/detail?id=22 + $body = false; $mime_types = array( 'text'=> 'text/plain', @@ -188,9 +208,23 @@ class MimeMailParser { ); if (in_array($type, array_keys($mime_types))) { foreach($this->parts as $part) { + $disposition = $this->getPartContentDisposition($part); + if ($disposition == 'attachment') { + // text/plain parts with "Content-Disposition: attachment" are + // attachments, not part of the text body. 
+ continue; + } if ($this->getPartContentType($part) == $mime_types[$type]) { - $headers = $this->getPartHeaders($part); - $body = $this->decode($this->getPartBody($part), array_key_exists('content-transfer-encoding', $headers) ? $headers['content-transfer-encoding'] : ''); + $headers = $this->getPartHeaders($part); + // Concatenate all the matching parts into the body text. For example, + // if a user sends a message with some text, then an image, and then + // some more text, the text body of the email gets split over several + // attachments. + $body .= $this->decode( + $this->getPartBody($part), + array_key_exists('content-transfer-encoding', $headers) + ? $headers['content-transfer-encoding'] + : ''); } } } else { @@ -228,14 +262,37 @@ class MimeMailParser { * @param $type Object[optional] */ public function getAttachments() { + // NOTE: This has been modified for Phabricator. Some mail clients do not + // send attachments with "Content-Disposition" headers. $attachments = array(); $dispositions = array("attachment","inline"); - foreach($this->parts as $part) { + $non_attachment_types = array("text/plain", "text/html"); + $nonameIter = 0; + foreach ($this->parts as $part) { $disposition = $this->getPartContentDisposition($part); - if (in_array($disposition, $dispositions)) { + $filename = 'noname'; + if (isset($part['disposition-filename'])) { + $filename = $part['disposition-filename']; + } elseif (isset($part['content-name'])) { + // if we have no disposition but we have a content-name, it's a valid attachment. 
+ // we simulate the presence of an attachment disposition with a disposition filename + $filename = $part['content-name']; + $disposition = 'attachment'; + } elseif (!in_array($part['content-type'], $non_attachment_types, true) + && substr($part['content-type'], 0, 10) !== 'multipart/' + ) { + // if we cannot get it with getMessageBody, we assume it is an attachment + $disposition = 'attachment'; + } + + if (in_array($disposition, $dispositions) && isset($filename) === true) { + if ($filename == 'noname') { + $nonameIter++; + $filename = 'noname'.$nonameIter; + } $attachments[] = new MimeMailParser_attachment( - $part['disposition-filename'], - $this->getPartContentType($part), + $filename, + $this->getPartContentType($part), $this->getAttachmentStream($part), $disposition, $this->getPartHeaders($part) @@ -251,10 +308,10 @@ class MimeMailParser { * @param $part Array */ private function getPartHeaders($part) { - if (isset($part['headers'])) { + if (isset($part['headers']) && $part['headers']) { return $part['headers']; } - return false; + throw new Exception('MimeMailParser::getHeaders() could not parse any email headers.'); } /**
Phorge's only calls to this external library are in /scripts/mail/mail_handler.php.
Note that getMessageBodyHeaders(), which Phorge calls, was removed upstream in https://github.com/php-mime-mail-parser/php-mime-mail-parser/commit/26b7f45c434b04f217755657d652adbc8114a254