//I was looking into this as [incoming mail handling is completely broken in our downstream instance](https://phabricator.wikimedia.org/T356077).//
The external library `mimemailparser` was imported in May 2011 according to https://we.phorge.it/source/phorge/browse/master/externals/mimemailparser and since then has received some smaller custom downstream patches according to https://we.phorge.it/source/phorge/history/master/externals/mimemailparser
Rather than writing and maintaining more custom whack-a-mole Phorge-only changes like T15766, T15767, and T15769, it could make more sense to pull a more recent upstream version and avoid duplicating work.
This external codebase was migrated from the now-defunct code.google.com to GitHub at https://github.com/php-mime-mail-parser/php-mime-mail-parser/ on 2013-06-09.
The first commit on GitHub at https://github.com/php-mime-mail-parser/php-mime-mail-parser/tree/bf6e4365fc21f5983be7ada90d9eb94d6b5c39d9 already differs quite a bit from that ancient copy in Phorge.
Upstream releases are listed at https://github.com/php-mime-mail-parser/php-mime-mail-parser/releases
Phorge is basically using https://web.archive.org/web/20111009083009/http://code.google.com/p/php-mime-mail-parser/source/browse/#svn%2Ftrunk (with some trivial whitespace vs tab and indentation differences), plus the following (cleaned up) diff of `MimeMailParser.class.php` (`attachment.class.php` has no non-trivial differences at all):
```
[acko@ak ~]$ diff -pu ~/codegooglecom-MimeMailParser.class.php /var/www/html/phorge/phorge/externals/mimemailparser/MimeMailParser.class.php
--- /home/acko/codegooglecom-MimeMailParser.class.php 2024-09-04 19:51:24.160627808 +0000
+++ /var/www/html/phorge/phorge/externals/mimemailparser/MimeMailParser.class.php 2024-08-23 12:19:53.138306742 +0000
@@ -32,6 +32,11 @@ class MimeMailParser {
public $attachment_streams;
/**
+ * Parts of an email
+ */
+ private $parts = array();
+
+ /**
* Inialize some stuff
* @return
*/
@@ -111,6 +116,14 @@ class MimeMailParser {
* @param $data String
*/
public function setText($data) {
+ // NOTE: This has been modified for Phabricator. If the input data does not
+ // end in a newline, Mailparse fails to include the last line in the mail
+ // body. This happens somewhere deep, deep inside the mailparse extension,
+ // so adding a newline here seems like the most straightforward fix.
+ if (!preg_match('/\n\z/', $data)) {
+ $data = $data."\n";
+ }
+
$this->resource = mailparse_msg_create();
// does not parse incrementally, fast memory hog might explode
mailparse_msg_parse($this->resource, $data);
@@ -181,6 +194,13 @@ class MimeMailParser {
* @param $type Object[optional]
*/
public function getMessageBody($type = 'text') {
+
+ // NOTE: This function has been modified for Phabricator. The default
+ // implementation returns the last matching part, which throws away text
+ // for many emails. Instead, we concatenate all matching parts. See
+ // issue 22 for discussion:
+ // http://code.google.com/p/php-mime-mail-parser/issues/detail?id=22
+
$body = false;
$mime_types = array(
'text'=> 'text/plain',
@@ -188,9 +208,23 @@ class MimeMailParser {
);
if (in_array($type, array_keys($mime_types))) {
foreach($this->parts as $part) {
+ $disposition = $this->getPartContentDisposition($part);
+ if ($disposition == 'attachment') {
+ // text/plain parts with "Content-Disposition: attachment" are
+ // attachments, not part of the text body.
+ continue;
+ }
if ($this->getPartContentType($part) == $mime_types[$type]) {
- $headers = $this->getPartHeaders($part);
- $body = $this->decode($this->getPartBody($part), array_key_exists('content-transfer-encoding', $headers) ? $headers['content-transfer-encoding'] : '');
+ $headers = $this->getPartHeaders($part);
+ // Concatenate all the matching parts into the body text. For example,
+ // if a user sends a message with some text, then an image, and then
+ // some more text, the text body of the email gets split over several
+ // attachments.
+ $body .= $this->decode(
+ $this->getPartBody($part),
+ array_key_exists('content-transfer-encoding', $headers)
+ ? $headers['content-transfer-encoding']
+ : '');
}
}
} else {
@@ -228,14 +262,37 @@ class MimeMailParser {
* @param $type Object[optional]
*/
public function getAttachments() {
+ // NOTE: This has been modified for Phabricator. Some mail clients do not
+ // send attachments with "Content-Disposition" headers.
$attachments = array();
$dispositions = array("attachment","inline");
- foreach($this->parts as $part) {
+ $non_attachment_types = array("text/plain", "text/html");
+ $nonameIter = 0;
+ foreach ($this->parts as $part) {
$disposition = $this->getPartContentDisposition($part);
- if (in_array($disposition, $dispositions)) {
+ $filename = 'noname';
+ if (isset($part['disposition-filename'])) {
+ $filename = $part['disposition-filename'];
+ } elseif (isset($part['content-name'])) {
+ // if we have no disposition but we have a content-name, it's a valid attachment.
+ // we simulate the presence of an attachment disposition with a disposition filename
+ $filename = $part['content-name'];
+ $disposition = 'attachment';
+ } elseif (!in_array($part['content-type'], $non_attachment_types, true)
+ && substr($part['content-type'], 0, 10) !== 'multipart/'
+ ) {
+ // if we cannot get it with getMessageBody, we assume it is an attachment
+ $disposition = 'attachment';
+ }
+
+ if (in_array($disposition, $dispositions) && isset($filename) === true) {
+ if ($filename == 'noname') {
+ $nonameIter++;
+ $filename = 'noname'.$nonameIter;
+ }
$attachments[] = new MimeMailParser_attachment(
- $part['disposition-filename'],
- $this->getPartContentType($part),
+ $filename,
+ $this->getPartContentType($part),
$this->getAttachmentStream($part),
$disposition,
$this->getPartHeaders($part)
@@ -251,10 +308,10 @@ class MimeMailParser {
* @param $part Array
*/
private function getPartHeaders($part) {
- if (isset($part['headers'])) {
+ if (isset($part['headers']) && $part['headers']) {
return $part['headers'];
}
- return false;
+ throw new Exception('MimeMailParser::getHeaders() could not parse any email headers.');
}
/**
```