Page MenuHomePhorge

No OneTemporary

diff --git a/src/applications/metamta/parser/PhabricatorMetaMTAEmailBodyParser.php b/src/applications/metamta/parser/PhabricatorMetaMTAEmailBodyParser.php
index 5138c1a2bd..07770600e6 100644
--- a/src/applications/metamta/parser/PhabricatorMetaMTAEmailBodyParser.php
+++ b/src/applications/metamta/parser/PhabricatorMetaMTAEmailBodyParser.php
@@ -1,161 +1,167 @@
<?php
final class PhabricatorMetaMTAEmailBodyParser {

  /**
   * Mails can have bodies such as
   *
   *   !claim
   *
   *   taking this task
   *
   * Or
   *
   *   !assign epriestley
   *
   *   please, take this task I took; it's hard
   *
   * This function parses such an email body and returns a dictionary
   * containing a clean body text (e.g. "taking this task"), and a list of
   * commands. For example, the second body above might parse as:
   *
   *   array(
   *     'body' => "please, take this task I took; it's hard",
   *     'commands' => array(
   *       array('assign', 'epriestley'),
   *     ),
   *   )
   *
   * Quoted replies and signatures are removed (see @{method:stripTextBody})
   * before commands are looked for. Commands are recognized only in the
   * leading and trailing runs of "!command" lines; a "!command" line in the
   * middle of the text stays in the body.
   *
   * @param   string  Raw mail text body.
   * @return  dict    Parsed body, with keys "body" (string) and "commands"
   *                  (list of argument lists, in order of appearance).
   */
  public function parseBody($body) {
    $body = $this->stripTextBody($body);
    $lines = phutil_split_lines($body, $retain_endings = true);

    // We'll match commands at the beginning and end of the mail, but not
    // in the middle of the mail body. The trailing commands are found by
    // running the same scan over the reversed line list, so both the
    // remaining lines and the commands found must be flipped back afterwards.
    list($top_commands, $lines) = $this->stripCommands($lines);
    list($end_commands, $lines) = $this->stripCommands(array_reverse($lines));
    $lines = array_reverse($lines);
    $commands = array_merge($top_commands, array_reverse($end_commands));

    $lines = rtrim(implode('', $lines));

    return array(
      'body' => $lines,
      'commands' => $commands,
    );
  }

  /**
   * Consume the leading run of "!command" lines from a list of lines.
   *
   * Scanning stops at the first line which is not a command. Blank lines are
   * skipped (and removed) only once at least one command has been seen, so a
   * blank first line ends the scan immediately.
   *
   * @param   list<string>  Lines to scan, in scan order.
   * @return  pair          List of commands (each a list of words split on
   *                        commas and whitespace), and the remaining lines.
   *                        Removed lines are unset, so the remaining lines
   *                        keep their original keys.
   */
  private function stripCommands(array $lines) {
    $saw_command = false;
    $commands = array();

    foreach ($lines as $key => $line) {
      $is_blank = !strlen(trim($line));
      if ($is_blank && $saw_command) {
        unset($lines[$key]);
        continue;
      }

      $matches = null;
      if (!preg_match('/^\s*!(\w+.*$)/', $line, $matches)) {
        break;
      }

      // "!assign alice, bob" becomes array('assign', 'alice', 'bob').
      $commands[] = preg_split('/[,\s]+/', trim($matches[1]));
      unset($lines[$key]);
      $saw_command = true;
    }

    return array($commands, $lines);
  }

  /**
   * Remove quoted reply text and signatures from a mail body.
   *
   * @param   string  Raw mail text body.
   * @return  string  Body with quoted text and signature removed, trimmed.
   */
  public function stripTextBody($body) {
    $body = $this->stripQuotedText($body);
    $body = $this->stripSignature($body);
    return trim($body);
  }

  /**
   * Remove quoted reply text, starting at the first recognized quote
   * introduction and running to the end of the body.
   *
   * @param   string  Mail text body.
   * @return  string  Body without the quoted text, right-trimmed.
   */
  private function stripQuotedText($body) {
    // Look for "On <date>, <user> wrote:". This may be split across multiple
    // lines. We need to be careful not to remove all of a message like this:
    //
    //   On which day do you want to meet?
    //
    //   On <date>, <user> wrote:
    //   > Let's set up a meeting.
    //
    // To do that, the cut point is moved forward every time a later line
    // also starts with "On", so the cut starts at the closest "On" line at
    // or before the line containing "wrote:".
    $start = null;
    $lines = phutil_split_lines($body);
    foreach ($lines as $key => $line) {
      if (preg_match('/^\s*>?\s*On\b/', $line)) {
        $start = $key;
      }

      if ($start === null) {
        continue;
      }

      if (preg_match('/\bwrote:/', $line)) {
        $lines = array_slice($lines, 0, $start);
        $body = implode('', $lines);
        break;
      }
    }

    // NOTE: These patterns use the "U" modifier, which inverts greediness:
    // the trailing ".*?" is therefore greedy and, together with "s", consumes
    // everything through the end of the body.
    $patterns = array(
      // Outlook english
      '/^\s*(> )?-----Original Message-----.*?/imsU',

      // Outlook danish
      '/^\s*(> )?-----Oprindelig Meddelelse-----.*?/imsU',

      // See example in T3217.
      '/^________________________________________\s+From:.*?/imsU',

      // French GMail quoted text. See T8199.
      '/^\s*\d{4}-\d{2}-\d{2} \d+:\d+ GMT.*:.*?/imsU',
    );

    return rtrim($this->removePatterns($body, $patterns));
  }

  /**
   * Remove a trailing signature block from a mail body.
   *
   * @param   string  Mail text body.
   * @return  string  Body without the signature, right-trimmed.
   */
  private function stripSignature($body) {
    $patterns = array(
      // Quasi-"standard" delimiter, for lols see:
      // https://bugzilla.mozilla.org/show_bug.cgi?id=58406
      '/^-- +$.*/sm',

      // Mailbox seems to make an attempt to comply with the "standard" but
      // omits the leading newline and uses an em dash. This may or may not
      // have the trailing space, but it's unique enough that there's no real
      // ambiguity in detecting it.
      "/\s*\xE2\x80\x94\s*\nSent from Mailbox\s*\z/su",

      // HTC Mail application (mobile)
      '/^\s*^Sent from my HTC smartphone.*/sm',

      // Apple iPhone
      '/^\s*^Sent from my iPhone\s*$.*/sm',
    );

    return rtrim($this->removePatterns($body, $patterns));
  }

  /**
   * Delete every match of each pattern from a body, applying the patterns in
   * order.
   *
   * preg_replace() returns null when PCRE fails (for example, a pattern with
   * the "u" modifier applied to text which is not valid UTF-8). In that case
   * the text is kept as it was for that pattern, instead of letting the null
   * propagate and wipe out the entire mail body.
   *
   * @param   string        Text to clean.
   * @param   list<string>  PCRE patterns to remove.
   * @return  string        Text with all successful removals applied.
   */
  private function removePatterns($body, array $patterns) {
    foreach ($patterns as $pattern) {
      $result = preg_replace($pattern, '', $body);
      if ($result !== null) {
        $body = $result;
      }
    }

    return $body;
  }

}
diff --git a/src/applications/metamta/parser/__tests__/PhabricatorMetaMTAEmailBodyParserTestCase.php b/src/applications/metamta/parser/__tests__/PhabricatorMetaMTAEmailBodyParserTestCase.php
index 6a124e30f4..d8ad49956d 100644
--- a/src/applications/metamta/parser/__tests__/PhabricatorMetaMTAEmailBodyParserTestCase.php
+++ b/src/applications/metamta/parser/__tests__/PhabricatorMetaMTAEmailBodyParserTestCase.php
@@ -1,260 +1,267 @@
<?php
final class PhabricatorMetaMTAEmailBodyParserTestCase
  extends PhabricatorTestCase {

  /**
   * Every fixture body must reduce to exactly "OKAY" once quoted text and
   * signatures have been stripped.
   */
  public function testQuotedTextStripping() {
    foreach ($this->getEmailBodies() as $raw_body) {
      $parser = new PhabricatorMetaMTAEmailBodyParser();
      $this->assertEqual('OKAY', $parser->stripTextBody($raw_body));
    }
  }

  /**
   * Commands at the top and bottom of a mail are extracted; commands in the
   * middle of the text are left in the body.
   */
  public function testEmailBodyCommandParsing() {
    $this->assertParsedBodies(
      $this->getEmailBodiesWithFullCommands(),
      'OKAY',
      array(
        array('whatevs', 'dude'),
      ));

    $this->assertParsedBodies(
      $this->getEmailBodiesWithPartialCommands(),
      'OKAY',
      array(
        array('whatevs'),
      ));

    $this->assertParsedBodies(
      $this->getEmailBodiesWithMultipleCommands(),
      "preface\n\nOKAY",
      array(
        array('top1'),
        array('top2'),
      ));

    $this->assertParsedBodies(
      $this->getEmailBodiesWithSplitCommands(),
      'OKAY',
      array(
        array('cmd1'),
        array('cmd2'),
      ));

    // For these bodies only the remaining text is checked; the extracted
    // commands differ from fixture to fixture.
    foreach ($this->getEmailBodiesWithMiddleCommands() as $raw_body) {
      $parser = new PhabricatorMetaMTAEmailBodyParser();
      $parsed = $parser->parseBody($raw_body);
      $this->assertEqual("HEAD\n!cmd2\nTAIL", $parsed['body']);
    }
  }

  /**
   * A sentence which merely starts with "On" must not be swallowed together
   * with the "On <date>, <user> wrote:" line which follows it.
   */
  public function testFalsePositiveForOnWrote() {
    $mail = <<<EOEMAIL
On which horse shall you ride?
On Sep 23, alincoln wrote:
> Hey bro do you want to go ride horses tomorrow?
EOEMAIL;

    $parser = new PhabricatorMetaMTAEmailBodyParser();
    $this->assertEqual(
      'On which horse shall you ride?',
      $parser->stripTextBody($mail));
  }

  /**
   * Parse each body and assert both the remaining text and the commands.
   *
   * @param   list<string>  Raw mail bodies.
   * @param   string        Expected "body" value for every mail.
   * @param   list          Expected "commands" value for every mail.
   * @return  void
   */
  private function assertParsedBodies(
    array $bodies,
    $expect_body,
    array $expect_commands) {

    foreach ($bodies as $raw_body) {
      $parser = new PhabricatorMetaMTAEmailBodyParser();
      $parsed = $parser->parseBody($raw_body);
      $this->assertEqual($expect_body, $parsed['body']);
      $this->assertEqual($expect_commands, $parsed['commands']);
    }
  }

  /**
   * Build a copy of the standard fixtures with a fixed string put in front
   * of each body.
   *
   * @param   string        Text to prepend.
   * @return  list<string>  Prefixed fixture bodies.
   */
  private function prependToEmailBodies($prefix) {
    $prefixed = array();
    foreach ($this->getEmailBodies() as $raw_body) {
      $prefixed[] = $prefix.$raw_body;
    }
    return $prefixed;
  }

  private function getEmailBodiesWithFullCommands() {
    return $this->prependToEmailBodies("!whatevs dude\n");
  }

  private function getEmailBodiesWithPartialCommands() {
    return $this->prependToEmailBodies("!whatevs\n");
  }

  private function getEmailBodiesWithMultipleCommands() {
    return $this->prependToEmailBodies("!top1\n\n!top2\n\npreface\n\n");
  }

  private function getEmailBodiesWithSplitCommands() {
    // Two commands: both on top, one on each side, both at the bottom.
    return array(
      "!cmd1\n!cmd2\nOKAY",
      "!cmd1\nOKAY\n!cmd2",
      "OKAY\n!cmd1\n!cmd2",
    );
  }

  private function getEmailBodiesWithMiddleCommands() {
    // "!cmd2" sits between two text lines in every case.
    return array(
      "!cmd1\nHEAD\n!cmd2\nTAIL\n!cmd3",
      "!cmd1\nHEAD\n!cmd2\nTAIL",
      "HEAD\n!cmd2\nTAIL\n!cmd3",
    );
  }

  /**
   * Fixture mails. Each one is the text "OKAY" followed by some flavor of
   * quoted reply or signature.
   *
   * @return  list<string>  Raw mail bodies.
   */
  private function getEmailBodies() {
    // Interpolated so editors can't silently strip the trailing whitespace.
    $trailing_space = ' ';
    $emdash = "\xE2\x80\x94";

    $bodies = array();

    $bodies[] = <<<EOEMAIL
OKAY
On May 30, 2011, at 8:36 PM, Someone wrote:
> ...
EOEMAIL;

    $bodies[] = <<<EOEMAIL
OKAY
On Fri, May 27, 2011 at 9:39 AM, Someone <
somebody@somewhere.com> wrote:
> ...
EOEMAIL;

    $bodies[] = <<<EOEMAIL
OKAY
On Fri, May 27, 2011 at 9:39 AM, Someone
<somebody@somewhere.com> wrote:
> ...
EOEMAIL;

    $bodies[] = <<<EOEMAIL
OKAY
-----Oprindelig Meddelelse-----
> ...
EOEMAIL;

    $bodies[] = <<<EOEMAIL
OKAY
-----Original Message-----
> ...
EOEMAIL;

    $bodies[] = <<<EOEMAIL
OKAY
-----oprindelig meddelelse-----
> ...
EOEMAIL;

    $bodies[] = <<<EOEMAIL
OKAY
-----original message-----
> ...
EOEMAIL;

    $bodies[] = <<<EOEMAIL
OKAY
Sent from my HTC smartphone on the Now Network from Sprint!
-Reply message ----- From: "somebody (someone)" <
somebody@somewhere.com>
To: <somebody@somewhere.com>
Subject: Some Text Date: Mon, Apr 2, 2012 1:42 pm
> ...
EOEMAIL;

    $bodies[] = <<<EOEMAIL
OKAY
--{$trailing_space}
Abraham Lincoln
Supreme Galactic Emperor
EOEMAIL;

    $bodies[] = <<<EOEMAIL
OKAY
Sent from my iPhone
EOEMAIL;

    $bodies[] = <<<EOEMAIL
OKAY
________________________________________
From: Abraham Lincoln <alincoln@logcab.in>
Subject: Core World Tariffs
EOEMAIL;

    $bodies[] = <<<EOEMAIL
OKAY
> On 17 Oct 2013, at 17:47, "Someone" <somebody@somewhere> wrote:
> ...
EOEMAIL;

    $bodies[] = <<<EOEMAIL
OKAY
> -----Original Message-----
>
> ...
EOEMAIL;

    $bodies[] = <<<EOEMAIL
OKAY {$emdash}{$trailing_space}
Sent from Mailbox
EOEMAIL;

    $bodies[] = <<<EOEMAIL
OKAY
{$emdash}
Sent from Mailbox
EOEMAIL;

    $bodies[] = <<<EOEMAIL
OKAY

2015-05-06 11:21 GMT-07:00 Someone <someone@somewhere.com>:
> ...
EOEMAIL;

    return $bodies;
  }

}

File Metadata

Mime Type
text/x-diff
Expires
Jan 19 2025, 20:37 (6 w, 1 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1128513
Default Alt Text
(10 KB)

Event Timeline