Page Menu
Home
Phorge
Search
Configure Global Search
Log In
Files
F2894888
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Award Token
Flag For Later
Advanced/Developer...
View Handle
View Hovercard
Size
10 KB
Referenced Files
None
Subscribers
None
View Options
diff --git a/src/applications/metamta/parser/PhabricatorMetaMTAEmailBodyParser.php b/src/applications/metamta/parser/PhabricatorMetaMTAEmailBodyParser.php
index 5138c1a2bd..07770600e6 100644
--- a/src/applications/metamta/parser/PhabricatorMetaMTAEmailBodyParser.php
+++ b/src/applications/metamta/parser/PhabricatorMetaMTAEmailBodyParser.php
@@ -1,161 +1,167 @@
<?php
final class PhabricatorMetaMTAEmailBodyParser {
/**
 * Split a raw mail body into prose and "!command" lines.
 *
 * Mail bodies may carry commands, one per line, each introduced by "!":
 *
 *   !claim
 *
 *   taking this task
 *
 * or, with arguments:
 *
 *   !assign epriestley
 *
 *   please, take this task I took; its hard
 *
 * Commands are recognized only in a run at the very top and in a run at the
 * very bottom of the (already quote- and signature-stripped) body. A "!"
 * line in the middle of the text stays in the body untouched.
 *
 * The second example above parses as:
 *
 *   array(
 *     'body' => 'please, take this task I took; its hard',
 *     'commands' => array(
 *       array('assign', 'epriestley'),
 *     ),
 *   )
 *
 * @param string $body Raw mail text body.
 * @return dict Map with 'body' (clean text) and 'commands' (list of argument
 *   lists, top commands first, then bottom commands in document order).
 */
public function parseBody($body) {
  $text = $this->stripTextBody($body);

  // Line endings are kept so the remaining lines can be glued back together
  // verbatim with implode('') below.
  $lines = phutil_split_lines($text, true);

  // Peel commands off the top, then off the bottom by scanning the
  // remaining lines in reverse.
  list($leading, $lines) = $this->stripCommands($lines);
  list($trailing, $lines) = $this->stripCommands(array_reverse($lines));

  // The bottom commands were collected last-to-first; restore their
  // document order before appending them to the top commands.
  $commands = array_merge($leading, array_reverse($trailing));
  $clean_body = rtrim(implode('', array_reverse($lines)));

  return array(
    'body' => $clean_body,
    'commands' => $commands,
  );
}
/**
 * Consume the leading run of "!command" lines from a list of lines.
 *
 * Scanning stops at the first line that is not a command. Blank lines that
 * follow at least one command are consumed too; a blank line before any
 * command ends the scan, because it does not match the command pattern.
 *
 * Each command is split on commas and whitespace into an argument list whose
 * first element is the command name. Empty arguments are discarded, so a
 * trailing comma ("!cc alice, bob,") does not produce a spurious '' argument.
 *
 * @param list<string> $lines Lines to scan, in the order they should be read.
 * @return pair<list<list<string>>, map<int, string>> The parsed commands and
 *   the unconsumed lines. Consumed lines are unset, so the original keys of
 *   the remaining lines are preserved.
 */
private function stripCommands(array $lines) {
  $commands = array();
  $saw_command = false;

  foreach ($lines as $key => $line) {
    // Swallow blank separator lines, but only once a command has been seen.
    if ($saw_command && trim($line) === '') {
      unset($lines[$key]);
      continue;
    }

    $matches = null;
    if (!preg_match('/^\s*!(\w+.*$)/', $line, $matches)) {
      // First non-command line: everything from here on is body text.
      break;
    }

    // PREG_SPLIT_NO_EMPTY drops the empty piece a trailing separator would
    // otherwise leave at the end of the argument list.
    $commands[] = preg_split(
      '/[,\s]+/',
      trim($matches[1]),
      -1,
      PREG_SPLIT_NO_EMPTY);

    unset($lines[$key]);
    $saw_command = true;
  }

  return array($commands, $lines);
}
/**
 * Reduce a raw mail body to the text the sender actually wrote.
 *
 * Quoted reply text is removed first, then any signature, and finally the
 * result is trimmed of surrounding whitespace.
 *
 * @param string $body Raw mail text body.
 * @return string Body without quoted text, signature or outer whitespace.
 */
public function stripTextBody($body) {
  $without_quotes = $this->stripQuotedText($body);
  $without_signature = $this->stripSignature($without_quotes);
  return trim($without_signature);
}
/**
 * Remove quoted reply text from a mail body.
 *
 * Two strategies are applied in sequence: a line scan for an
 * "On <date>, <user> wrote:" attribution, followed by a series of regular
 * expressions for client-specific quote markers. Whatever follows the first
 * recognized marker is discarded and the result is right-trimmed.
 *
 * @param string $body Mail text body.
 * @return string Body with recognized quoted text removed.
 */
private function stripQuotedText($body) {
// Look for "On <date>, <user> wrote:". This may be split across multiple
// lines. We need to be careful not to remove all of a message like this:
//
// On which day do you want to meet?
//
// On <date>, <user> wrote:
// > Let's set up a meeting.
$start = null;
// NOTE(review): phutil_split_lines() is defined outside this file; the
// implode('') below only reconstructs the text if it keeps line endings by
// default. Confirm.
$lines = phutil_split_lines($body);
foreach ($lines as $key => $line) {
// Track the MOST RECENT line that starts with "On" (optionally behind a
// ">" quote marker). Overwriting $start on every match is what keeps an
// earlier, unrelated "On which day..." line from being used as the cut.
if (preg_match('/^\s*>?\s*On\b/', $line)) {
$start = $key;
}
if ($start !== null) {
// The attribution may wrap, so "wrote:" can appear on the "On" line or
// on a later one. Once it is seen, keep only the lines before $start.
if (preg_match('/\bwrote:/', $line)) {
$lines = array_slice($lines, 0, $start);
$body = implode('', $lines);
break;
}
}
}
// The patterns below all use the "U" (ungreedy) modifier, which inverts
// quantifier greediness: the trailing ".*?" is therefore GREEDY and, with
// "s" letting "." match newlines, consumes everything from the marker to
// the end of the body.
// Outlook english
$body = preg_replace(
'/^\s*(> )?-----Original Message-----.*?/imsU',
'',
$body);
// Outlook danish
$body = preg_replace(
'/^\s*(> )?-----Oprindelig Meddelelse-----.*?/imsU',
'',
$body);
// See example in T3217.
$body = preg_replace(
'/^________________________________________\s+From:.*?/imsU',
'',
$body);
+ // French GMail quoted text. See T8199.
+ $body = preg_replace(
+ '/^\s*\d{4}-\d{2}-\d{2} \d+:\d+ GMT.*:.*?/imsU',
+ '',
+ $body);
+
return rtrim($body);
}
/**
 * Remove a trailing signature from a mail body.
 *
 * Each known signature pattern is applied in turn; a match removes the
 * signature marker and everything after it. The result is right-trimmed.
 *
 * preg_replace() returns null when PCRE fails (for example when the "/u"
 * pattern meets a body that is not valid UTF-8). In that case the pattern is
 * skipped and the text is kept as it was, instead of the whole body being
 * silently replaced by an empty string.
 *
 * @param string $body Mail text body.
 * @return string Body without a recognized signature or trailing whitespace.
 */
private function stripSignature($body) {
  $patterns = array(
    // Quasi-"standard" delimiter ("-- " on its own line), for lols see:
    // https://bugzilla.mozilla.org/show_bug.cgi?id=58406
    '/^-- +$.*/sm',

    // Mailbox seems to make an attempt to comply with the "standard" but
    // omits the leading newline and uses an em dash. This may or may not
    // have the trailing space, but it's unique enough that there's no real
    // ambiguity in detecting it.
    "/\s*\xE2\x80\x94\s*\nSent from Mailbox\s*\z/su",

    // HTC Mail application (mobile)
    '/^\s*^Sent from my HTC smartphone.*/sm',

    // Apple iPhone
    '/^\s*^Sent from my iPhone\s*$.*/sm',
  );

  foreach ($patterns as $pattern) {
    $stripped = preg_replace($pattern, '', $body);
    if ($stripped !== null) {
      $body = $stripped;
    }
  }

  return rtrim($body);
}
}
diff --git a/src/applications/metamta/parser/__tests__/PhabricatorMetaMTAEmailBodyParserTestCase.php b/src/applications/metamta/parser/__tests__/PhabricatorMetaMTAEmailBodyParserTestCase.php
index 6a124e30f4..d8ad49956d 100644
--- a/src/applications/metamta/parser/__tests__/PhabricatorMetaMTAEmailBodyParserTestCase.php
+++ b/src/applications/metamta/parser/__tests__/PhabricatorMetaMTAEmailBodyParserTestCase.php
@@ -1,260 +1,267 @@
<?php
final class PhabricatorMetaMTAEmailBodyParserTestCase
extends PhabricatorTestCase {
/**
 * Every fixture body must reduce to exactly "OKAY" once quoted text and
 * signatures are stripped.
 */
public function testQuotedTextStripping() {
  foreach ($this->getEmailBodies() as $raw_body) {
    $parser = new PhabricatorMetaMTAEmailBodyParser();
    $this->assertEqual('OKAY', $parser->stripTextBody($raw_body));
  }
}
/**
 * Exercise parseBody() against each family of command-bearing fixtures:
 * a command with an argument, a bare command, several top commands,
 * commands split between top and bottom, and commands in the middle of the
 * text (which must stay in the body).
 */
public function testEmailBodyCommandParsing() {
  $this->assertParsedBodies(
    $this->getEmailBodiesWithFullCommands(),
    'OKAY',
    array(
      array('whatevs', 'dude'),
    ));

  $this->assertParsedBodies(
    $this->getEmailBodiesWithPartialCommands(),
    'OKAY',
    array(
      array('whatevs'),
    ));

  $this->assertParsedBodies(
    $this->getEmailBodiesWithMultipleCommands(),
    "preface\n\nOKAY",
    array(
      array('top1'),
      array('top2'),
    ));

  $this->assertParsedBodies(
    $this->getEmailBodiesWithSplitCommands(),
    'OKAY',
    array(
      array('cmd1'),
      array('cmd2'),
    ));

  // Only the body is checked here: the commands found at the edges differ
  // from fixture to fixture.
  $this->assertParsedBodies(
    $this->getEmailBodiesWithMiddleCommands(),
    "HEAD\n!cmd2\nTAIL");
}

/**
 * Parse each body with a fresh parser and compare the outcome.
 *
 * @param list<string> $bodies Raw mail bodies to parse.
 * @param string $expect_body Expected 'body' value for every input.
 * @param list<list<string>>|null $expect_commands Expected 'commands' value,
 *   or null to skip the command comparison.
 * @return void
 */
private function assertParsedBodies(
  array $bodies,
  $expect_body,
  $expect_commands = null) {

  foreach ($bodies as $raw_body) {
    $parser = new PhabricatorMetaMTAEmailBodyParser();
    $parsed = $parser->parseBody($raw_body);

    $this->assertEqual($expect_body, $parsed['body']);
    if ($expect_commands !== null) {
      $this->assertEqual($expect_commands, $parsed['commands']);
    }
  }
}
/**
 * A sentence that merely starts with "On" must survive; only the later
 * "On <date>, <user> wrote:" attribution and the quote below it are removed.
 */
public function testFalsePositiveForOnWrote() {
  $mail = implode(
    "\n",
    array(
      'On which horse shall you ride?',
      'On Sep 23, alincoln wrote:',
      '> Hey bro do you want to go ride horses tomorrow?',
    ));

  $parser = new PhabricatorMetaMTAEmailBodyParser();
  $this->assertEqual(
    'On which horse shall you ride?',
    $parser->stripTextBody($mail));
}
/**
 * Fixture bodies, each preceded by a command line that carries an argument.
 *
 * @return list<string> Raw mail bodies starting with "!whatevs dude".
 */
private function getEmailBodiesWithFullCommands() {
  $command_line = "!whatevs dude\n";

  $result = array();
  foreach ($this->getEmailBodies() as $plain_body) {
    $result[] = $command_line.$plain_body;
  }
  return $result;
}
/**
 * Fixture bodies, each preceded by a command line without arguments.
 *
 * @return list<string> Raw mail bodies starting with "!whatevs".
 */
private function getEmailBodiesWithPartialCommands() {
  $command_line = "!whatevs\n";

  $result = array();
  foreach ($this->getEmailBodies() as $plain_body) {
    $result[] = $command_line.$plain_body;
  }
  return $result;
}
/**
 * Fixture bodies, each preceded by two blank-line-separated commands and a
 * "preface" paragraph.
 *
 * @return list<string> Raw mail bodies starting with "!top1" and "!top2".
 */
private function getEmailBodiesWithMultipleCommands() {
  $header = "!top1\n\n!top2\n\npreface\n\n";

  $result = array();
  foreach ($this->getEmailBodies() as $plain_body) {
    $result[] = $header.$plain_body;
  }
  return $result;
}
/**
 * Bodies whose two commands sit both on top, one on each side, or both at
 * the bottom of the single body line "OKAY".
 *
 * @return list<string> Raw mail bodies.
 */
private function getEmailBodiesWithSplitCommands() {
  return array(
    "!cmd1\n!cmd2\nOKAY",
    "!cmd1\nOKAY\n!cmd2",
    "OKAY\n!cmd1\n!cmd2",
  );
}
/**
 * Bodies with a "!cmd2" line between "HEAD" and "TAIL", plus optional
 * commands at the top and/or bottom.
 *
 * @return list<string> Raw mail bodies.
 */
private function getEmailBodiesWithMiddleCommands() {
  return array(
    "!cmd1\nHEAD\n!cmd2\nTAIL\n!cmd3",
    "!cmd1\nHEAD\n!cmd2\nTAIL",
    "HEAD\n!cmd2\nTAIL\n!cmd3",
  );
}
/**
 * Fixture mail bodies for the stripping tests.
 *
 * Each entry is the text "OKAY" followed by one kind of quoted reply or
 * signature; testQuotedTextStripping() asserts that every one of them
 * reduces to exactly "OKAY". The command fixtures are built by prepending
 * command lines to these bodies.
 *
 * The trailing space and the UTF-8 em dash are interpolated from variables
 * so that those bytes are explicit in the heredocs (presumably so an editor
 * that trims trailing whitespace cannot change the fixtures).
 *
 * @return list<string> Raw mail bodies.
 */
private function getEmailBodies() {
$trailing_space = ' ';
$emdash = "\xE2\x80\x94";
return array(
<<<EOEMAIL
OKAY
On May 30, 2011, at 8:36 PM, Someone wrote:
> ...
EOEMAIL
,
<<<EOEMAIL
OKAY
On Fri, May 27, 2011 at 9:39 AM, Someone <
somebody@somewhere.com> wrote:
> ...
EOEMAIL
,
<<<EOEMAIL
OKAY
On Fri, May 27, 2011 at 9:39 AM, Someone
<somebody@somewhere.com> wrote:
> ...
EOEMAIL
,
<<<EOEMAIL
OKAY
-----Oprindelig Meddelelse-----
> ...
EOEMAIL
,
<<<EOEMAIL
OKAY
-----Original Message-----
> ...
EOEMAIL
,
<<<EOEMAIL
OKAY
-----oprindelig meddelelse-----
> ...
EOEMAIL
,
<<<EOEMAIL
OKAY
-----original message-----
> ...
EOEMAIL
,
<<<EOEMAIL
OKAY
Sent from my HTC smartphone on the Now Network from Sprint!
-Reply message ----- From: "somebody (someone)" <
somebody@somewhere.com>
To: <somebody@somewhere.com>
Subject: Some Text Date: Mon, Apr 2, 2012 1:42 pm
> ...
EOEMAIL
,
<<<EOEMAIL
OKAY
--{$trailing_space}
Abraham Lincoln
Supreme Galactic Emperor
EOEMAIL
,
<<<EOEMAIL
OKAY
Sent from my iPhone
EOEMAIL
,
<<<EOMAIL
OKAY
________________________________________
From: Abraham Lincoln <alincoln@logcab.in>
Subject: Core World Tariffs
EOMAIL
,
<<<EOMAIL
OKAY
> On 17 Oct 2013, at 17:47, "Someone" <somebody@somewhere> wrote:
> ...
EOMAIL
,
<<<EOMAIL
OKAY
> -----Original Message-----
>
> ...
EOMAIL
,
<<<EOMAIL
OKAY {$emdash}{$trailing_space}
Sent from Mailbox
EOMAIL
,
<<<EOMAIL
OKAY
{$emdash}
Sent from Mailbox
+EOMAIL
+,
+<<<EOMAIL
+OKAY
+
+2015-05-06 11:21 GMT-07:00 Someone <someone@somewhere.com>:
+> ...
EOMAIL
);
}
}
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Jan 19 2025, 20:37 (6 w, 1 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1128513
Default Alt Text
(10 KB)
Attached To
Mode
rP Phorge
Attached
Detach File
Event Timeline
Log In to Comment