Page Menu
Home
Phorge
Search
Configure Global Search
Log In
Files
F2892832
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Award Token
Flag For Later
Advanced/Developer...
View Handle
View Hovercard
Size
13 KB
Referenced Files
None
Subscribers
None
View Options
diff --git a/src/difference/ArcanistDiffUtils.php b/src/difference/ArcanistDiffUtils.php
index dab50e0c..f09573a1 100644
--- a/src/difference/ArcanistDiffUtils.php
+++ b/src/difference/ArcanistDiffUtils.php
@@ -1,254 +1,236 @@
<?php
/**
* Dumping ground for diff- and diff-algorithm-related miscellany.
*/
final class ArcanistDiffUtils extends Phobject {
/**
 * Heuristically decide whether a blob of file content is binary.
 *
 * This follows the Git heuristic of treating content that contains a NULL
 * ("\0") byte as binary. Git only samples the leading part of each file; the
 * whole of the supplied data is scanned here instead.
 *
 * @param string $data Raw file content to inspect.
 * @return bool True if a NULL byte was found, so the content is almost
 *   certainly binary. False if none was found; the content might still be
 *   binary but is subtle about it.
 */
public static function isHeuristicBinaryFile($data) {
  $first_null_byte = strpos($data, "\0");
  return ($first_null_byte !== false);
}
/**
 * Render the difference between two values by running the `diff` command.
 *
 * Both values are cast to strings, written to temporary files with a trailing
 * newline appended, and compared with `diff`. When the two values are
 * identical as strings, an explanatory line is appended to the new value
 * before it is written out.
 *
 * @param wild $old Old value; cast to a string.
 * @param wild $new New value; cast to a string.
 * @param int $context_lines Value passed to the `-U` flag of `diff`.
 * @param string $diff_options Additional options placed into the command
 *   through the `%C` conversion.
 * @return string Standard output of the `diff` command.
 */
public static function renderDifferences(
  $old,
  $new,
  $context_lines = 3,
  $diff_options = "-L 'Old Value' -L 'New Value'") {

  $old_text = (string)$old;
  $new_text = (string)$new;
  if ($old_text === $new_text) {
    $new_text .= "\n".pht('(Old and new values are identical.)');
  }

  $old_tmp = new TempFile();
  $new_tmp = new TempFile();
  Filesystem::writeFile($old_tmp, $old_text."\n");
  Filesystem::writeFile($new_tmp, $new_text."\n");

  // The exit code (first element) is not inspected; only standard output
  // (second element) is returned.
  $result = exec_manual(
    'diff %C -U %s %s %s',
    $diff_options,
    $context_lines,
    $old_tmp,
    $new_tmp);

  return $result[1];
}
/**
 * Describe which byte ranges of two lines differ from one another.
 *
 * When the strings are identical, or either one is empty, each side is
 * reported as a single run of type 0 covering its whole byte length. All other
 * inputs are delegated to computeIntralineEdits().
 *
 * @param string $o Old line.
 * @param string $n New line.
 * @return array Pair of run lists (old, new). Each run is a two-element array
 *   whose second element is a length in bytes.
 */
public static function generateIntralineDiff($o, $n) {
  $old_length = strlen($o);
  $new_length = strlen($n);

  $needs_comparison =
    ($o !== $n) &&
    ($old_length !== 0) &&
    ($new_length !== 0);

  if ($needs_comparison) {
    return self::computeIntralineEdits($o, $n);
  }

  $old_runs = array(array(0, $old_length));
  $new_runs = array(array(0, $new_length));

  return array($old_runs, $new_runs);
}
/**
 * Wrap the highlighted ranges of a string in `<span class="bright">` markup.
 *
 * The string is walked byte by byte while a logical position is tracked. The
 * position does not advance inside `<...>` tags, and a `&...;` entity advances
 * it once, when its closing `;` is reached. Runs taken from `$intra_stack` are
 * measured against that logical position.
 *
 * @param string|PhutilSafeHTML $str Text to decorate. If a PhutilSafeHTML is
 *   given, its HTML content is processed and the result is wrapped with
 *   phutil_safe_html().
 * @param list $intra_stack Runs of the form array(flag, length). Runs whose
 *   flag is 0 are skipped over; other runs are highlighted.
 * @return string|PhutilSafeHTML Decorated text, of the same kind as the input.
 */
public static function applyIntralineDiff($str, $intra_stack) {
$buf = '';
$p = $s = $e = 0; // position, start, end
// $highlight: a highlight span is currently open in the output.
// $tag: the cursor is inside a "<...>" tag.
// $ent: the cursor is inside a "&...;" entity.
$highlight = $tag = $ent = false;
$highlight_o = '<span class="bright">';
$highlight_c = '</span>';
$is_html = false;
if ($str instanceof PhutilSafeHTML) {
$is_html = true;
$str = $str->getHTMLContent();
}
$n = strlen($str);
for ($i = 0; $i < $n; $i++) {
// The current run is exhausted: pull runs off the stack until one with a
// nonzero flag is found. $s and $e then bound that run.
if ($p == $e) {
do {
if (empty($intra_stack)) {
// No runs remain, so the rest of the string is copied through as-is.
$buf .= substr($str, $i);
break 2;
}
$stack = array_shift($intra_stack);
$s = $e;
$e += $stack[1];
} while ($stack[0] == 0);
}
// Open the highlight at the start of the run, but never while inside a tag
// or an entity.
if (!$highlight && !$tag && !$ent && $p == $s) {
$buf .= $highlight_o;
$highlight = true;
}
// A tag is starting: close the open span before the tag is emitted. It is
// reopened after the tag's closing ">" below.
if ($str[$i] == '<') {
$tag = true;
if ($highlight) {
$buf .= $highlight_c;
}
}
if (!$tag) {
if ($str[$i] == '&') {
$ent = true;
}
if ($ent && $str[$i] == ';') {
$ent = false;
}
// Bytes inside an entity do not advance the position; the closing ";"
// clears $ent first, so the whole entity advances it exactly once.
if (!$ent) {
$p++;
}
}
$buf .= $str[$i];
if ($tag && $str[$i] == '>') {
$tag = false;
if ($highlight) {
$buf .= $highlight_o;
}
}
// Close the span at the end of the run, or at the last byte of the string.
if ($highlight && ($p == $e || $i == $n - 1)) {
$buf .= $highlight_c;
$highlight = false;
}
}
if ($is_html) {
return phutil_safe_html($buf);
}
return $buf;
}
/**
 * Merge adjacent runs that share the same type into a single run.
 *
 * When two neighbouring runs have equal types (compared loosely), the later
 * run absorbs the length of the earlier one and the earlier one is dropped.
 *
 * @param list $runs List of runs of the form array(type, length).
 * @return list Reindexed list of merged runs.
 */
private static function collapseIntralineRuns($runs) {
  $collapsed = array();

  foreach ($runs as $run) {
    $tail = count($collapsed) - 1;

    if ($tail < 0 || $collapsed[$tail][0] != $run[0]) {
      $collapsed[] = $run;
      continue;
    }

    // Same type as the previous run: fold the accumulated length into this
    // run and let it take the previous run's place.
    $run[1] += $collapsed[$tail][1];
    $collapsed[$tail] = $run;
  }

  return $collapsed;
}
/**
 * Compute an edit string describing how one sequence differs from another.
 *
 * Configures a PhutilEditDistanceMatrix over the two sequences and returns
 * the value of its getEditString() method. computeIntralineEdits() consumes
 * that string one character at a time and handles the characters 's', 'x',
 * 'i' and 'd'.
 *
 * NOTE(review): smoothing is requested through setApplySmoothing(true);
 * presumably this stands in for the regex-based smoothing that callers used
 * to apply to the returned string themselves -- confirm against
 * PhutilEditDistanceMatrix.
 *
 * @param array $ov Old sequence.
 * @param array $nv New sequence.
 * @param int $max Passed to setMaximumLength(), and also used to derive the
 *   alter cost as 1 / ($max * 2).
 * @return string Edit string produced by PhutilEditDistanceMatrix.
 */
public static function generateEditString(array $ov, array $nv, $max = 80) {
return id(new PhutilEditDistanceMatrix())
->setComputeString(true)
->setAlterCost(1 / ($max * 2))
->setReplaceCost(2)
->setMaximumLength($max)
->setSequences($ov, $nv)
+ ->setApplySmoothing(true)
->getEditString();
}
/**
 * Compute byte-based intraline edit runs between two strings.
 *
 * The strings are split into characters (combined UTF-8 characters when
 * either string contains a byte in the range 0x80-0xFF, single bytes
 * otherwise), an edit string is generated over those characters, and each
 * operation is then converted into a run measured in bytes.
 *
 * The byte length of a character is only looked up on the side that the
 * current operation actually consumes. An 'i' never reads the old sequence
 * and a 'd' never reads the new one, so a trailing insertion or deletion
 * cannot index past the end of the sequence that is already exhausted.
 *
 * @param string $o Old string.
 * @param string $n New string.
 * @return array Pair of collapsed run lists (old, new). Each run is
 *   array(type, byte length), where type is 0 for an 's' operation and 1 for
 *   any other operation.
 */
public static function computeIntralineEdits($o, $n) {
  if (preg_match('/[\x80-\xFF]/', $o.$n)) {
    $ov = phutil_utf8v_combined($o);
    $nv = phutil_utf8v_combined($n);
    $multibyte = true;
  } else {
    $ov = str_split($o);
    $nv = str_split($n);
    $multibyte = false;
  }

  $result = self::generateEditString($ov, $nv);

- // Smooth the string out, by replacing short runs of similar characters
- // with 'x' operations. This makes the result more readable to humans, since
- // there are fewer choppy runs of short added and removed substrings.
- do {
- $original = $result;
- $result = preg_replace(
- '/([xdi])(s{3})([xdi])/',
- '$1xxx$3',
- $result);
- $result = preg_replace(
- '/([xdi])(s{2})([xdi])/',
- '$1xx$3',
- $result);
- $result = preg_replace(
- '/([xdi])(s{1})([xdi])/',
- '$1x$3',
- $result);
- } while ($result != $original);
-
  // Now we have a character-based description of the edit. We need to
  // convert into a byte-based description. Walk through the edit string and
  // adjust each operation to reflect the number of bytes in the underlying
  // character.
  $o_pos = 0;
  $n_pos = 0;
  $result_len = strlen($result);
  $o_run = array();
  $n_run = array();

  for ($ii = 0; $ii < $result_len; $ii++) {
    $c = $result[$ii];

    // Start from zero on every iteration so an unrecognized operation
    // contributes nothing instead of reusing the previous lengths.
    $byte_o = 0;
    $byte_n = 0;

    switch ($c) {
      case 's':
      case 'x':
        // Consumes one character from each side.
        $byte_o = $multibyte ? strlen($ov[$o_pos]) : 1;
        $byte_n = $multibyte ? strlen($nv[$n_pos]) : 1;
        $o_pos++;
        $n_pos++;
        break;
      case 'i':
        // Consumes one character from the new side only.
        $byte_n = $multibyte ? strlen($nv[$n_pos]) : 1;
        $n_pos++;
        break;
      case 'd':
        // Consumes one character from the old side only.
        $byte_o = $multibyte ? strlen($ov[$o_pos]) : 1;
        $o_pos++;
        break;
    }

    $type = ($c == 's') ? 0 : 1;

    if ($byte_o) {
      $o_run[] = array($type, $byte_o);
    }

    if ($byte_n) {
      $n_run[] = array($type, $byte_n);
    }
  }

  $o_run = self::collapseIntralineRuns($o_run);
  $n_run = self::collapseIntralineRuns($n_run);

  return array($o_run, $n_run);
}
}
diff --git a/src/difference/__tests__/ArcanistDiffUtilsTestCase.php b/src/difference/__tests__/ArcanistDiffUtilsTestCase.php
index 26fa9e47..cce90c6f 100644
--- a/src/difference/__tests__/ArcanistDiffUtilsTestCase.php
+++ b/src/difference/__tests__/ArcanistDiffUtilsTestCase.php
@@ -1,242 +1,242 @@
<?php
/**
* Test cases for @{class:ArcanistDiffUtils}.
*/
final class ArcanistDiffUtilsTestCase extends PhutilTestCase {
/**
 * Check the edit strings produced by ArcanistDiffUtils::generateEditString().
 *
 * Every case is a triple of (old string, new string, expected edit string).
 * The ASCII cases are run twice: once split into bytes with str_split(), and
 * once split with phutil_utf8v_combined(). The UTF-8 cases are only run
 * through phutil_utf8v_combined(), since splitting them into bytes would not
 * produce one element per character.
 */
public function testLevenshtein() {
  $tests = array(
    array(
      'a',
      'b',
      'x',
    ),
    array(
      'kalrmr(array($b))',
      'array($b)',
      'dddddddssssssssds',
    ),
    array(
      'array($b)',
      'kalrmr(array($b))',
      'iiiiiiissssssssis',
    ),
    array(
      'zkalrmr(array($b))z',
      'xarray($b)x',
      'dddddddxsssssssssdx',
    ),
    array(
      'xarray($b)x',
      'zkalrmr(array($b))z',
      'iiiiiiixsssssssssix',
    ),
    array(
      'abcdefghi',
      'abcdefghi',
      'sssssssss',
    ),
    array(
      'abcdefghi',
      'abcdefghijkl',
      'sssssssssiii',
    ),
    array(
      'abcdefghijkl',
      'abcdefghi',
      'sssssssssddd',
    ),
    array(
      'xyzabcdefghi',
      'abcdefghi',
      'dddsssssssss',
    ),
    array(
      'abcdefghi',
      'xyzabcdefghi',
      'iiisssssssss',
    ),
    array(
      'abcdefg',
      'abxdxfg',
- 'ssxsxss',
+ 'ssxxxss',
    ),
    array(
      'private function a($a, $b) {',
      'public function and($b, $c) {',
- 'siixsdddxsssssssssssiissxsssxsss',
+ 'siixxdddxsssssssssssiixxxxxxxsss',
    ),
    array(
      // This is a test that we correctly detect shared prefixes and suffixes
      // and don't trigger "give up, too long" mode if there's a small text
      // change in an ocean of similar text.
      ' if ('.
      'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx'.
      'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx'.
      'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx) {',
      ' if('.
      'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx'.
      'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx'.
      'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx) {',
      'ssssssssssds'.
      'ssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssss'.
      'ssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssss'.
      'sssssssssssssssssssssssssssssssssssssss',
    ),
  );

  foreach ($tests as $test) {
    $this->assertEqual(
      $test[2],
      ArcanistDiffUtils::generateEditString(
        str_split($test[0]),
        str_split($test[1])),
      pht("'%s' vs '%s'", $test[0], $test[1]));
  }

  $utf8_tests = array(
    array(
      'GrumpyCat',
      "Grumpy\xE2\x98\x83at",
      'ssssssxss',
    ),
  );

  // Run the ASCII cases again through the UTF-8 splitter, followed by the
  // UTF-8-only cases. Previously this loop iterated over $tests alone, so the
  // cases in $utf8_tests were never executed.
  foreach (array_merge($tests, $utf8_tests) as $test) {
    $this->assertEqual(
      $test[2],
      ArcanistDiffUtils::generateEditString(
        phutil_utf8v_combined($test[0]),
        phutil_utf8v_combined($test[1])),
      pht("'%s' vs '%s' (utf8)", $test[0], $test[1]));
  }
}
/**
 * Check ArcanistDiffUtils::generateIntralineDiff() against inputs containing
 * multibyte UTF-8 characters.
 *
 * Each case lists the left string, the right string, and the expected pair
 * of run lists (left runs, then right runs). Run lengths are in bytes.
 */
public function testGenerateUTF8IntralineDiff() {
  // This is a unicode combining character, "COMBINING DOUBLE TILDE".
  $cc = "\xCD\xA0";

  $cases = array(
    // Both strings empty.
    array(
      'left' => '',
      'right' => '',
      'expect' => array(
        array(array(0, 0)),
        array(array(0, 0)),
      ),
    ),
    // Left string empty.
    array(
      'left' => '',
      'right' => "Grumpy\xE2\x98\x83at",
      'expect' => array(
        array(array(0, 0)),
        array(array(0, 11)),
      ),
    ),
    // Right string empty.
    array(
      'left' => "Grumpy\xE2\x98\x83at",
      'right' => '',
      'expect' => array(
        array(array(0, 11)),
        array(array(0, 0)),
      ),
    ),
    // Both strings the same.
    array(
      'left' => "Grumpy\xE2\x98\x83at",
      'right' => "Grumpy\xE2\x98\x83at",
      'expect' => array(
        array(array(0, 11)),
        array(array(0, 11)),
      ),
    ),
    // Both strings are different.
    array(
      'left' => "Grumpy\xE2\x98\x83at",
      'right' => 'Smiling Dog',
      'expect' => array(
        array(array(1, 11)),
        array(array(1, 11)),
      ),
    ),
    // One difference in the middle.
    array(
      'left' => 'GrumpyCat',
      'right' => "Grumpy\xE2\x98\x83at",
      'expect' => array(
        array(array(0, 6), array(1, 1), array(0, 2)),
        array(array(0, 6), array(1, 3), array(0, 2)),
      ),
    ),
    // Differences in the middle, not connected to each other.
    array(
      'left' => 'GrumpyCat',
      'right' => "Grumpy\xE2\x98\x83a\xE2\x98\x83t",
      'expect' => array(
        array(array(0, 6), array(1, 2), array(0, 1)),
        array(array(0, 6), array(1, 7), array(0, 1)),
      ),
    ),
    // Difference at the beginning.
    array(
      'left' => "GrumpyC\xE2\x98\x83t",
      'right' => "DrumpyC\xE2\x98\x83t",
      'expect' => array(
        array(array(1, 1), array(0, 10)),
        array(array(1, 1), array(0, 10)),
      ),
    ),
    // Difference at the end.
    array(
      'left' => "GrumpyC\xE2\x98\x83t",
      'right' => "GrumpyC\xE2\x98\x83P",
      'expect' => array(
        array(array(0, 10), array(1, 1)),
        array(array(0, 10), array(1, 1)),
      ),
    ),
    // Differences at the beginning and the end.
    array(
      'left' => "GrumpyC\xE2\x98\x83t",
      'right' => "DrumpyC\xE2\x98\x83P",
      'expect' => array(
        array(array(1, 1), array(0, 9), array(1, 1)),
        array(array(1, 1), array(0, 9), array(1, 1)),
      ),
    ),
    // A combining character inserted in the middle of a word.
    array(
      'left' => 'Senor',
      'right' => "Sen{$cc}or",
      'expect' => array(
        array(array(0, 2), array(1, 1), array(0, 2)),
        array(array(0, 2), array(1, 3), array(0, 2)),
      ),
    ),
  );

  foreach ($cases as $case) {
    $actual = ArcanistDiffUtils::generateIntralineDiff(
      $case['left'],
      $case['right']);

    $this->assertEqual($case['expect'], $actual);
  }
}
}
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Sun, Jan 19, 17:25 (1 w, 5 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1126939
Default Alt Text
(13 KB)
Attached To
Mode
rARC Arcanist
Attached
Detach File
Event Timeline
Log In to Comment