Page MenuHomePhorge

No OneTemporary

diff --git a/src/difference/ArcanistDiffUtils.php b/src/difference/ArcanistDiffUtils.php
index dab50e0c..f09573a1 100644
--- a/src/difference/ArcanistDiffUtils.php
+++ b/src/difference/ArcanistDiffUtils.php
@@ -1,254 +1,236 @@
<?php
/**
* Dumping ground for diff- and diff-algorithm-related miscellany.
*/
final class ArcanistDiffUtils extends Phobject {
/**
 * Apply a cheap heuristic to decide whether file content is binary.
 *
 * The check follows the Git heuristic: content that contains a NULL ("\0")
 * byte is treated as binary. Git only inspects the first "few" bytes (8KB or
 * so) as an optimization; there is no reasonable equivalent here, so the
 * entire string is scanned.
 *
 * @param string $data File content to examine.
 * @return bool If true, the file is almost certainly binary. If false, the
 *              file might still be binary but is subtle about it.
 */
public static function isHeuristicBinaryFile($data) {
  $null_offset = strpos($data, "\0");

  if ($null_offset === false) {
    return false;
  }

  return true;
}
/**
 * Render the differences between two values by running `diff` on them.
 *
 * Both values are cast to strings and written, each followed by a newline,
 * to temporary files which are then handed to the `diff` command. When the
 * two strings are identical, a note saying so is appended to the new value
 * before it is written.
 *
 * @param wild $old Old value; cast to string.
 * @param wild $new New value; cast to string.
 * @param int $context_lines Value passed to `diff -U`.
 * @param string $diff_options Additional options placed on the `diff`
 *                             command line ahead of `-U`.
 * @return string Standard output of the `diff` command. The exit code is
 *                not inspected.
 */
public static function renderDifferences(
  $old,
  $new,
  $context_lines = 3,
  $diff_options = "-L 'Old Value' -L 'New Value'") {

  $old_text = (string)$old;
  $new_text = (string)$new;

  if ($old_text === $new_text) {
    $new_text = $new_text."\n".pht('(Old and new values are identical.)');
  }

  $old_file = new TempFile();
  $new_file = new TempFile();

  Filesystem::writeFile($old_file, $old_text."\n");
  Filesystem::writeFile($new_file, $new_text."\n");

  $diff_result = exec_manual(
    'diff %C -U %s %s %s',
    $diff_options,
    $context_lines,
    $old_file,
    $new_file);

  // The second element of the result is the command's stdout.
  return $diff_result[1];
}
/**
 * Generate intraline diff runs for an old line and a new line.
 *
 * When the lines are identical, or when either one is empty, no edit
 * computation is done: each side is reported as a single run of type 0
 * covering its full byte length. Otherwise the work is delegated to
 * computeIntralineEdits().
 *
 * @param string $o Old line.
 * @param string $n New line.
 * @return array Pair array($old_runs, $new_runs), where each run is
 *               array($type, $byte_length).
 */
public static function generateIntralineDiff($o, $n) {
  $old_length = strlen($o);
  $new_length = strlen($n);

  $is_trivial = ($o === $n) || ($old_length == 0) || ($new_length == 0);

  if (!$is_trivial) {
    return self::computeIntralineEdits($o, $n);
  }

  $old_runs = array(array(0, $old_length));
  $new_runs = array(array(0, $new_length));

  return array($old_runs, $new_runs);
}
/**
* Wrap the changed portions of a line in highlight markup.
*
* Walks the input one byte at a time while consuming runs from
* `$intra_stack`. Text covered by a run with a nonzero type is wrapped in
* `<span class="bright">...</span>`; text covered by runs of type 0 is copied
* through unchanged. Bytes inside an HTML tag do not advance the position
* counter, and an HTML entity ("&...;") advances it by one in total.
*
* @param string|PhutilSafeHTML $str Line to decorate.
* @param array $intra_stack List of runs, each array($type, $length).
* @return string|PhutilSafeHTML Decorated line. The result is passed through
* phutil_safe_html() when the input was a
* PhutilSafeHTML, and is a plain string
* otherwise.
*/
public static function applyIntralineDiff($str, $intra_stack) {
$buf = '';
$p = $s = $e = 0; // position, start, end
// State flags: inside a highlight span, inside an HTML tag, inside an
// HTML entity.
$highlight = $tag = $ent = false;
$highlight_o = '<span class="bright">';
$highlight_c = '</span>';
$is_html = false;
if ($str instanceof PhutilSafeHTML) {
$is_html = true;
$str = $str->getHTMLContent();
}
$n = strlen($str);
for ($i = 0; $i < $n; $i++) {
// The current run is exhausted (or none has been loaded yet): pull runs off
// the stack until one with a nonzero type is found, leaving [$s, $e) as its
// range. Runs of type 0 only move the range forward.
if ($p == $e) {
do {
if (empty($intra_stack)) {
// No runs left: emit the remainder of the line verbatim and stop.
$buf .= substr($str, $i);
break 2;
}
$stack = array_shift($intra_stack);
$s = $e;
$e += $stack[1];
} while ($stack[0] == 0);
}
// Open the highlight once the position reaches the start of the run, but
// never in the middle of a tag or an entity.
if (!$highlight && !$tag && !$ent && $p == $s) {
$buf .= $highlight_o;
$highlight = true;
}
// A tag is starting: close any open highlight first so the span does not
// wrap the tag itself.
if ($str[$i] == '<') {
$tag = true;
if ($highlight) {
$buf .= $highlight_c;
}
}
if (!$tag) {
if ($str[$i] == '&') {
$ent = true;
}
if ($ent && $str[$i] == ';') {
$ent = false;
}
// While inside an entity the position is held; it advances once on the
// closing ';', so the whole entity counts as one position.
if (!$ent) {
$p++;
}
}
$buf .= $str[$i];
// The tag has ended: reopen the highlight that was closed before it.
if ($tag && $str[$i] == '>') {
$tag = false;
if ($highlight) {
$buf .= $highlight_o;
}
}
// Close the highlight at the end of the run, or at the last byte of the
// line so the span is never left open.
if ($highlight && ($p == $e || $i == $n - 1)) {
$buf .= $highlight_c;
$highlight = false;
}
}
if ($is_html) {
return phutil_safe_html($buf);
}
return $buf;
}
/**
 * Merge adjacent runs which share the same type into a single run.
 *
 * When two neighbouring runs have equal types, the earlier run's length is
 * folded into the later run and the earlier run is dropped.
 *
 * @param array $runs List of runs, each array($type, $length).
 * @return array Reindexed list of runs with no two adjacent runs of equal
 *               type.
 */
private static function collapseIntralineRuns($runs) {
  $collapsed = array();
  $pending = null;

  foreach ($runs as $run) {
    if ($pending !== null) {
      if ($pending[0] == $run[0]) {
        // Same type as the run before it: absorb the earlier length.
        $run[1] += $pending[1];
      } else {
        $collapsed[] = $pending;
      }
    }
    $pending = $run;
  }

  if ($pending !== null) {
    $collapsed[] = $pending;
  }

  return $collapsed;
}
/**
* Build an edit string describing how one sequence becomes another.
*
* Configures a PhutilEditDistanceMatrix with the costs and limits below and
* returns the result of its getEditString() call. Judging by the test cases
* and callers in this change, the string holds one operation character per
* step: 's' (same), 'x' (replace), 'i' (insert) and 'd' (delete).
*
* @param array $ov Old sequence, one element per character.
* @param array $nv New sequence, one element per character.
* @param int $max Value passed to setMaximumLength(); it is also used to
* derive the alter cost.
* @return string Edit string.
*/
public static function generateEditString(array $ov, array $nv, $max = 80) {
return id(new PhutilEditDistanceMatrix())
->setComputeString(true)
// NOTE(review): the alter cost is a small fraction derived from $max,
// presumably so it only acts as a tie-breaker between otherwise equal
// edits -- confirm against PhutilEditDistanceMatrix.
->setAlterCost(1 / ($max * 2))
->setReplaceCost(2)
->setMaximumLength($max)
->setSequences($ov, $nv)
+ ->setApplySmoothing(true)
->getEditString();
}
/**
* Compute intraline edit runs between an old line and a new line.
*
* The lines are split into characters: combined UTF-8 characters when either
* line contains a byte in the range 0x80-0xFF, single bytes otherwise. An
* edit string is generated for the two character lists, and each operation in
* it is then converted into a run measured in bytes.
*
* @param string $o Old line.
* @param string $n New line.
* @return array Pair array($old_runs, $new_runs). Each run is
* array($type, $byte_length), where type 0 marks unchanged text
* ('s' operations) and type 1 marks changed text ('x', 'i' and
* 'd' operations). Adjacent runs of the same type are merged.
*/
public static function computeIntralineEdits($o, $n) {
if (preg_match('/[\x80-\xFF]/', $o.$n)) {
$ov = phutil_utf8v_combined($o);
$nv = phutil_utf8v_combined($n);
$multibyte = true;
} else {
$ov = str_split($o);
$nv = str_split($n);
$multibyte = false;
}
$result = self::generateEditString($ov, $nv);
- // Smooth the string out, by replacing short runs of similar characters
- // with 'x' operations. This makes the result more readable to humans, since
- // there are fewer choppy runs of short added and removed substrings.
- do {
- $original = $result;
- $result = preg_replace(
- '/([xdi])(s{3})([xdi])/',
- '$1xxx$3',
- $result);
- $result = preg_replace(
- '/([xdi])(s{2})([xdi])/',
- '$1xx$3',
- $result);
- $result = preg_replace(
- '/([xdi])(s{1})([xdi])/',
- '$1x$3',
- $result);
- } while ($result != $original);
-
// Now we have a character-based description of the edit. We need to
// convert into a byte-based description. Walk through the edit string and
// adjust each operation to reflect the number of bytes in the underlying
// character.
$o_pos = 0;
$n_pos = 0;
$result_len = strlen($result);
$o_run = array();
$n_run = array();
$old_char_len = 1;
$new_char_len = 1;
for ($ii = 0; $ii < $result_len; $ii++) {
$c = $result[$ii];
if ($multibyte) {
// An insert does not advance the old cursor and a delete does not advance
// the new one, so once one side is fully consumed its cursor sits one past
// the last character. Guard both lookups so a trailing insert or delete
// does not read an undefined offset; the length of the exhausted side is
// never used by those operations.
$old_char_len = isset($ov[$o_pos]) ? strlen($ov[$o_pos]) : 0;
$new_char_len = isset($nv[$n_pos]) ? strlen($nv[$n_pos]) : 0;
}
switch ($c) {
case 's':
case 'x':
$byte_o = $old_char_len;
$byte_n = $new_char_len;
$o_pos++;
$n_pos++;
break;
case 'i':
$byte_o = 0;
$byte_n = $new_char_len;
$n_pos++;
break;
case 'd':
$byte_o = $old_char_len;
$byte_n = 0;
$o_pos++;
break;
}
// Only 's' produces an unchanged (type 0) run; every other operation is
// recorded as changed (type 1). Zero-length contributions are skipped.
if ($byte_o) {
if ($c == 's') {
$o_run[] = array(0, $byte_o);
} else {
$o_run[] = array(1, $byte_o);
}
}
if ($byte_n) {
if ($c == 's') {
$n_run[] = array(0, $byte_n);
} else {
$n_run[] = array(1, $byte_n);
}
}
}
$o_run = self::collapseIntralineRuns($o_run);
$n_run = self::collapseIntralineRuns($n_run);
return array($o_run, $n_run);
}
}
diff --git a/src/difference/__tests__/ArcanistDiffUtilsTestCase.php b/src/difference/__tests__/ArcanistDiffUtilsTestCase.php
index 26fa9e47..cce90c6f 100644
--- a/src/difference/__tests__/ArcanistDiffUtilsTestCase.php
+++ b/src/difference/__tests__/ArcanistDiffUtilsTestCase.php
@@ -1,242 +1,242 @@
<?php
/**
* Test cases for @{class:ArcanistDiffUtils}.
*/
final class ArcanistDiffUtilsTestCase extends PhutilTestCase {
/**
* Verify the edit strings produced by ArcanistDiffUtils::generateEditString().
*
* Each case is array($old, $new, $expected_edit_string). The byte-oriented
* cases are split with str_split(); the UTF-8 cases are split with
* phutil_utf8v_combined() so a multibyte character counts as one element.
*/
public function testLevenshtein() {
$tests = array(
array(
'a',
'b',
'x',
),
array(
'kalrmr(array($b))',
'array($b)',
'dddddddssssssssds',
),
array(
'array($b)',
'kalrmr(array($b))',
'iiiiiiissssssssis',
),
array(
'zkalrmr(array($b))z',
'xarray($b)x',
'dddddddxsssssssssdx',
),
array(
'xarray($b)x',
'zkalrmr(array($b))z',
'iiiiiiixsssssssssix',
),
array(
'abcdefghi',
'abcdefghi',
'sssssssss',
),
array(
'abcdefghi',
'abcdefghijkl',
'sssssssssiii',
),
array(
'abcdefghijkl',
'abcdefghi',
'sssssssssddd',
),
array(
'xyzabcdefghi',
'abcdefghi',
'dddsssssssss',
),
array(
'abcdefghi',
'xyzabcdefghi',
'iiisssssssss',
),
array(
'abcdefg',
'abxdxfg',
- 'ssxsxss',
+ 'ssxxxss',
),
array(
'private function a($a, $b) {',
'public function and($b, $c) {',
- 'siixsdddxsssssssssssiissxsssxsss',
+ 'siixxdddxsssssssssssiixxxxxxxsss',
),
array(
// This is a test that we correctly detect shared prefixes and suffixes
// and don't trigger "give up, too long" mode if there's a small text
// change in an ocean of similar text.
' if ('.
'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx'.
'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx'.
'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx) {',
' if('.
'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx'.
'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx'.
'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx) {',
'ssssssssssds'.
'ssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssss'.
'ssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssss'.
'sssssssssssssssssssssssssssssssssssssss',
),
);
foreach ($tests as $test) {
$this->assertEqual(
$test[2],
ArcanistDiffUtils::generateEditString(
str_split($test[0]),
str_split($test[1])),
pht("'%s' vs '%s'", $test[0], $test[1]));
}
$utf8_tests = array(
array(
'GrumpyCat',
"Grumpy\xE2\x98\x83at",
'ssssssxss',
),
);
// Iterate the UTF-8 cases here. Looping over $tests again would leave
// $utf8_tests unused and the multibyte case unchecked.
foreach ($utf8_tests as $test) {
$this->assertEqual(
$test[2],
ArcanistDiffUtils::generateEditString(
phutil_utf8v_combined($test[0]),
phutil_utf8v_combined($test[1])),
pht("'%s' vs '%s' (utf8)", $test[0], $test[1]));
}
}
/**
 * Verify the byte-based runs produced by
 * ArcanistDiffUtils::generateIntralineDiff() for inputs containing
 * multibyte UTF-8 characters.
 *
 * Each case is array($left, $right, $expected), where $expected is the pair
 * of run lists for the left and right strings. Cases are checked in order.
 */
public function testGenerateUTF8IntralineDiff() {
  // This is a unicode combining character, "COMBINING DOUBLE TILDE".
  $cc = "\xCD\xA0";

  $cases = array(
    // Both strings empty.
    array(
      '',
      '',
      array(
        array(array(0, 0)),
        array(array(0, 0)),
      ),
    ),

    // Left string empty.
    array(
      '',
      "Grumpy\xE2\x98\x83at",
      array(
        array(array(0, 0)),
        array(array(0, 11)),
      ),
    ),

    // Right string empty.
    array(
      "Grumpy\xE2\x98\x83at",
      '',
      array(
        array(array(0, 11)),
        array(array(0, 0)),
      ),
    ),

    // Both strings the same.
    array(
      "Grumpy\xE2\x98\x83at",
      "Grumpy\xE2\x98\x83at",
      array(
        array(array(0, 11)),
        array(array(0, 11)),
      ),
    ),

    // Both strings are different.
    array(
      "Grumpy\xE2\x98\x83at",
      'Smiling Dog',
      array(
        array(array(1, 11)),
        array(array(1, 11)),
      ),
    ),

    // String with one difference in the middle.
    array(
      'GrumpyCat',
      "Grumpy\xE2\x98\x83at",
      array(
        array(array(0, 6), array(1, 1), array(0, 2)),
        array(array(0, 6), array(1, 3), array(0, 2)),
      ),
    ),

    // Differences in middle, not connected to each other.
    array(
      'GrumpyCat',
      "Grumpy\xE2\x98\x83a\xE2\x98\x83t",
      array(
        array(array(0, 6), array(1, 2), array(0, 1)),
        array(array(0, 6), array(1, 7), array(0, 1)),
      ),
    ),

    // String with difference at the beginning.
    array(
      "GrumpyC\xE2\x98\x83t",
      "DrumpyC\xE2\x98\x83t",
      array(
        array(array(1, 1), array(0, 10)),
        array(array(1, 1), array(0, 10)),
      ),
    ),

    // String with difference at the end.
    array(
      "GrumpyC\xE2\x98\x83t",
      "GrumpyC\xE2\x98\x83P",
      array(
        array(array(0, 10), array(1, 1)),
        array(array(0, 10), array(1, 1)),
      ),
    ),

    // String with differences at the beginning and end.
    array(
      "GrumpyC\xE2\x98\x83t",
      "DrumpyC\xE2\x98\x83P",
      array(
        array(array(1, 1), array(0, 9), array(1, 1)),
        array(array(1, 1), array(0, 9), array(1, 1)),
      ),
    ),

    // Combining character inserted in the middle of a word.
    array(
      'Senor',
      "Sen{$cc}or",
      array(
        array(array(0, 2), array(1, 1), array(0, 2)),
        array(array(0, 2), array(1, 3), array(0, 2)),
      ),
    ),
  );

  foreach ($cases as $case) {
    list($left, $right, $expect) = $case;

    $this->assertEqual(
      $expect,
      ArcanistDiffUtils::generateIntralineDiff($left, $right));
  }
}
}

File Metadata

Mime Type
text/x-diff
Expires
Sun, Jan 19, 17:25 (1 w, 5 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1126939
Default Alt Text
(13 KB)

Event Timeline