// ===========================================================================
// File: src/applications/differential/storage/diff/DifferentialDiff.php
// (diff --git index 9ae0fa4a9b..1e245b4bfb 100644, hunk @@ -1,213 +1,279 @@)
//
// NOTE(review): the start of this file (opening tag, class declaration,
// property declarations and the head of the first method) is missing from the
// extracted text. The surviving tail of that first method is preserved
// verbatim below instead of being reconstructed by guesswork:
//
//     changesets === null) {
//       $this->changesets = array();
//     }
//     $this->unsavedChangesets[] = $changeset;
//     $this->changesets[] = $changeset;
//     return $this;
//   }
// ===========================================================================

  /**
   * Attach an already-loaded list of changesets to this diff.
   *
   * @param array $changesets List of DifferentialChangeset objects.
   * @return this
   */
  public function attachChangesets(array $changesets) {
    assert_instances_of($changesets, 'DifferentialChangeset');
    $this->changesets = $changesets;
    return $this;
  }

  /**
   * Return the attached changesets.
   *
   * @return array Attached changesets.
   * @throws Exception If no changesets have been attached yet.
   */
  public function getChangesets() {
    if ($this->changesets === null) {
      throw new Exception("Must load and attach changesets first!");
    }
    return $this->changesets;
  }

  /**
   * Load the changesets whose diffID matches this diff's ID.
   *
   * @return array Loaded changesets, or an empty array if this diff has no ID.
   */
  public function loadChangesets() {
    if (!$this->getID()) {
      return array();
    }
    return id(new DifferentialChangeset())->loadAllWhere(
      'diffID = %d',
      $this->getID());
  }

  /**
   * Load the Arcanist project referenced by this diff's project PHID.
   *
   * @return PhabricatorRepositoryArcanistProject|null Null when this diff has
   *         no Arcanist project PHID.
   */
  public function loadArcanistProject() {
    if (!$this->getArcanistProjectPHID()) {
      return null;
    }
    return id(new PhabricatorRepositoryArcanistProject())->loadOneWhere(
      'phid = %s',
      $this->getArcanistProjectPHID());
  }

  /**
   * Save this diff, then save every changeset queued in
   * $this->unsavedChangesets after pointing it at this diff's ID.
   *
   * @return mixed Whatever parent::save() returns.
   */
  public function save() {
    // TODO: sort out transactions
    // $this->openTransaction();
    $ret = parent::save();
    foreach ($this->unsavedChangesets as $changeset) {
      // The diff ID is read after parent::save() has run.
      $changeset->setDiffID($this->getID());
      $changeset->save();
    }
    // $this->saveTransaction();
    return $ret;
  }

  /**
   * Delete every changeset returned by loadChangesets(), then delete this diff.
   *
   * @return mixed Whatever parent::delete() returns.
   */
  public function delete() {
    // $this->openTransaction();
    foreach ($this->loadChangesets() as $changeset) {
      $changeset->delete();
    }
    $ret = parent::delete();
    // $this->saveTransaction();
    return $ret;
  }

  /**
   * Build an unsaved diff (with unsaved changesets and hunks) from parsed
   * Arcanist changes, then annotate it with copied-code metadata.
   *
   * The diff's line count is the sum of added and deleted lines over all hunks
   * of all changes.
   *
   * @param array $changes List of ArcanistDiffChange objects.
   * @return DifferentialDiff New, unsaved diff.
   */
  public static function newFromRawChanges(array $changes) {
    assert_instances_of($changes, 'ArcanistDiffChange');
    $diff = new DifferentialDiff();

    $lines = 0;
    foreach ($changes as $change) {
      $changeset = new DifferentialChangeset();
      $add_lines = 0;
      $del_lines = 0;
      $hunks = $change->getHunks();
      if ($hunks) {
        foreach ($hunks as $hunk) {
          $dhunk = new DifferentialHunk();
          $dhunk->setOldOffset($hunk->getOldOffset());
          $dhunk->setOldLen($hunk->getOldLength());
          $dhunk->setNewOffset($hunk->getNewOffset());
          $dhunk->setNewLen($hunk->getNewLength());
          $dhunk->setChanges($hunk->getCorpus());
          $changeset->addUnsavedHunk($dhunk);

          $hunk_add = $hunk->getAddLines();
          $hunk_del = $hunk->getDelLines();
          $add_lines += $hunk_add;
          $del_lines += $hunk_del;
          // Count only this hunk. Adding the running per-changeset totals here
          // would count earlier hunks again for every later hunk.
          $lines += $hunk_add + $hunk_del;
        }
      } else {
        // This happens when you add empty files.
        $changeset->attachHunks(array());
      }

      $changeset->setOldFile($change->getOldPath());
      $changeset->setFilename($change->getCurrentPath());
      $changeset->setChangeType($change->getType());
      $changeset->setFileType($change->getFileType());
      $changeset->setMetadata($change->getAllMetadata());
      $changeset->setOldProperties($change->getOldProperties());
      $changeset->setNewProperties($change->getNewProperties());
      $changeset->setAwayPaths($change->getAwayPaths());
      $changeset->setAddLines($add_lines);
      $changeset->setDelLines($del_lines);

      $diff->addUnsavedChangeset($changeset);
    }
    $diff->setLineCount($lines);

    $diff->detectCopiedCode();

    return $diff;
  }

  /**
   * Find added lines which duplicate old-side lines of any changeset in this
   * diff and record them in each changeset's metadata under 'copy:lines'.
   *
   * A match is seeded by an added line of at least $min_width bytes which is
   * identical to an old-side line, then grown backwards and forwards over
   * adjacent identical lines of any length. Blocks shorter than $min_lines
   * are discarded. The stored value maps a new line number to
   * array($source_file, $source_line), where $source_file is '' when the
   * source is the changeset's own file.
   *
   * @param int $min_width Minimum byte length of a line that may seed a match.
   * @param int $min_lines Minimum number of lines in a reported block.
   * @return void
   */
  private function detectCopiedCode($min_width = 40, $min_lines = 3) {
    // No changeset was ever added (e.g. an empty list of changes).
    if (!$this->changesets) {
      return;
    }

    // $map:   old-side line text (long lines only) => list of (file, line).
    // $files: filename => (old-side line number => line text).
    $map = array();
    $files = array();
    foreach ($this->changesets as $changeset) {
      $file = $changeset->getFilename();
      foreach ($changeset->getHunks() as $hunk) {
        $line = $hunk->getOldOffset();
        foreach (explode("\n", $hunk->makeOldFile()) as $code) {
          $files[$file][$line] = $code;
          if (strlen($code) >= $min_width) {
            $map[$code][] = array($file, $line);
          }
          $line++;
        }
      }
    }

    foreach ($this->changesets as $changeset) {
      $copies = array();
      foreach ($changeset->getHunks() as $hunk) {
        $added = $hunk->getAddedLines();

        // Walk the added lines by position. The previous each()/next() loop
        // advanced the array pointer twice per iteration and so skipped every
        // other added line; each() is also unavailable on PHP 8.
        $added_keys = array_keys($added);
        $added_count = count($added_keys);
        for ($pos = 0; $pos < $added_count; $pos++) {
          $line = $added_keys[$pos];
          $code = $added[$line];
          if (!isset($map[$code])) {
            continue;
          }

          // We found a long matching line. Explore all candidates and keep
          // the first one that yields the longest block.
          $best_length = 0;
          $best_offset = 0;
          $best_file = null;
          $best_line = null;
          foreach ($map[$code] as $candidate) {
            list($file, $orig_line) = $candidate;
            $length = 1;
            $offset = 0;
            // Search also backwards for short lines.
            foreach (array(-1, 1) as $direction) {
              $offset = $direction;
              // The original line is looked up again on every step so that
              // each neighbour is compared with its own counterpart.
              while (!isset($copies[$line + $offset]) &&
                     isset($added[$line + $offset]) &&
                     idx($files[$file], $orig_line + $offset) ===
                       $added[$line + $offset]) {
                $length++;
                $offset += $direction;
              }
            }
            if ($length > $best_length) {
              $best_length = $length;
              // ($offset - 1) contains number of forward matching lines.
              $best_offset = $offset - 1;
              $best_file = $file;
              $best_line = $orig_line;
            }
          }

          $save_file = ($best_file === $changeset->getFilename())
            ? ''
            : $best_file;
          $is_short = ($best_length < $min_lines);
          for ($i = $best_length - 1; $i >= 0; $i--) {
            $delta = $best_offset - $i;
            // Short blocks are stored as empty arrays so the lines still count
            // as visited; array_filter() below removes them.
            $copies[$line + $delta] = $is_short
              ? array()
              : array($save_file, $best_line + $delta);
          }

          // Skip the lines which were consumed by the forward search.
          $pos += $best_offset;
        }
      }

      $metadata = $changeset->getMetadata();
      $metadata['copy:lines'] = array_filter($copies);
      $changeset->setMetadata($metadata);
    }
  }

  /**
   * Build a plain-array description of this diff: its own fields, one entry
   * per attached changeset (with hunks), and all stored diff properties.
   *
   * The per-hunk 'addLines', 'delLines', 'isMissingOldNewline' and
   * 'isMissingNewNewline' fields and the per-change 'commitHash' field are
   * always emitted as null.
   *
   * @return array Dictionary with 'changes' and 'properties' sub-arrays.
   * @throws Exception If changesets have not been attached
   *         (see getChangesets()).
   */
  public function getDiffDict() {
    $dict = array(
      'id' => $this->getID(),
      'parent' => $this->getParentRevisionID(),
      'revisionID' => $this->getRevisionID(),
      'sourceControlBaseRevision' => $this->getSourceControlBaseRevision(),
      'sourceControlPath' => $this->getSourceControlPath(),
      'sourceControlSystem' => $this->getSourceControlSystem(),
      'branch' => $this->getBranch(),
      'unitStatus' => $this->getUnitStatus(),
      'lintStatus' => $this->getLintStatus(),
      'changes' => array(),
      'properties' => array(),
    );

    foreach ($this->getChangesets() as $changeset) {
      $hunks = array();
      foreach ($changeset->getHunks() as $hunk) {
        $hunks[] = array(
          'oldOffset' => $hunk->getOldOffset(),
          'newOffset' => $hunk->getNewOffset(),
          'oldLength' => $hunk->getOldLen(),
          'newLength' => $hunk->getNewLen(),
          'addLines' => null,
          'delLines' => null,
          'isMissingOldNewline' => null,
          'isMissingNewNewline' => null,
          'corpus' => $hunk->getChanges(),
        );
      }
      $change = array(
        'metadata' => $changeset->getMetadata(),
        'oldPath' => $changeset->getOldFile(),
        'currentPath' => $changeset->getFilename(),
        'awayPaths' => $changeset->getAwayPaths(),
        'oldProperties' => $changeset->getOldProperties(),
        'newProperties' => $changeset->getNewProperties(),
        'type' => $changeset->getChangeType(),
        'fileType' => $changeset->getFileType(),
        'commitHash' => null,
        'addLines' => $changeset->getAddLines(),
        'delLines' => $changeset->getDelLines(),
        'hunks' => $hunks,
      );
      $dict['changes'][] = $change;
    }

    $properties = id(new DifferentialDiffProperty())->loadAllWhere(
      'diffID = %d',
      $this->getID());
    foreach ($properties as $property) {
      $dict['properties'][$property->getName()] = $property->getData();
    }

    return $dict;
  }

}

// ===========================================================================
// File: src/applications/differential/storage/hunk/DifferentialHunk.php
// (diff --git index 629c2e4f79..b53a6955f2 100644, hunk @@ -1,88 +1,105 @@)
//
// NOTE(review): the start of this file (opening tag, class declaration,
// property declarations and the head of the first visible method) is missing
// from the extracted text. The surviving tail of that method -- presumably
// the getAddedLines() called from DifferentialDiff; confirm against the real
// file -- is preserved verbatim below instead of being reconstructed:
//
//     newOffset;
//     foreach (explode("\n", $this->changes) as $diff_line) {
//       if ($diff_line == '' || $diff_line[0] == '\\') {
//         continue;
//       }
//       if ($diff_line[0] == '+') {
//         $lines[$n] = substr($diff_line, 1);
//       }
//       if ($diff_line[0] != '-') {
//         $n++;
//       }
//     }
//     return $lines;
//   }
// ===========================================================================

  /**
   * Recompose the "new" side of this hunk (every line except '-' lines).
   *
   * @return string
   */
  public function makeNewFile() {
    return $this->makeContent($exclude = '-');
  }

  /**
   * Recompose the "old" side of this hunk (every line except '+' lines).
   *
   * @return string
   */
  public function makeOldFile() {
    return $this->makeContent($exclude = '+');
  }

  /**
   * Recompose only the changed lines of this hunk (every line except
   * ' ' context lines).
   *
   * @return string
   */
  public function makeChanges() {
    return $this->makeContent($exclude = ' ');
  }

  /**
   * Strip the one-character diff prefix from every line of the hunk body
   * except those whose prefix is $exclude, and join the rest with newlines.
   *
   * @param string $exclude Line prefix to drop: '+', '-' or ' '.
   * @return string Recomposed text, with a trailing newline unless a relevant
   *         "\ No newline at end of file" marker was seen.
   */
  private function makeContent($exclude) {
    $results = array();
    $lines = explode("\n", $this->changes);

    // NOTE: To determine whether the recomposed file should have a trailing
    // newline, we look for a "\ No newline at end of file" line which appears
    // after a line which we don't exclude. For example, if we're constructing
    // the "new" side of a diff (excluding "-"), we want to ignore this one:
    //
    //    - x
    //    \ No newline at end of file
    //    + x
    //
    // ...since it's talking about the "old" side of the diff, but interpret
    // this as meaning we should omit the newline:
    //
    //    - x
    //    + x
    //    \ No newline at end of file

    $use_next_newline = false;
    $has_newline = true;
    foreach ($lines as $line) {
      if (isset($line[0])) {
        if ($line[0] == $exclude) {
          // A marker directly after an excluded line refers to the other side.
          $use_next_newline = false;
          continue;
        }
        if ($line[0] == '\\') {
          if ($use_next_newline) {
            $has_newline = false;
          }
          continue;
        }
      }
      $use_next_newline = true;
      $results[] = substr($line, 1);
    }

    $possible_newline = '';
    if ($has_newline) {
      $possible_newline = "\n";
    }

    return implode("\n", $results).$possible_newline;
  }

}