diff --git a/src/applications/repository/daemon/commitdiscovery/git/PhabricatorRepositoryGitCommitDiscoveryDaemon.php b/src/applications/repository/daemon/commitdiscovery/git/PhabricatorRepositoryGitCommitDiscoveryDaemon.php index 9ad82fdb28..cfb9685dd8 100644 --- a/src/applications/repository/daemon/commitdiscovery/git/PhabricatorRepositoryGitCommitDiscoveryDaemon.php +++ b/src/applications/repository/daemon/commitdiscovery/git/PhabricatorRepositoryGitCommitDiscoveryDaemon.php @@ -1,110 +1,105 @@ getRepository(); $vcs = $repository->getVersionControlSystem(); if ($vcs != PhabricatorRepositoryType::REPOSITORY_TYPE_GIT) { throw new Exception("Repository is not a git repository."); } $repository_phid = $repository->getPHID(); - $repo_base = $repository->getDetail('local-path'); - list($stdout) = execx( - '(cd %s && git branch -r --verbose --no-abbrev)', - $repo_base); + list($stdout) = $repository->execxLocalCommand( + 'branch -r --verbose --no-abbrev'); $branches = DiffusionGitBranchQuery::parseGitRemoteBranchOutput($stdout); $got_something = false; foreach ($branches as $name => $commit) { if ($this->isKnownCommit($commit)) { continue; } else { $this->discoverCommit($commit); $got_something = true; } } return $got_something; } private function discoverCommit($commit) { $discover = array(); $insert = array(); $repository = $this->getRepository(); - $repo_base = $repository->getDetail('local-path'); $discover[] = $commit; $insert[] = $commit; $seen_parent = array(); while (true) { $target = array_pop($discover); - list($parents) = execx( - '(cd %s && git log -n1 --pretty="%%P" %s)', - $repo_base, + list($parents) = $repository->execxLocalCommand( + 'log -n1 --pretty="%%P" %s', $target); $parents = array_filter(explode(' ', trim($parents))); foreach ($parents as $parent) { if (isset($seen_parent[$parent])) { // We end up in a loop here somehow when we parse Arcanist if we // don't do this. TODO: Figure out why and draw a pretty diagram // since it's not evident how parsing a DAG with this causes the // loop to stop terminating. continue; } $seen_parent[$parent] = true; if (!$this->isKnownCommit($parent)) { $discover[] = $parent; $insert[] = $parent; } } if (empty($discover)) { break; } $this->stillWorking(); } while (true) { $target = array_pop($insert); - list($epoch) = execx( - '(cd %s && git log -n1 --pretty="%%at" %s)', - $repo_base, + list($epoch) = $repository->execxLocalCommand( + 'log -n1 --pretty="%%at" %s', $target); $epoch = trim($epoch); $this->recordCommit($target, $epoch); if (empty($insert)) { break; } } } } diff --git a/src/applications/repository/daemon/commitdiscovery/git/__init__.php b/src/applications/repository/daemon/commitdiscovery/git/__init__.php index 8fc9566ada..685253a440 100644 --- a/src/applications/repository/daemon/commitdiscovery/git/__init__.php +++ b/src/applications/repository/daemon/commitdiscovery/git/__init__.php @@ -1,16 +1,14 @@ getRepository(); $vcs = $repository->getVersionControlSystem(); if ($vcs != PhabricatorRepositoryType::REPOSITORY_TYPE_MERCURIAL) { throw new Exception("Repository is not a Mercurial repository."); } $repository_phid = $repository->getPHID(); - $repo_base = $repository->getDetail('local-path'); list($stdout) = $repository->execxLocalCommand('branches'); $branches = ArcanistMercurialParser::parseMercurialBranches($stdout); $got_something = false; foreach ($branches as $name => $branch) { $commit = $branch['rev']; $commit = $this->getFullHash($commit); if ($this->isKnownCommit($commit)) { continue; } else { $this->discoverCommit($commit); $got_something = true; } } return $got_something; } private function getFullHash($commit) { // NOTE: Mercurial shortens hashes to 12 characters by default. This // implies collisions with as few as a few million commits. The // documentation sensibly advises "Do not use short-form IDs for // long-lived representations". It then continues "You can use the // --debug option to display the full changeset ID". What?! Yes, this // is in fact the only way to turn on full hashes, and the hg source // code is littered with "hexfn = ui.debugflag and hex or short" and // similar. There is no more-selective flag or config option. // // Unfortunately, "hg --debug" turns on tons of other extra output, // including full commit messages in "hg log" and "hg parents" (which // ignore --style); this renders them unparseable. So we have to use // "hg id" to convert short hashes into full hashes. See: // // // // Of course, this means that if there are collisions we will break here // (the short commit identifier won't be unambiguous) but maybe Mercurial // will have a --full-hashes flag or something by then and we can fix it // properly. Until we run into that, this allows us to store data in the // right format so when we eventually encounter this we won't have to // reparse every Mercurial repository. $repository = $this->getRepository(); list($stdout) = $repository->execxLocalCommand( 'id --debug -i --rev %s', $commit); return trim($stdout); } private function discoverCommit($commit) { $discover = array(); $insert = array(); $repository = $this->getRepository(); $discover[] = $commit; $insert[] = $commit; $seen_parent = array(); // For all the new commits at the branch heads, walk backward until we find // only commits we've aleady seen. while (true) { $target = array_pop($discover); list($stdout) = $repository->execxLocalCommand( 'parents --style default --rev %s', $target); $parents = ArcanistMercurialParser::parseMercurialLog($stdout); if ($parents) { foreach ($parents as $parent) { $parent_commit = $parent['rev']; $parent_commit = $this->getFullHash($parent_commit); if (isset($seen_parent[$parent_commit])) { continue; } $seen_parent[$parent_commit] = true; if (!$this->isKnownCommit($parent_commit)) { $discover[] = $parent_commit; $insert[] = $parent_commit; } } } if (empty($discover)) { break; } $this->stillWorking(); } while (true) { $target = array_pop($insert); list($stdout) = $repository->execxLocalCommand( 'log --rev %s --template %s', $target, '{date|rfc822date}'); $epoch = strtotime($stdout); $this->recordCommit($target, $epoch); if (empty($insert)) { break; } } } } diff --git a/src/applications/repository/daemon/commitdiscovery/svn/PhabricatorRepositorySvnCommitDiscoveryDaemon.php b/src/applications/repository/daemon/commitdiscovery/svn/PhabricatorRepositorySvnCommitDiscoveryDaemon.php index fc68dad836..4020c887b5 100644 --- a/src/applications/repository/daemon/commitdiscovery/svn/PhabricatorRepositorySvnCommitDiscoveryDaemon.php +++ b/src/applications/repository/daemon/commitdiscovery/svn/PhabricatorRepositorySvnCommitDiscoveryDaemon.php @@ -1,123 +1,123 @@ getRepository(); $vcs = $repository->getVersionControlSystem(); if ($vcs != PhabricatorRepositoryType::REPOSITORY_TYPE_SVN) { throw new Exception("Repository is not a svn repository."); } $uri = $this->getBaseSVNLogURI(); list($xml) = $repository->execxRemoteCommand( - ' log --xml --quiet --limit 1 %s@HEAD', - $uri); + 'log --xml --quiet --limit 1 %s@HEAD', + $uri); $results = $this->parseSVNLogXML($xml); $commit = key($results); $epoch = reset($results); if ($this->isKnownCommit($commit)) { return false; } $this->discoverCommit($commit, $epoch); return true; } private function discoverCommit($commit, $epoch) { $uri = $this->getBaseSVNLogURI(); $repository = $this->getRepository(); $discover = array( $commit => $epoch, ); $upper_bound = $commit; $limit = 1; while ($upper_bound > 1 && !$this->isKnownCommit($upper_bound)) { // Find all the unknown commits on this path. Note that we permit // importing an SVN subdirectory rather than the entire repository, so // commits may be nonsequential. list($err, $xml, $stderr) = $repository->execRemoteCommand( ' log --xml --quiet --limit %d %s@%d', $limit, $uri, $upper_bound - 1); if ($err) { if (preg_match('/(path|File) not found/', $stderr)) { // We've gone all the way back through history and this path was not // affected by earlier commits. break; } else { throw new Exception("svn log error #{$err}: {$stderr}"); } } $discover += $this->parseSVNLogXML($xml); $upper_bound = min(array_keys($discover)); // Discover 2, 4, 8, ... 256 logs at a time. This allows us to initially // import large repositories fairly quickly, while pulling only as much // data as we need in the common case (when we've already imported the // repository and are just grabbing one commit at a time). $limit = min($limit * 2, 256); } // NOTE: We do writes only after discovering all the commits so that we're // never left in a state where we've missed commits -- if the discovery // script terminates it can always resume and restore the import to a good // state. This is also why we sort the discovered commits so we can do // writes forward from the smallest one. ksort($discover); foreach ($discover as $commit => $epoch) { $this->recordCommit($commit, $epoch); } } private function parseSVNLogXML($xml) { $xml = phutil_utf8ize($xml); $result = array(); $log = new SimpleXMLElement($xml); foreach ($log->logentry as $entry) { $commit = (int)$entry['revision']; $epoch = (int)strtotime((string)$entry->date[0]); $result[$commit] = $epoch; } return $result; } private function getBaseSVNLogURI() { $repository = $this->getRepository(); $uri = $repository->getDetail('remote-uri'); $subpath = $repository->getDetail('svn-subpath'); return $uri.$subpath; } } diff --git a/src/applications/repository/daemon/gitfetch/PhabricatorRepositoryGitFetchDaemon.php b/src/applications/repository/daemon/gitfetch/PhabricatorRepositoryGitFetchDaemon.php index aab67da028..7b67006c4e 100644 --- a/src/applications/repository/daemon/gitfetch/PhabricatorRepositoryGitFetchDaemon.php +++ b/src/applications/repository/daemon/gitfetch/PhabricatorRepositoryGitFetchDaemon.php @@ -1,34 +1,44 @@ execxRemoteCommand( + 'clone %s %s', + $repository->getRemoteURI(), + rtrim($local_path, '/')); } - protected function executeUpdate($remote_uri, $local_path) { - execx('(cd %s && git fetch --all)', $local_path); + protected function executeUpdate( + PhabricatorRepository $repository, + $local_path) { + + $repository->execxLocalCommand( + 'fetch --all'); } } diff --git a/src/applications/repository/daemon/gitfetch/__init__.php b/src/applications/repository/daemon/gitfetch/__init__.php index 61b3f4bad3..d59679cb9a 100644 --- a/src/applications/repository/daemon/gitfetch/__init__.php +++ b/src/applications/repository/daemon/gitfetch/__init__.php @@ -1,15 +1,13 @@ execxRemoteCommand( + 'clone %s %s', + $repository->getRemoteURI(), + rtrim($local_path, '/')); } - protected function executeUpdate($remote_uri, $local_path) { - execx('(cd %s && hg pull -u)', $local_path); + protected function executeUpdate( + PhabricatorRepository $repository, + $local_path) { + $repository->execxLocalCommand( + 'pull -u'); } } diff --git a/src/applications/repository/daemon/mercurialpull/__init__.php b/src/applications/repository/daemon/mercurialpull/__init__.php index c2ca3a6c33..e2b6f2382d 100644 --- a/src/applications/repository/daemon/mercurialpull/__init__.php +++ b/src/applications/repository/daemon/mercurialpull/__init__.php @@ -1,15 +1,13 @@ loadRepository(); $expected_type = $this->getSupportedRepositoryType(); $repo_type = $repository->getVersionControlSystem(); if ($repo_type != $expected_type) { $repo_type_name = PhabricatorRepositoryType::getNameForRepositoryType( $repo_type); $expected_type_name = PhabricatorRepositoryType::getNameForRepositoryType( $expected_type); $repo_name = $repository->getName().' ('.$repository->getCallsign().')'; throw new Exception( "This daemon pulls '{$expected_type_name}' repositories, but the ". "repository '{$repo_name}' is a '{$repo_type_name}' repository."); } $tracked = $repository->isTracked(); if (!$tracked) { throw new Exception("Tracking is not enabled for this repository."); } $local_path = $repository->getDetail('local-path'); - $remote_uri = $repository->getDetail('remote-uri'); if (!$local_path) { throw new Exception("No local path is available for this repository."); } while (true) { if (!Filesystem::pathExists($local_path)) { - if (!$remote_uri) { - throw new Exception("No remote URI is available."); - } execx('mkdir -p %s', dirname($local_path)); - $this->executeCreate($remote_uri, $local_path); + $this->executeCreate($repository, $local_path); } else { - $this->executeUpdate($remote_uri, $local_path); + $this->executeUpdate($repository, $local_path); } $this->sleep($repository->getDetail('pull-frequency', 15)); } } } diff --git a/src/applications/repository/worker/base/PhabricatorRepositoryCommitParserWorker.php b/src/applications/repository/worker/base/PhabricatorRepositoryCommitParserWorker.php index 938c9eb199..8641cd8c7d 100644 --- a/src/applications/repository/worker/base/PhabricatorRepositoryCommitParserWorker.php +++ b/src/applications/repository/worker/base/PhabricatorRepositoryCommitParserWorker.php @@ -1,110 +1,94 @@ getTaskData(), 'commitID'); if (!$commit_id) { return; } $commit = id(new PhabricatorRepositoryCommit())->load($commit_id); if (!$commit) { // TODO: Communicate permanent failure? return; } $this->commit = $commit; $repository = id(new PhabricatorRepository())->load( $commit->getRepositoryID()); if (!$repository) { return; } $this->repository = $repository; return $this->parseCommit($repository, $commit); } final protected function shouldQueueFollowupTasks() { return !idx($this->getTaskData(), 'only'); } abstract protected function parseCommit( PhabricatorRepository $repository, PhabricatorRepositoryCommit $commit); /** * This method is kind of awkward here but both the SVN message and * change parsers use it. */ protected function getSVNLogXMLObject($uri, $revision, $verbose = false) { if ($verbose) { $verbose = '--verbose'; } - try { - list($xml) = $this->repository->execxRemoteCommand( - "log --xml {$verbose} --limit 1 %s@%d", - $uri, - $revision); - } catch (CommandException $ex) { - // HTTPS is generally faster and more reliable than svn+ssh, but some - // commit messages with non-UTF8 text can't be retrieved over HTTPS, see - // Facebook rE197184 for one example. Make an attempt to fall back to - // svn+ssh if we've failed outright to retrieve the message. - $fallback_uri = new PhutilURI($uri); - if ($fallback_uri->getProtocol() != 'https') { - throw $ex; - } - $fallback_uri->setProtocol('svn+ssh'); - list($xml) = execx( - "svn log --xml {$verbose} --limit 1 --non-interactive %s@%d", - $fallback_uri, - $revision); - } + list($xml) = $this->repository->execxRemoteCommand( + "log --xml {$verbose} --limit 1 %s@%d", + $uri, + $revision); // Subversion may send us back commit messages which won't parse because // they have non UTF-8 garbage in them. Slam them into valid UTF-8. $xml = phutil_utf8ize($xml); return new SimpleXMLElement($xml); } protected function isBadCommit($full_commit_name) { $repository = new PhabricatorRepository(); $bad_commit = queryfx_one( $repository->establishConnection('w'), 'SELECT * FROM %T WHERE fullCommitName = %s', PhabricatorRepository::TABLE_BADCOMMIT, $full_commit_name); return (bool)$bad_commit; } } diff --git a/src/applications/repository/worker/base/__init__.php b/src/applications/repository/worker/base/__init__.php index d78aaa2839..564fece8ea 100644 --- a/src/applications/repository/worker/base/__init__.php +++ b/src/applications/repository/worker/base/__init__.php @@ -1,19 +1,17 @@ getCallsign().$commit->getCommitIdentifier(); echo "Parsing {$full_name}...\n"; if ($this->isBadCommit($full_name)) { echo "This commit is marked bad!\n"; return; } - $local_path = $repository->getDetail('local-path'); - - list($raw) = execx( - '(cd %s && git log -n1 -M -C -B --find-copies-harder --raw -t '. - '--abbrev=40 --pretty=format: %s)', - $local_path, + // NOTE: "--pretty=format: " is to disable log output, we only want the + // part we get from "--raw". + list($raw) = $repository->execLocalCommand( + 'log -n1 -M -C -B --find-copies-harder --raw -t '. + '--abbrev=40 --pretty=format: %s', $commit->getCommitIdentifier()); $changes = array(); $move_away = array(); $copy_away = array(); $lines = explode("\n", $raw); foreach ($lines as $line) { if (!strlen(trim($line))) { continue; } list($old_mode, $new_mode, $old_hash, $new_hash, $more_stuff) = preg_split('/ +/', $line); // We may only have two pieces here. list($action, $src_path, $dst_path) = array_merge( explode("\t", $more_stuff), array(null)); // Normalize the paths for consistency with the SVN workflow. $src_path = '/'.$src_path; if ($dst_path) { $dst_path = '/'.$dst_path; } $old_mode = intval($old_mode, 8); $new_mode = intval($new_mode, 8); $file_type = DifferentialChangeType::FILE_NORMAL; if ($new_mode & 040000) { $file_type = DifferentialChangeType::FILE_DIRECTORY; } else if ($new_mode & 0120000) { $file_type = DifferentialChangeType::FILE_SYMLINK; } // TODO: We can detect binary changes as git does, through a combination // of running 'git check-attr' for stuff like 'binary', 'merge' or 'diff', // and by falling back to inspecting the first 8,000 characters of the // buffer for null bytes (this is seriously git's algorithm, see // buffer_is_binary() in xdiff-interface.c). $change_type = null; $change_path = $src_path; $change_target = null; $is_direct = true; switch ($action[0]) { case 'A': $change_type = DifferentialChangeType::TYPE_ADD; break; case 'D': $change_type = DifferentialChangeType::TYPE_DELETE; break; case 'C': $change_type = DifferentialChangeType::TYPE_COPY_HERE; $change_path = $dst_path; $change_target = $src_path; $copy_away[$change_target][] = $change_path; break; case 'R': $change_type = DifferentialChangeType::TYPE_MOVE_HERE; $change_path = $dst_path; $change_target = $src_path; $move_away[$change_target][] = $change_path; break; case 'T': // Type of the file changed, fall through and treat it as a // modification. Not 100% sure this is the right thing to do but it // seems reasonable. case 'M': if ($file_type == DifferentialChangeType::FILE_DIRECTORY) { $change_type = DifferentialChangeType::TYPE_CHILD; $is_direct = false; } else { $change_type = DifferentialChangeType::TYPE_CHANGE; } break; // NOTE: "U" (unmerged) and "X" (unknown) statuses are also possible // in theory but shouldn't appear here. default: throw new Exception("Failed to parse line '{$line}'."); } $changes[$change_path] = array( 'repositoryID' => $repository->getID(), 'commitID' => $commit->getID(), 'path' => $change_path, 'changeType' => $change_type, 'fileType' => $file_type, 'isDirect' => $is_direct, 'commitSequence' => $commit->getEpoch(), 'targetPath' => $change_target, 'targetCommitID' => $change_target ? $commit->getID() : null, ); } // Add a change to '/' since git doesn't mention it. $changes['/'] = array( 'repositoryID' => $repository->getID(), 'commitID' => $commit->getID(), 'path' => '/', 'changeType' => DifferentialChangeType::TYPE_CHILD, 'fileType' => DifferentialChangeType::FILE_DIRECTORY, 'isDirect' => false, 'commitSequence' => $commit->getEpoch(), 'targetPath' => null, 'targetCommitID' => null, ); foreach ($copy_away as $change_path => $destinations) { if (isset($move_away[$change_path])) { $change_type = DifferentialChangeType::TYPE_MULTICOPY; $is_direct = true; unset($move_away[$change_path]); } else { $change_type = DifferentialChangeType::TYPE_COPY_AWAY; $is_direct = false; } $reference = $changes[reset($destinations)]; $changes[$change_path] = array( 'repositoryID' => $repository->getID(), 'commitID' => $commit->getID(), 'path' => $change_path, 'changeType' => $change_type, 'fileType' => $reference['fileType'], 'isDirect' => $is_direct, 'commitSequence' => $commit->getEpoch(), 'targetPath' => null, 'targetCommitID' => null, ); } foreach ($move_away as $change_path => $destinations) { $reference = $changes[reset($destinations)]; $changes[$change_path] = array( 'repositoryID' => $repository->getID(), 'commitID' => $commit->getID(), 'path' => $change_path, 'changeType' => DifferentialChangeType::TYPE_MOVE_AWAY, 'fileType' => $reference['fileType'], 'isDirect' => true, 'commitSequence' => $commit->getEpoch(), 'targetPath' => null, 'targetCommitID' => null, ); } $paths = array(); foreach ($changes as $change) { $paths[$change['path']] = true; if ($change['targetPath']) { $paths[$change['targetPath']] = true; } } $path_map = $this->lookupOrCreatePaths(array_keys($paths)); foreach ($changes as $key => $change) { $changes[$key]['pathID'] = $path_map[$change['path']]; if ($change['targetPath']) { $changes[$key]['targetPathID'] = $path_map[$change['targetPath']]; } else { $changes[$key]['targetPathID'] = null; } } $conn_w = $repository->establishConnection('w'); $changes_sql = array(); foreach ($changes as $change) { $values = array( (int)$change['repositoryID'], (int)$change['pathID'], (int)$change['commitID'], $change['targetPathID'] ? (int)$change['targetPathID'] : 'null', $change['targetCommitID'] ? (int)$change['targetCommitID'] : 'null', (int)$change['changeType'], (int)$change['fileType'], (int)$change['isDirect'], (int)$change['commitSequence'], ); $changes_sql[] = '('.implode(', ', $values).')'; } queryfx( $conn_w, 'DELETE FROM %T WHERE commitID = %d', PhabricatorRepository::TABLE_PATHCHANGE, $commit->getID()); foreach (array_chunk($changes_sql, 256) as $sql_chunk) { queryfx( $conn_w, 'INSERT INTO %T (repositoryID, pathID, commitID, targetPathID, targetCommitID, changeType, fileType, isDirect, commitSequence) VALUES %Q', PhabricatorRepository::TABLE_PATHCHANGE, implode(', ', $sql_chunk)); } $this->finishParse(); } } diff --git a/src/applications/repository/worker/commitchangeparser/git/__init__.php b/src/applications/repository/worker/commitchangeparser/git/__init__.php index 6bc6d8d55e..012d74a45c 100644 --- a/src/applications/repository/worker/commitchangeparser/git/__init__.php +++ b/src/applications/repository/worker/commitchangeparser/git/__init__.php @@ -1,17 +1,15 @@