Page Menu
Home
Phorge
Search
Configure Global Search
Log In
Files
F2896597
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Award Token
Flag For Later
Advanced/Developer...
View Handle
View Hovercard
Size
19 KB
Referenced Files
None
Subscribers
None
View Options
diff --git a/src/applications/diffusion/conduit/DiffusionLastModifiedQueryConduitAPIMethod.php b/src/applications/diffusion/conduit/DiffusionLastModifiedQueryConduitAPIMethod.php
index 30b436cfff..1135420a6e 100644
--- a/src/applications/diffusion/conduit/DiffusionLastModifiedQueryConduitAPIMethod.php
+++ b/src/applications/diffusion/conduit/DiffusionLastModifiedQueryConduitAPIMethod.php
@@ -1,156 +1,168 @@
<?php
final class DiffusionLastModifiedQueryConduitAPIMethod
extends DiffusionQueryConduitAPIMethod {
public function getAPIMethodName() {
return 'diffusion.lastmodifiedquery';
}
public function getMethodDescription() {
return pht('Get the commits at which paths were last modified.');
}
protected function defineReturnType() {
return 'map<string, string>';
}
protected function defineCustomParamTypes() {
return array(
'paths' => 'required map<string, string>',
);
}
protected function getGitResult(ConduitAPIRequest $request) {
$drequest = $this->getDiffusionRequest();
$repository = $drequest->getRepository();
$paths = $request->getValue('paths');
$results = $this->loadCommitsFromCache($paths);
foreach ($paths as $path => $commit) {
if (array_key_exists($path, $results)) {
continue;
}
list($hash) = $repository->execxLocalCommand(
'log -n1 --format=%%H %s -- %s',
$commit,
$path);
$results[$path] = trim($hash);
}
return $results;
}
protected function getSVNResult(ConduitAPIRequest $request) {
$drequest = $this->getDiffusionRequest();
$repository = $drequest->getRepository();
$results = array();
foreach ($request->getValue('paths') as $path => $commit) {
$history_result = DiffusionQuery::callConduitWithDiffusionRequest(
$request->getUser(),
$drequest,
'diffusion.historyquery',
array(
'commit' => $commit,
'path' => $path,
'limit' => 1,
'offset' => 0,
'needDirectChanges' => true,
'needChildChanges' => true,
));
$history_array = DiffusionPathChange::newFromConduit(
$history_result['pathChanges']);
if ($history_array) {
$results[$path] = head($history_array)
->getCommit()
->getCommitIdentifier();
}
}
return $results;
}
protected function getMercurialResult(ConduitAPIRequest $request) {
$drequest = $this->getDiffusionRequest();
$repository = $drequest->getRepository();
$paths = $request->getValue('paths');
$results = $this->loadCommitsFromCache($paths);
foreach ($paths as $path => $commit) {
if (array_key_exists($path, $results)) {
continue;
}
list($hash) = $repository->execxLocalCommand(
'log --template %s --limit 1 --removed --rev %s -- %s',
'{node}',
hgsprintf('reverse(ancestors(%s))', $commit),
nonempty(ltrim($path, '/'), '.'));
$results[$path] = trim($hash);
}
return $results;
}
private function loadCommitsFromCache(array $map) {
$drequest = $this->getDiffusionRequest();
$repository = $drequest->getRepository();
$path_map = id(new DiffusionPathIDQuery(array_keys($map)))
->loadPathIDs();
$commit_query = id(new DiffusionCommitQuery())
->setViewer($drequest->getUser())
->withRepository($repository)
->withIdentifiers(array_values($map));
$commit_query->execute();
$commit_map = $commit_query->getIdentifierMap();
$commit_map = mpull($commit_map, 'getID');
$graph_cache = new PhabricatorRepositoryGraphCache();
$results = array();
+
+ // Spend no more than this many total seconds trying to satisfy queries
+ // via the graph cache.
+ $remaining_time = 10.0;
foreach ($map as $path => $commit) {
$path_id = idx($path_map, $path);
if (!$path_id) {
continue;
}
$commit_id = idx($commit_map, $commit);
if (!$commit_id) {
continue;
}
+ $t_start = microtime(true);
$cache_result = $graph_cache->loadLastModifiedCommitID(
$commit_id,
- $path_id);
+ $path_id,
+ $remaining_time);
+ $t_end = microtime(true);
if ($cache_result !== false) {
$results[$path] = $cache_result;
}
+
+ $remaining_time -= ($t_end - $t_start);
+ if ($remaining_time <= 0) {
+ break;
+ }
}
if ($results) {
$commits = id(new DiffusionCommitQuery())
->setViewer($drequest->getUser())
->withRepository($repository)
->withIDs($results)
->execute();
foreach ($results as $path => $id) {
$commit = idx($commits, $id);
if ($commit) {
$results[$path] = $commit->getCommitIdentifier();
} else {
unset($results[$path]);
}
}
}
return $results;
}
}
diff --git a/src/applications/repository/graphcache/PhabricatorRepositoryGraphCache.php b/src/applications/repository/graphcache/PhabricatorRepositoryGraphCache.php
index d52182fcc9..c4adc61ab0 100644
--- a/src/applications/repository/graphcache/PhabricatorRepositoryGraphCache.php
+++ b/src/applications/repository/graphcache/PhabricatorRepositoryGraphCache.php
@@ -1,409 +1,418 @@
<?php
/**
* Given a commit and a path, efficiently determine the most recent ancestor
* commit where the path was touched.
*
* In Git and Mercurial, log operations with a path are relatively slow. For
* example:
*
* git log -n1 <commit> -- <path>
*
* ...routinely takes several hundred milliseconds, and equivalent requests
* often take longer in Mercurial.
*
* Unfortunately, this operation is fundamental to rendering a repository for
* the web, and essentially everything else that's slow can be reduced to this
* plus some trivial work afterward. Making this fast is desirable and powerful,
* and allows us to make other things fast by expressing them in terms of this
* query.
*
* Because the query is fundamentally a graph query, it isn't easy to express
* in a reasonable way in MySQL, and we can't do round trips to the server to
* walk the graph without incurring huge performance penalties.
*
* However, the total amount of data in the graph is relatively small. By
* caching it in chunks and keeping it in APC, we can reasonably load and walk
* the graph in PHP quickly.
*
* For more context, see T2683.
*
* Structure of the Cache
* ======================
*
* The cache divides commits into buckets (see @{method:getBucketSize}). To
* walk the graph, we pull a commit's bucket. The bucket is a map from commit
* IDs to a list of parents and changed paths, separated by `null`. For
* example, a bucket might look like this:
*
* array(
* 1 => array(0, null, 17, 18),
* 2 => array(1, null, 4),
* // ...
* )
*
* This means that commit ID 1 has parent commit 0 (a special value meaning
* no parents) and affected path IDs 17 and 18. Commit ID 2 has parent commit 1,
* and affected path 4.
*
* This data structure attempts to balance compactness, ease of construction,
* simplicity of cache semantics, and lookup performance. In the average case,
* it appears to do a reasonable job at this.
*
* @task query Querying the Graph Cache
* @task cache Cache Internals
*/
final class PhabricatorRepositoryGraphCache extends Phobject {
private $rebuiltKeys = array();
/* -( Querying the Graph Cache )------------------------------------------- */
/**
* Search the graph cache for the most modification to a path.
*
* @param int The commit ID to search ancestors of.
* @param int The path ID to search for changes to.
* @param float Maximum number of seconds to spend trying to satisfy this
* query using the graph cache. By default, `0.5` (500ms).
* @return mixed Commit ID, or `null` if no ancestors exist, or `false` if
* the graph cache was unable to determine the answer.
* @task query
*/
public function loadLastModifiedCommitID($commit_id, $path_id, $time = 0.5) {
$commit_id = (int)$commit_id;
$path_id = (int)$path_id;
$bucket_data = null;
$data_key = null;
$seen = array();
$t_start = microtime(true);
$iterations = 0;
while (true) {
$bucket_key = $this->getBucketKey($commit_id);
if (($data_key != $bucket_key) || $bucket_data === null) {
$bucket_data = $this->getBucketData($bucket_key);
$data_key = $bucket_key;
}
if (empty($bucket_data[$commit_id])) {
// Rebuild the cache bucket, since the commit might be a very recent
// one that we'll pick up by rebuilding.
$bucket_data = $this->getBucketData($bucket_key, $bucket_data);
if (empty($bucket_data[$commit_id])) {
// A rebuild didn't help. This can occur legitimately if the commit
// is new and hasn't parsed yet.
return false;
}
// Otherwise, the rebuild gave us the data, so we can keep going.
+
+ $did_fill = true;
+ } else {
+ $did_fill = false;
}
// Sanity check so we can survive and recover from bad data.
if (isset($seen[$commit_id])) {
phlog(pht('Unexpected infinite loop in %s!', __CLASS__));
return false;
} else {
$seen[$commit_id] = true;
}
// `$data` is a list: the commit's parent IDs, followed by `null`,
// followed by the modified paths in ascending order. We figure out the
// first parent first, then check if the path was touched. If the path
// was touched, this is the commit we're after. If not, walk backward
// in the tree.
$items = $bucket_data[$commit_id];
$size = count($items);
// Walk past the parent information.
$parent_id = null;
for ($ii = 0;; ++$ii) {
if ($items[$ii] === null) {
break;
}
if ($parent_id === null) {
$parent_id = $items[$ii];
}
}
// Look for a modification to the path.
for (; $ii < $size; ++$ii) {
$item = $items[$ii];
if ($item > $path_id) {
break;
}
if ($item === $path_id) {
return $commit_id;
}
}
if ($parent_id) {
$commit_id = $parent_id;
// Periodically check if we've spent too long looking for a result
- // in the cache, and return so we can fall back to a VCS operation. This
- // keeps us from having a degenerate worst case if, e.g., the cache
- // is cold and we need to inspect a very large number of blocks
+ // in the cache, and return so we can fall back to a VCS operation.
+ // This keeps us from having a degenerate worst case if, e.g., the
+ // cache is cold and we need to inspect a very large number of blocks
// to satisfy the query.
- if (((++$iterations) % 64) === 0) {
+ ++$iterations;
+
+ // If we performed a cache fill in this cycle, always check the time
+ // limit, since cache fills may take a significant amount of time.
+
+ if ($did_fill || ($iterations % 64 === 0)) {
$t_end = microtime(true);
if (($t_end - $t_start) > $time) {
return false;
}
}
continue;
}
// If we have an explicit 0, that means this commit really has no parents.
// Usually, it is the first commit in the repository.
if ($parent_id === 0) {
return null;
}
// If we didn't find a parent, the parent data isn't available. We fail
// to find an answer in the cache and fall back to querying the VCS.
return false;
}
}
/* -( Cache Internals )---------------------------------------------------- */
/**
* Get the bucket key for a given commit ID.
*
* @param int Commit ID.
* @return int Bucket key.
* @task cache
*/
private function getBucketKey($commit_id) {
return (int)floor($commit_id / $this->getBucketSize());
}
/**
* Get the cache key for a given bucket key (from @{method:getBucketKey}).
*
* @param int Bucket key.
* @return string Cache key.
* @task cache
*/
private function getBucketCacheKey($bucket_key) {
static $prefix;
if ($prefix === null) {
$self = get_class($this);
$size = $this->getBucketSize();
$prefix = "{$self}:{$size}:2:";
}
return $prefix.$bucket_key;
}
/**
* Get the number of items per bucket.
*
* @return int Number of items to store per bucket.
* @task cache
*/
private function getBucketSize() {
return 4096;
}
/**
* Retrieve or build a graph cache bucket from the cache.
*
* Normally, this operates as a readthrough cache call. It can also be used
* to force a cache update by passing the existing data to `$rebuild_data`.
*
* @param int Bucket key, from @{method:getBucketKey}.
* @param mixed Current data, to force a cache rebuild of this bucket.
* @return array Data from the cache.
* @task cache
*/
private function getBucketData($bucket_key, $rebuild_data = null) {
$cache_key = $this->getBucketCacheKey($bucket_key);
// TODO: This cache stuff could be handled more gracefully, but the
// database cache currently requires values to be strings and needs
// some tweaking to support this as part of a stack. Our cache semantics
// here are also unusual (not purely readthrough) because this cache is
// appendable.
$cache_level1 = PhabricatorCaches::getRepositoryGraphL1Cache();
$cache_level2 = PhabricatorCaches::getRepositoryGraphL2Cache();
if ($rebuild_data === null) {
$bucket_data = $cache_level1->getKey($cache_key);
if ($bucket_data) {
return $bucket_data;
}
$bucket_data = $cache_level2->getKey($cache_key);
if ($bucket_data) {
$unserialized = @unserialize($bucket_data);
if ($unserialized) {
// Fill APC if we got a database hit but missed in APC.
$cache_level1->setKey($cache_key, $unserialized);
return $unserialized;
}
}
}
if (!is_array($rebuild_data)) {
$rebuild_data = array();
}
$bucket_data = $this->rebuildBucket($bucket_key, $rebuild_data);
// Don't bother writing the data if we didn't update anything.
if ($bucket_data !== $rebuild_data) {
$cache_level2->setKey($cache_key, serialize($bucket_data));
$cache_level1->setKey($cache_key, $bucket_data);
}
return $bucket_data;
}
/**
* Rebuild a cache bucket, amending existing data if available.
*
* @param int Bucket key, from @{method:getBucketKey}.
* @param array Existing bucket data.
* @return array Rebuilt bucket data.
* @task cache
*/
private function rebuildBucket($bucket_key, array $current_data) {
// First, check if we've already rebuilt this bucket. In some cases (like
// browsing a repository at some commit) it's common to issue many lookups
// against one commit. If that commit has been discovered but not yet
// fully imported, we'll repeatedly attempt to rebuild the bucket. If the
// first rebuild did not work, subsequent rebuilds are very unlikely to
// have any effect. We can just skip the rebuild in these cases.
if (isset($this->rebuiltKeys[$bucket_key])) {
return $current_data;
} else {
$this->rebuiltKeys[$bucket_key] = true;
}
$bucket_min = ($bucket_key * $this->getBucketSize());
$bucket_max = ($bucket_min + $this->getBucketSize()) - 1;
// We need to reload all of the commits in the bucket because there is
// no guarantee that they'll get parsed in order, so we can fill large
// commit IDs before small ones. Later on, we'll ignore the commits we
// already know about.
$table_commit = new PhabricatorRepositoryCommit();
$table_repository = new PhabricatorRepository();
$conn_r = $table_commit->establishConnection('r');
// Find all the Git and Mercurial commits in the block which have completed
// change import. We can't fill the cache accurately for commits which have
// not completed change import, so just pretend we don't know about them.
// In these cases, we will ultimately fall back to VCS queries.
$commit_rows = queryfx_all(
$conn_r,
'SELECT c.id FROM %T c
JOIN %T r ON c.repositoryID = r.id AND r.versionControlSystem IN (%Ls)
WHERE c.id BETWEEN %d AND %d
AND (c.importStatus & %d) = %d',
$table_commit->getTableName(),
$table_repository->getTableName(),
array(
PhabricatorRepositoryType::REPOSITORY_TYPE_GIT,
PhabricatorRepositoryType::REPOSITORY_TYPE_MERCURIAL,
),
$bucket_min,
$bucket_max,
PhabricatorRepositoryCommit::IMPORTED_CHANGE,
PhabricatorRepositoryCommit::IMPORTED_CHANGE);
// If we don't have any data, just return the existing data.
if (!$commit_rows) {
return $current_data;
}
// Remove the commits we already have data for. We don't need to rebuild
// these. If there's nothing left, return the existing data.
$commit_ids = ipull($commit_rows, 'id', 'id');
$commit_ids = array_diff_key($commit_ids, $current_data);
if (!$commit_ids) {
return $current_data;
}
// Find all the path changes for the new commits.
$path_changes = queryfx_all(
$conn_r,
'SELECT commitID, pathID FROM %T
WHERE commitID IN (%Ld)
AND (isDirect = 1 OR changeType = %d)',
PhabricatorRepository::TABLE_PATHCHANGE,
$commit_ids,
DifferentialChangeType::TYPE_CHILD);
$path_changes = igroup($path_changes, 'commitID');
// Find all the parents for the new commits.
$parents = queryfx_all(
$conn_r,
'SELECT childCommitID, parentCommitID FROM %T
WHERE childCommitID IN (%Ld)
ORDER BY id ASC',
PhabricatorRepository::TABLE_PARENTS,
$commit_ids);
$parents = igroup($parents, 'childCommitID');
// Build the actual data for the cache.
foreach ($commit_ids as $commit_id) {
$parent_ids = array();
if (!empty($parents[$commit_id])) {
foreach ($parents[$commit_id] as $row) {
$parent_ids[] = (int)$row['parentCommitID'];
}
} else {
// We expect all rows to have parents (commits with no parents get
// an explicit "0" placeholder). If we're in an older repository, the
// parent information might not have been populated yet. Decline to fill
// the cache if we don't have the parent information, since the fill
// will be incorrect.
continue;
}
if (isset($path_changes[$commit_id])) {
$path_ids = $path_changes[$commit_id];
foreach ($path_ids as $key => $path_id) {
$path_ids[$key] = (int)$path_id['pathID'];
}
sort($path_ids);
} else {
$path_ids = array();
}
$value = $parent_ids;
$value[] = null;
foreach ($path_ids as $path_id) {
$value[] = $path_id;
}
$current_data[$commit_id] = $value;
}
return $current_data;
}
}
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Jan 19 2025, 23:18 (6 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1129819
Default Alt Text
(19 KB)
Attached To
Mode
rP Phorge
Attached
Detach File
Event Timeline
Log In to Comment